蛋蛋赞电影通过superagent请求数组

This commit is contained in:
zhl 2019-05-08 21:34:24 +08:00
parent c25eb6d213
commit bbaa44f8c1
3 changed files with 35 additions and 8 deletions

View File

@ -4,6 +4,7 @@ import stringUtil from '../utils/string.util';
import Movie from '../models/Movies';
import generalQueue from '../utils/general.queue';
import proxy from './proxy';
import proxyUtil from '../utils/proxy.util';
const URL_BASE = 'https://www.dandanzan.com'
const maxIdx = 100000;
@ -16,7 +17,7 @@ const maxIdx = 100000;
const parseOnePage = async (subLink, category, sortIdx) => {
const url = `${URL_BASE}${subLink}`
try {
let html = await netUtil.getDataProxy(url, {})
let html = await proxyUtil.getDataProxy(url)
const $ = cheerio.load(html);
if ($('.error404').text()) {
console.log(`>>>>>>>>>>>> ${url} not found`);
@ -89,7 +90,7 @@ const parseListPage = async (idx, category) => {
console.log(`begin parse category: ${category} page: ${subPage}`);
let html;
try {
html = await netUtil.getDataProxy(url, {})
html = await proxyUtil.getDataProxy(url)
} catch (err) {
console.log(err);
}
@ -146,10 +147,10 @@ const parseAllMovie = async (category) => {
// Module entry point. `run` awaits each step in sequence, so a category is
// only started after the previous call has settled.
// NOTE(review): the commented-out calls below repeat the active calls above
// them; this looks like an old and a new revision of the same lines shown
// together — confirm which set is meant to be live before relying on it.
export default {
run: async () => {
await proxy.run();
await parseAllMovie('movie');
await parseAllMovie('tv');
await parseAllMovie('show');
// await proxy.run();
// await parseAllMovie('movie');
// await parseAllMovie('tv');
// await parseAllMovie('show');
await parseAllMovie('cartoon');
}
}

View File

@ -1,6 +1,7 @@
import request from 'request';
import Promise from 'bluebird';
import random_useragent from 'random-useragent';
import proxy from './proxy.util';
const iconv = require('iconv-lite');

View File

@ -1,5 +1,7 @@
import stringUtil from './string.util';
import ProxyInfo from '../models/spider/ProxyInfo';
import Promise from 'bluebird';
import random_useragent from 'random-useragent';
let request = require('superagent');
require('superagent-proxy')(request);
@ -11,7 +13,7 @@ export default {
if (proxys.length === 0) {
proxys = await ProxyInfo.availableList();
}
return proxys[stringUtil.randomNum(0, proxys.length - 1)];
return proxys[stringUtil.randomNum(0, proxys.length - 1)].link;
},
async checkProxy(proxy){
return new Promise(async (resolve, reject) => {
@ -26,5 +28,28 @@ export default {
reject(err);
}
})
}
},
// 通过代理来get数据
async getDataProxy(url) {
if (proxys.length === 0) {
proxys = await ProxyInfo.availableList();
}
const proxy = 'http://' + proxys[stringUtil.randomNum(0, proxys.length - 1)].link;
return new Promise(async (resolve, reject) => {
try {
let response = await request.get(url)
.set('User-Agent', random_useragent.getRandom())
.proxy(proxy)
.retry(2)
.timeout(15000);
if(response.statusCode === 200 ){
resolve(response.text);
} else {
reject(new Error(' server response code: ' + response.statusCode));
}
} catch (err) {
reject(err);
}
})
},
}