蛋蛋赞电影通过superagent请求数据
This commit is contained in:
parent
c25eb6d213
commit
bbaa44f8c1
@ -4,6 +4,7 @@ import stringUtil from '../utils/string.util';
|
||||
import Movie from '../models/Movies';
|
||||
import generalQueue from '../utils/general.queue';
|
||||
import proxy from './proxy';
|
||||
import proxyUtil from '../utils/proxy.util';
|
||||
|
||||
const URL_BASE = 'https://www.dandanzan.com'
|
||||
const maxIdx = 100000;
|
||||
@ -16,7 +17,7 @@ const maxIdx = 100000;
|
||||
const parseOnePage = async (subLink, category, sortIdx) => {
|
||||
const url = `${URL_BASE}${subLink}`
|
||||
try {
|
||||
let html = await netUtil.getDataProxy(url, {})
|
||||
let html = await proxyUtil.getDataProxy(url)
|
||||
const $ = cheerio.load(html);
|
||||
if ($('.error404').text()) {
|
||||
console.log(`>>>>>>>>>>>> ${url} not found`);
|
||||
@ -89,7 +90,7 @@ const parseListPage = async (idx, category) => {
|
||||
console.log(`begin parse category: ${category} page: ${subPage}`);
|
||||
let html;
|
||||
try {
|
||||
html = await netUtil.getDataProxy(url, {})
|
||||
html = await proxyUtil.getDataProxy(url)
|
||||
} catch (err) {
|
||||
console.log(err);
|
||||
}
|
||||
@ -146,10 +147,10 @@ const parseAllMovie = async (category) => {
|
||||
|
||||
export default {
|
||||
run: async () => {
|
||||
await proxy.run();
|
||||
await parseAllMovie('movie');
|
||||
await parseAllMovie('tv');
|
||||
await parseAllMovie('show');
|
||||
// await proxy.run();
|
||||
// await parseAllMovie('movie');
|
||||
// await parseAllMovie('tv');
|
||||
// await parseAllMovie('show');
|
||||
await parseAllMovie('cartoon');
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
import request from 'request';
|
||||
import Promise from 'bluebird';
|
||||
import random_useragent from 'random-useragent';
|
||||
import proxy from './proxy.util';
|
||||
|
||||
|
||||
const iconv = require('iconv-lite');
|
||||
|
@ -1,5 +1,7 @@
|
||||
import stringUtil from './string.util';
|
||||
import ProxyInfo from '../models/spider/ProxyInfo';
|
||||
import Promise from 'bluebird';
|
||||
import random_useragent from 'random-useragent';
|
||||
|
||||
let request = require('superagent');
|
||||
require('superagent-proxy')(request);
|
||||
@ -11,7 +13,7 @@ export default {
|
||||
if (proxys.length === 0) {
|
||||
proxys = await ProxyInfo.availableList();
|
||||
}
|
||||
return proxys[stringUtil.randomNum(0, proxys.length - 1)];
|
||||
return proxys[stringUtil.randomNum(0, proxys.length - 1)].link;
|
||||
},
|
||||
async checkProxy(proxy){
|
||||
return new Promise(async (resolve, reject) => {
|
||||
@ -26,5 +28,28 @@ export default {
|
||||
reject(err);
|
||||
}
|
||||
})
|
||||
},
|
||||
// 通过代理来get数据
|
||||
async getDataProxy(url) {
|
||||
if (proxys.length === 0) {
|
||||
proxys = await ProxyInfo.availableList();
|
||||
}
|
||||
const proxy = 'http://' + proxys[stringUtil.randomNum(0, proxys.length - 1)].link;
|
||||
return new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
let response = await request.get(url)
|
||||
.set('User-Agent', random_useragent.getRandom())
|
||||
.proxy(proxy)
|
||||
.retry(2)
|
||||
.timeout(15000);
|
||||
if(response.statusCode === 200 ){
|
||||
resolve(response.text);
|
||||
} else {
|
||||
reject(new Error(' server response code: ' + response.statusCode));
|
||||
}
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
}
|
||||
})
|
||||
},
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user