spider/src/utils/proxy.util.js
2019-05-09 11:57:23 +08:00

80 lines
2.3 KiB
JavaScript

import stringUtil from './string.util';
import ProxyInfo from '../models/spider/ProxyInfo';
import Promise from 'bluebird';
import random_useragent from 'random-useragent';
let request = require('superagent');
require('superagent-proxy')(request);
// Module-level cache of proxy records (each exposing a `link` field). The
// exported helpers refill it from ProxyInfo.availableList() whenever it is empty.
let proxys = [];
/**
 * Refills the module-level `proxys` cache from ProxyInfo.availableList() when
 * it is empty, then returns the `link` of one randomly selected entry.
 *
 * @returns {Promise<string>} the `link` field of the chosen proxy record
 *   (presumably "host:port", since callers prepend "http://" - confirm
 *   against the ProxyInfo model).
 * @throws {Error} when the cache is still empty after the refresh, instead of
 *   failing later with an opaque TypeError on `undefined.link`.
 */
async function pickProxyLink() {
  if (proxys.length === 0) {
    proxys = await ProxyInfo.availableList();
  }
  if (proxys.length === 0) {
    throw new Error('no available proxy');
  }
  return proxys[stringUtil.randomNum(0, proxys.length - 1)].link;
}

/**
 * Builds and starts one GET request for `url` routed through `proxy`, with a
 * random User-Agent, two superagent-level retries and a 15 s timeout.
 *
 * @param {string} url - address to fetch.
 * @param {string} proxy - proxy URL handed to superagent-proxy.
 * @returns {PromiseLike<object>} the superagent request; awaiting it yields
 *   the response or throws the request error.
 */
function fetchViaProxy(url, proxy) {
  return request
    .get(url)
    .set('User-Agent', random_useragent.getRandom())
    .proxy(proxy)
    .retry(2)
    .timeout(15000);
}

export default {
  /**
   * Picks a random proxy from the cached list, loading the list first when
   * the cache is empty.
   *
   * @returns {Promise<string>} the `link` of the selected proxy (no scheme
   *   prefix is added here).
   * @throws {Error} when no proxy is available.
   */
  async randomProxy() {
    return pickProxyLink();
  },

  /**
   * Verifies that `proxy` can reach the health-check endpoint.
   *
   * Resolves with no value when the endpoint answers with status 200 and the
   * body text '1'; rejects otherwise, either with the request error or with
   * an Error carrying the received status code.
   *
   * @param {string} proxy - proxy URL handed to superagent-proxy.
   * @returns {Promise<void>}
   */
  async checkProxy(proxy) {
    const response = await request
      .get('https://wechat-test.kingsome.cn/api/stat')
      .proxy(proxy)
      .timeout(5000);
    if (response.statusCode !== 200 || response.text !== '1') {
      throw new Error(' server response code: ' + response.statusCode);
    }
  },

  /**
   * Fetches `url` with a GET request sent through a randomly chosen proxy.
   *
   * The same proxy is used for every attempt. A failed first request is
   * retried once; a response whose status is not 200 triggers one more
   * request before giving up.
   *
   * @param {string} url - address to fetch.
   * @returns {Promise<string>} the response body text.
   * @throws {Error} when no proxy is available, when the request still fails
   *   after the retry, or when the final response status is not 200.
   */
  async getDataProxy(url) {
    const proxy = 'http://' + (await pickProxyLink());

    let response;
    try {
      response = await fetchViaProxy(url, proxy);
    } catch (err) {
      console.log('parse page with network error, try again :', url);
      // A second failure propagates to the caller as the rejection reason;
      // nothing below runs with an undefined response.
      response = await fetchViaProxy(url, proxy);
    }

    if (response.statusCode === 200) {
      return response.text;
    }

    // Log the status that was actually received (the previous code referenced
    // an undeclared `statusCode` variable here).
    console.log('parse page with statusCode: ', response.statusCode, url);
    response = await fetchViaProxy(url, proxy);
    if (response.statusCode === 200) {
      return response.text;
    }
    throw new Error('parse page with error statusCode: ' + response.statusCode);
  },
};