From 6e4ef6793a99d2fa49e35064b33e0e7b17f59e7f Mon Sep 17 00:00:00 2001 From: zhl Date: Thu, 9 May 2019 11:57:23 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=8A=93=E5=8F=96=E9=87=8D?= =?UTF-8?q?=E8=AF=95=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/sites/dandanzan.js | 48 ++++++++++++----------------------------- src/utils/proxy.util.js | 38 ++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/src/sites/dandanzan.js b/src/sites/dandanzan.js index 811f699..339d7c7 100644 --- a/src/sites/dandanzan.js +++ b/src/sites/dandanzan.js @@ -25,23 +25,13 @@ const parseOnePage = async ({subLink, category, sortIdx}) => { lastStatus: true, }) } catch(err) { - console.log('parse page with network error, try again :', url); - try { - html = await proxyUtil.getDataProxy(url) - await CrawlRecord.updateRecord({url: url, - className: 'dandanzan', - methodName: 'parseOnePage', - params: {subLink, category, sortIdx}, - lastStatus: true, - }) - } catch (err2) { - await CrawlRecord.updateRecord({url: url, - className: 'dandanzan', - methodName: 'parseOnePage', - params: {subLink, category, sortIdx}, - lastStatus: false, - }) - } + console.log('parse page with network error:', url); + await CrawlRecord.updateRecord({url: url, + className: 'dandanzan', + methodName: 'parseOnePage', + params: {subLink, category, sortIdx}, + lastStatus: false, + }) } if (!html) { return false; @@ -127,23 +117,13 @@ const parseListPage = async ({idx, category}) => { lastStatus: true, }) } catch (err) { - console.log('parse page with network error, try again :', url); - try { - html = await proxyUtil.getDataProxy(url) - await CrawlRecord.updateRecord({url: url, - className: 'dandanzan', - methodName: 'parseListPage', - params: {idx, category}, - lastStatus: true, - }) - } catch (err2) { - await CrawlRecord.updateRecord({url: url, - className: 'dandanzan', - methodName: 'parseListPage', - 
params: {idx, category}, - lastStatus: false, - }) - } + console.log('parse page with network error:', url); + await CrawlRecord.updateRecord({url: url, + className: 'dandanzan', + methodName: 'parseListPage', + params: {idx, category}, + lastStatus: false, + }) } if (html) { const $ = cheerio.load(html); diff --git a/src/utils/proxy.util.js b/src/utils/proxy.util.js index 4ea2c4f..145a1dc 100644 --- a/src/utils/proxy.util.js +++ b/src/utils/proxy.util.js @@ -36,19 +36,43 @@ export default { } const proxy = 'http://' + proxys[stringUtil.randomNum(0, proxys.length - 1)].link; return new Promise(async (resolve, reject) => { + let response try { - let response = await request.get(url) + response = await request.get(url) .set('User-Agent', random_useragent.getRandom()) .proxy(proxy) .retry(2) .timeout(15000); - if(response.statusCode === 200 ){ - resolve(response.text); - } else { - reject(new Error(' server response code: ' + response.statusCode)); - } } catch (err) { - reject(err); + console.log('parse page with network error, try again :', url); + try { + response = await request.get(url) + .set('User-Agent', random_useragent.getRandom()) + .proxy(proxy) + .retry(2) + .timeout(15000); + } catch (err2) { + return reject(err2) + } + } + if(response.statusCode === 200 ){ + resolve(response.text); + } else { + console.log('parse page with statusCode: ', response.statusCode, url); + try { + response = await request.get(url) + .set('User-Agent', random_useragent.getRandom()) + .proxy(proxy) + .retry(2) + .timeout(15000); + if(response.statusCode === 200 ){ + resolve(response.text); + } else { + reject(new Error('parse page with error statusCode: ' + response.statusCode)) + } + } catch (err2) { + reject(err2) + } } }) },