Move the retry-on-failure handling out of the dandanzan page parsers and into
the proxy request helper in src/utils/proxy.util.js: the parsers now only log
the failure and record lastStatus: false, while the helper retries the request
once after a network error and once after a non-200 status.

NOTE(review): leading whitespace was reconstructed for readability - verify it
against the target files before applying. The added lines of the proxy.util.js
hunk were corrected by hand (see the comments in that hunk), so the post-image
blob id on its index line no longer describes the resulting file.

diff --git a/src/sites/dandanzan.js b/src/sites/dandanzan.js
index 811f699..339d7c7 100644
--- a/src/sites/dandanzan.js
+++ b/src/sites/dandanzan.js
@@ -25,23 +25,13 @@ const parseOnePage = async ({subLink, category, sortIdx}) => {
       lastStatus: true,
     })
   } catch(err) {
-    console.log('parse page with network error, try again :', url);
-    try {
-      html = await proxyUtil.getDataProxy(url)
-      await CrawlRecord.updateRecord({url: url,
-        className: 'dandanzan',
-        methodName: 'parseOnePage',
-        params: {subLink, category, sortIdx},
-        lastStatus: true,
-      })
-    } catch (err2) {
-      await CrawlRecord.updateRecord({url: url,
-        className: 'dandanzan',
-        methodName: 'parseOnePage',
-        params: {subLink, category, sortIdx},
-        lastStatus: false,
-      })
-    }
+    console.log('parse page with network error:', url);
+    await CrawlRecord.updateRecord({url: url,
+      className: 'dandanzan',
+      methodName: 'parseOnePage',
+      params: {subLink, category, sortIdx},
+      lastStatus: false,
+    })
   }
   if (!html) {
     return false;
@@ -127,23 +117,13 @@ const parseListPage = async ({idx, category}) => {
       lastStatus: true,
     })
   } catch (err) {
-    console.log('parse page with network error, try again :', url);
-    try {
-      html = await proxyUtil.getDataProxy(url)
-      await CrawlRecord.updateRecord({url: url,
-        className: 'dandanzan',
-        methodName: 'parseListPage',
-        params: {idx, category},
-        lastStatus: true,
-      })
-    } catch (err2) {
-      await CrawlRecord.updateRecord({url: url,
-        className: 'dandanzan',
-        methodName: 'parseListPage',
-        params: {idx, category},
-        lastStatus: false,
-      })
-    }
+    console.log('parse page with network error:', url);
+    await CrawlRecord.updateRecord({url: url,
+      className: 'dandanzan',
+      methodName: 'parseListPage',
+      params: {idx, category},
+      lastStatus: false,
+    })
   }
   if (html) {
     const $ = cheerio.load(html);
diff --git a/src/utils/proxy.util.js b/src/utils/proxy.util.js
index 4ea2c4f..145a1dc 100644
--- a/src/utils/proxy.util.js
+++ b/src/utils/proxy.util.js
@@ -36,19 +36,46 @@ export default {
     }
     const proxy = 'http://' + proxys[stringUtil.randomNum(0, proxys.length - 1)].link;
     return new Promise(async (resolve, reject) => {
+      // Declared outside the try blocks so the retry paths and the status checks share it.
+      let response
       try {
-        let response = await request.get(url)
+        response = await request.get(url)
           .set('User-Agent', random_useragent.getRandom())
           .proxy(proxy)
           .retry(2)
           .timeout(15000);
-        if(response.statusCode === 200 ){
-          resolve(response.text);
-        } else {
-          reject(new Error(' server response code: ' + response.statusCode));
-        }
       } catch (err) {
-        reject(err);
+        console.log('parse page with network error, try again :', url);
+        try {
+          response = await request.get(url)
+            .set('User-Agent', random_useragent.getRandom())
+            .proxy(proxy)
+            .retry(2)
+            .timeout(15000);
+        } catch (err2) {
+          // Return after rejecting: `response` is still undefined here, so falling
+          // through to the status check below would throw inside the executor.
+          return reject(err2)
+        }
+      }
+      if(response.statusCode === 200 ){
+        resolve(response.text);
+      } else {
+        console.log('parse page with statusCode: ', response.statusCode, url);
+        try {
+          response = await request.get(url)
+            .set('User-Agent', random_useragent.getRandom())
+            .proxy(proxy)
+            .retry(2)
+            .timeout(15000);
+          if(response.statusCode === 200 ){
+            resolve(response.text);
+          } else {
+            reject(new Error('parse page with error statusCode: ' + response.statusCode))
+          }
+        } catch (err2) {
+          reject(err2)
+        }
       }
     })
   },