修改抓取重试逻辑

This commit is contained in:
zhl 2019-05-09 11:57:23 +08:00
parent d9f106d9ef
commit 6e4ef6793a
2 changed files with 45 additions and 41 deletions

View File

@@ -25,16 +25,7 @@ const parseOnePage = async ({subLink, category, sortIdx}) => {
lastStatus: true, lastStatus: true,
}) })
} catch(err) { } catch(err) {
console.log('parse page with network error, try again :', url); console.log('parse page with network error:', url);
try {
html = await proxyUtil.getDataProxy(url)
await CrawlRecord.updateRecord({url: url,
className: 'dandanzan',
methodName: 'parseOnePage',
params: {subLink, category, sortIdx},
lastStatus: true,
})
} catch (err2) {
await CrawlRecord.updateRecord({url: url, await CrawlRecord.updateRecord({url: url,
className: 'dandanzan', className: 'dandanzan',
methodName: 'parseOnePage', methodName: 'parseOnePage',
@@ -42,7 +33,6 @@ const parseOnePage = async ({subLink, category, sortIdx}) => {
lastStatus: false, lastStatus: false,
}) })
} }
}
if (!html) { if (!html) {
return false; return false;
} }
@@ -127,16 +117,7 @@ const parseListPage = async ({idx, category}) => {
lastStatus: true, lastStatus: true,
}) })
} catch (err) { } catch (err) {
console.log('parse page with network error, try again :', url); console.log('parse page with network error:', url);
try {
html = await proxyUtil.getDataProxy(url)
await CrawlRecord.updateRecord({url: url,
className: 'dandanzan',
methodName: 'parseListPage',
params: {idx, category},
lastStatus: true,
})
} catch (err2) {
await CrawlRecord.updateRecord({url: url, await CrawlRecord.updateRecord({url: url,
className: 'dandanzan', className: 'dandanzan',
methodName: 'parseListPage', methodName: 'parseListPage',
@@ -144,7 +125,6 @@ const parseListPage = async ({idx, category}) => {
lastStatus: false, lastStatus: false,
}) })
} }
}
if (html) { if (html) {
const $ = cheerio.load(html); const $ = cheerio.load(html);
let hrefs = $('.thumbnail'); let hrefs = $('.thumbnail');

View File

@@ -36,8 +36,31 @@ export default {
} }
const proxy = 'http://' + proxys[stringUtil.randomNum(0, proxys.length - 1)].link; const proxy = 'http://' + proxys[stringUtil.randomNum(0, proxys.length - 1)].link;
return new Promise(async (resolve, reject) => { return new Promise(async (resolve, reject) => {
let response
try { try {
let response = await request.get(url) response = await request.get(url)
.set('User-Agent', random_useragent.getRandom())
.proxy(proxy)
.retry(2)
.timeout(15000);
} catch (err) {
console.log('parse page with network error, try again :', url);
try {
response = await request.get(url)
.set('User-Agent', random_useragent.getRandom())
.proxy(proxy)
.retry(2)
.timeout(15000);
} catch (err2) {
reject(err2)
}
}
if(response.statusCode === 200 ){
resolve(response.text);
} else {
console.log('parse page with statusCode: ', statusCode, url);
try {
response = await request.get(url)
.set('User-Agent', random_useragent.getRandom()) .set('User-Agent', random_useragent.getRandom())
.proxy(proxy) .proxy(proxy)
.retry(2) .retry(2)
@@ -45,10 +68,11 @@ export default {
if(response.statusCode === 200 ){ if(response.statusCode === 200 ){
resolve(response.text); resolve(response.text);
} else { } else {
reject(new Error(' server response code: ' + response.statusCode)); reject(new Error('parse page with error statusCode: ' + response.statusCode))
}
} catch (err2) {
reject(err2)
} }
} catch (err) {
reject(err);
} }
}) })
}, },