From e1dcbabd1fb59b7f1aae1c011216c3fd79f13b39 Mon Sep 17 00:00:00 2001 From: zhl Date: Thu, 9 May 2019 16:32:33 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0nimadaili.com=E4=BB=A3?= =?UTF-8?q?=E7=90=86=E6=95=B0=E6=8D=AE=E7=9A=84=E6=8A=93=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/sites/proxy.js | 60 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/src/sites/proxy.js b/src/sites/proxy.js index 2773425..9bd0ce6 100644 --- a/src/sites/proxy.js +++ b/src/sites/proxy.js @@ -12,7 +12,7 @@ const parseOneXiladaili = async (idx) => { let url = `http://www.xiladaili.com/https/${idx + 1}/`; console.log('begin parse page:', url); try { - let html = await netUtil.getData(url, {}) + let html = await proxyUtil.getDataProxy(url) if (html) { const $ = cheerio.load(html); const trArr = $('.fl-table tbody tr'); @@ -27,7 +27,7 @@ const parseOneXiladaili = async (idx) => { }); } } catch (err) { - console.log(err); + console.log(`#######################error parse page:`, url); } } /* * @@ -35,10 +35,10 @@ const parseOneXiladaili = async (idx) => { * https://www.xicidaili.com/nn/1 * */ const parseOneXicidaili = async (idx) => { - let url = `https://www.xicidaili.com/nn/${idx + 1}`; + let url = `https://www.xicidaili.com/wn/${idx + 1}`; console.log('begin parse page:', url); try { - let html = await proxyUtil.getData(url) + let html = await proxyUtil.getDataProxy(url) if (html) { const $ = cheerio.load(html); const trs = $("#ip_list tr"); @@ -62,10 +62,34 @@ const parseOneXicidaili = async (idx) => { } } } catch (err) { - console.log(err); + console.log(`#######################error parse page:`, url); } } +// http://www.nimadaili.com/https/1/ +const parseNimadaili = async (idx) => { + let url = `http://www.nimadaili.com/https/${idx + 1}/`; + console.log('begin parse page:', url); + try { + let html = await proxyUtil.getDataProxy(url) + if (html) { + const $ = cheerio.load(html); + const trArr = $('.fl-table tbody tr'); + $(trArr).each(async function (i, tr) { + const link = $(this).find('td').first().text(); + const record = { + link: link, + type: 'http', + info: $(this).find('td').eq(3).text() + } + await ProxyInfo.updateOne(link, record) + }); + } + } catch (err) { + console.log(`#######################error parse page:`, url); + } +} +// 检查代理状态 const checkAndUpdate = async (record) => { console.log('begin check:', record.link); try { @@ -75,6 +99,7 @@ const checkAndUpdate = async (record) => { } catch (err) { console.log('not available:', record.link); record.status = -1; + record.err_count += 1; } record.try_count += 1; record.last_check = new Date(); @@ -86,8 +111,9 @@ const checkAndUpdate = async (record) => { return record.status > 0; } -const parseXiladaili = async (maxPage) => { - for (let i = 0; i < maxPage; i++) { +const parseXiladaili = async (pageCount, beginPage = 0) => { + const maxPage = beginPage + pageCount; + for (let i = beginPage; i < maxPage; i++) { try { await parseOneXiladaili(i); } catch (err) { @@ -106,7 +132,18 @@ const parseXicidaili = async (maxPage) => { } console.log('finish parse all page, Xicidaili'); } -const checkAllProxy = async () => { +const parseNimadailidaili = async (pageCount, beginPage = 0) => { + const maxPage = beginPage + pageCount; + for (let i = beginPage; i < maxPage; i++) { + try { + await parseNimadaili(i); + } catch (err) { + console.log(err); + } + } + console.log('finish parse all page, nimadailidaili'); +} +const checkAllProxy = async (all) => { let records; return new Promise(async (resolve, reject) => { console.time('all') @@ -115,7 +152,7 @@ const checkAllProxy = async () => { resolve() }) try { - records = await ProxyInfo.needCheckList(); + records = await ProxyInfo.needCheckList(all); } catch (err) { reject(err); } @@ -134,9 +171,10 @@ const checkAllProxy = async () => { export default { run: async () => { try { - // await parseXiladaili(20); + // await parseXiladaili(50, 50); // await parseXicidaili(20); - await checkAllProxy(); + await parseNimadailidaili(50); + await checkAllProxy(true); } catch (err) { console.log(err); }