增加nimadaili.com代理数据的抓取

This commit is contained in:
zhl 2019-05-09 16:32:33 +08:00
parent 526b96ed78
commit e1dcbabd1f

View File

@ -12,7 +12,7 @@ const parseOneXiladaili = async (idx) => {
let url = `http://www.xiladaili.com/https/${idx + 1}/`;
console.log('begin parse page:', url);
try {
let html = await netUtil.getData(url, {})
let html = await proxyUtil.getDataProxy(url)
if (html) {
const $ = cheerio.load(html);
const trArr = $('.fl-table tbody tr');
@ -27,7 +27,7 @@ const parseOneXiladaili = async (idx) => {
});
}
} catch (err) {
console.log(err);
console.log(`#######################error parse page:`, url);
}
}
/* *
@ -35,10 +35,10 @@ const parseOneXiladaili = async (idx) => {
* https://www.xicidaili.com/nn/1
* */
const parseOneXicidaili = async (idx) => {
let url = `https://www.xicidaili.com/nn/${idx + 1}`;
let url = `https://www.xicidaili.com/wn/${idx + 1}`;
console.log('begin parse page:', url);
try {
let html = await proxyUtil.getData(url)
let html = await proxyUtil.getDataProxy(url)
if (html) {
const $ = cheerio.load(html);
const trs = $("#ip_list tr");
@ -62,10 +62,34 @@ const parseOneXicidaili = async (idx) => {
}
}
} catch (err) {
console.log(err);
console.log(`#######################error parse page:`, url);
}
}
/**
 * Scrape one listing page of nimadaili.com and store every proxy row found.
 *
 * Page URL pattern: http://www.nimadaili.com/https/<page>/ — pages are
 * 1-based, so `idx` 0 requests page 1.
 *
 * Any failure (download, parsing, or a store update) is logged with the page
 * URL and is not rethrown.
 *
 * @param {number} idx - Zero-based page index.
 * @returns {Promise<void>} Resolves after all row updates of the page have
 *   completed, or after the first failure has been logged.
 */
const parseNimadaili = async (idx) => {
  const url = `http://www.nimadaili.com/https/${idx + 1}/`;
  console.log('begin parse page:', url);
  try {
    const html = await proxyUtil.getDataProxy(url);
    if (!html) {
      return;
    }
    const $ = cheerio.load(html);
    const rows = $('.fl-table tbody tr').toArray();
    const updates = [];
    for (const row of rows) {
      const cells = $(row).find('td');
      // First column holds the proxy address; it doubles as the record key.
      const link = cells.first().text();
      if (!link) {
        // A row without an address would create a record keyed on ''.
        continue;
      }
      const record = {
        link,
        // NOTE(review): this is the /https/ listing but rows are tagged
        // 'http' — confirm this is intended.
        type: 'http',
        // Fourth column is stored verbatim as the free-form info text.
        info: cells.eq(3).text(),
      };
      updates.push(ProxyInfo.updateOne(link, record));
    }
    // Await the writes here: an async callback handed to `.each()` is never
    // awaited, so rejections would escape this try/catch and the function
    // would return before anything was stored.
    await Promise.all(updates);
  } catch (err) {
    console.log(`#######################error parse page:`, url);
  }
};
// 检查代理状态
const checkAndUpdate = async (record) => {
console.log('begin check:', record.link);
try {
@ -75,6 +99,7 @@ const checkAndUpdate = async (record) => {
} catch (err) {
console.log('not available:', record.link);
record.status = -1;
record.err_count += 1;
}
record.try_count += 1;
record.last_check = new Date();
@ -86,8 +111,9 @@ const checkAndUpdate = async (record) => {
return record.status > 0;
}
const parseXiladaili = async (maxPage) => {
for (let i = 0; i < maxPage; i++) {
const parseXiladaili = async (pageCount, beginPage = 0) => {
const maxPage = beginPage + pageCount;
for (let i = beginPage; i < maxPage; i++) {
try {
await parseOneXiladaili(i);
} catch (err) {
@ -106,7 +132,18 @@ const parseXicidaili = async (maxPage) => {
}
console.log('finish parse all page, Xicidaili');
}
const checkAllProxy = async () => {
/**
 * Walk a range of nimadaili listing pages one after another.
 *
 * Pages are processed strictly in sequence; an error raised while handling
 * one page is logged and the walk moves on to the next page.
 *
 * @param {number} pageCount - How many pages to process.
 * @param {number} [beginPage=0] - Zero-based index of the first page.
 * @returns {Promise<void>}
 */
const parseNimadailidaili = async (pageCount, beginPage = 0) => {
  const stopBefore = beginPage + pageCount;
  let page = beginPage;
  while (page < stopBefore) {
    try {
      await parseNimadaili(page);
    } catch (error) {
      console.log(error);
    }
    page++;
  }
  console.log('finish parse all page, nimadailidaili');
};
const checkAllProxy = async (all) => {
let records;
return new Promise(async (resolve, reject) => {
console.time('all')
@ -115,7 +152,7 @@ const checkAllProxy = async () => {
resolve()
})
try {
records = await ProxyInfo.needCheckList();
records = await ProxyInfo.needCheckList(all);
} catch (err) {
reject(err);
}
@ -134,9 +171,10 @@ const checkAllProxy = async () => {
export default {
run: async () => {
try {
// await parseXiladaili(20);
// await parseXiladaili(50, 50);
// await parseXicidaili(20);
await checkAllProxy();
await parseNimadailidaili(50);
await checkAllProxy(true);
} catch (err) {
console.log(err);
}