增加nimadaili.com代理数据的抓取
This commit is contained in:
parent
526b96ed78
commit
e1dcbabd1f
@ -12,7 +12,7 @@ const parseOneXiladaili = async (idx) => {
|
||||
let url = `http://www.xiladaili.com/https/${idx + 1}/`;
|
||||
console.log('begin parse page:', url);
|
||||
try {
|
||||
let html = await netUtil.getData(url, {})
|
||||
let html = await proxyUtil.getDataProxy(url)
|
||||
if (html) {
|
||||
const $ = cheerio.load(html);
|
||||
const trArr = $('.fl-table tbody tr');
|
||||
@ -27,7 +27,7 @@ const parseOneXiladaili = async (idx) => {
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
console.log(err);
|
||||
console.log(`#######################error parse page:`, url);
|
||||
}
|
||||
}
|
||||
/* *
|
||||
@ -35,10 +35,10 @@ const parseOneXiladaili = async (idx) => {
|
||||
* https://www.xicidaili.com/nn/1
|
||||
* */
|
||||
const parseOneXicidaili = async (idx) => {
|
||||
let url = `https://www.xicidaili.com/nn/${idx + 1}`;
|
||||
let url = `https://www.xicidaili.com/wn/${idx + 1}`;
|
||||
console.log('begin parse page:', url);
|
||||
try {
|
||||
let html = await proxyUtil.getData(url)
|
||||
let html = await proxyUtil.getDataProxy(url)
|
||||
if (html) {
|
||||
const $ = cheerio.load(html);
|
||||
const trs = $("#ip_list tr");
|
||||
@ -62,10 +62,34 @@ const parseOneXicidaili = async (idx) => {
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
console.log(err);
|
||||
console.log(`#######################error parse page:`, url);
|
||||
}
|
||||
}
|
||||
|
||||
// http://www.nimadaili.com/https/1/
|
||||
/**
 * Scrape one listing page of nimadaili.com and store every proxy row found.
 *
 * Each `<tr>` of the `.fl-table` body becomes one record: the first cell is
 * used as the proxy link (and as the key passed to `ProxyInfo.updateOne`),
 * the fourth cell as the free-form info text.
 *
 * The returned promise settles only after every `ProxyInfo.updateOne` call has
 * settled, so callers that `await` this function really wait for the page to
 * be persisted. Failures (download, parsing or storage) are logged and
 * swallowed; the promise never rejects.
 *
 * @param {number} idx - Zero-based page index; page `idx + 1` is requested.
 * @returns {Promise<void>}
 */
const parseNimadaili = async (idx) => {
    const url = `http://www.nimadaili.com/https/${idx + 1}/`;
    console.log('begin parse page:', url);
    try {
        const html = await proxyUtil.getDataProxy(url);
        if (!html) {
            return;
        }
        const $ = cheerio.load(html);
        const records = $('.fl-table tbody tr')
            .toArray()
            .map((tr) => {
                const cells = $(tr).find('td');
                return {
                    link: cells.first().text(),
                    // NOTE(review): the page URL is the "https" listing but rows are
                    // stored as 'http' — kept as-is, confirm this is intended.
                    type: 'http',
                    info: cells.eq(3).text(),
                };
            });
        // Updates still run concurrently, but are awaited here so that a rejected
        // update lands in the catch below instead of becoming an unhandled rejection.
        await Promise.all(records.map((record) => ProxyInfo.updateOne(record.link, record)));
    } catch (err) {
        // Log the cause as well as the page, like the other page parsers do.
        console.log(err);
        console.log(`#######################error parse page:`, url);
    }
};
|
||||
// 检查代理状态
|
||||
const checkAndUpdate = async (record) => {
|
||||
console.log('begin check:', record.link);
|
||||
try {
|
||||
@ -75,6 +99,7 @@ const checkAndUpdate = async (record) => {
|
||||
} catch (err) {
|
||||
console.log('not available:', record.link);
|
||||
record.status = -1;
|
||||
record.err_count += 1;
|
||||
}
|
||||
record.try_count += 1;
|
||||
record.last_check = new Date();
|
||||
@ -86,8 +111,9 @@ const checkAndUpdate = async (record) => {
|
||||
return record.status > 0;
|
||||
}
|
||||
|
||||
const parseXiladaili = async (maxPage) => {
|
||||
for (let i = 0; i < maxPage; i++) {
|
||||
const parseXiladaili = async (pageCount, beginPage = 0) => {
|
||||
const maxPage = beginPage + pageCount;
|
||||
for (let i = beginPage; i < maxPage; i++) {
|
||||
try {
|
||||
await parseOneXiladaili(i);
|
||||
} catch (err) {
|
||||
@ -106,7 +132,18 @@ const parseXicidaili = async (maxPage) => {
|
||||
}
|
||||
console.log('finish parse all page, Xicidaili');
|
||||
}
|
||||
const checkAllProxy = async () => {
|
||||
/**
 * Walk a range of nimadaili listing pages one after another.
 *
 * Pages `beginPage` .. `beginPage + pageCount - 1` (zero-based indexes) are
 * handed to `parseNimadaili` sequentially; an error thrown for one page is
 * logged and does not stop the remaining pages.
 *
 * @param {number} pageCount - How many pages to process.
 * @param {number} [beginPage=0] - Zero-based index of the first page.
 * @returns {Promise<void>}
 */
const parseNimadailidaili = async (pageCount, beginPage = 0) => {
    const endPage = beginPage + pageCount;
    let page = beginPage;
    while (page < endPage) {
        try {
            await parseNimadaili(page);
        } catch (failure) {
            console.log(failure);
        }
        page += 1;
    }
    console.log('finish parse all page, nimadailidaili');
};
|
||||
const checkAllProxy = async (all) => {
|
||||
let records;
|
||||
return new Promise(async (resolve, reject) => {
|
||||
console.time('all')
|
||||
@ -115,7 +152,7 @@ const checkAllProxy = async () => {
|
||||
resolve()
|
||||
})
|
||||
try {
|
||||
records = await ProxyInfo.needCheckList();
|
||||
records = await ProxyInfo.needCheckList(all);
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
}
|
||||
@ -134,9 +171,10 @@ const checkAllProxy = async () => {
|
||||
export default {
|
||||
run: async () => {
|
||||
try {
|
||||
// await parseXiladaili(20);
|
||||
// await parseXiladaili(50, 50);
|
||||
// await parseXicidaili(20);
|
||||
await checkAllProxy();
|
||||
await parseNimadailidaili(50);
|
||||
await checkAllProxy(true);
|
||||
} catch (err) {
|
||||
console.log(err);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user