增加定时抓取的任务

This commit is contained in:
zhl 2019-05-09 17:26:04 +08:00
parent 68ad43e4e4
commit 0e6516f8d6
6 changed files with 47 additions and 3 deletions

View File

@ -10,6 +10,10 @@ let config = {
db: 'mongodb://localhost/ghost-development',
logs_path: '/Users/zhl/Documents/logs/spider',
download_path: '/Users/zhl/Documents/spider',
schedule: {
website: '0 0 2 * * *',
proxy: '1 0 0 * * *'
}
};
module.exports = config;

View File

@ -30,6 +30,7 @@
"mkdirp": "^0.5.1",
"mongoose": "^5.2.15",
"multi-progress": "^2.0.0",
"node-schedule": "^1.3.2",
"proxy-lists": "^1.16.0",
"random-useragent": "^0.3.1",
"request": "^2.88.0",

View File

@ -10,6 +10,7 @@ import bookChapter from './sites/bookChapter';
import dandanzan from './sites/dandanzan';
import proxy from './sites/proxy';
import commonTask from './sites/common';
import websiteSchedule from './schedule/website.schedule';
mongoose.Promise = Promise;
@ -27,7 +28,8 @@ db.once('open', async function () {
// movie.run();
// bookChapter.run();
// await dandanzan.run();
await commonTask.run();
// await commonTask.run();
websiteSchedule.scheduleUpdateAll();
});
mongoose.connect(config.db, {promiseLibrary: Promise, useNewUrlParser: true});

View File

@ -0,0 +1,31 @@
import schedule from 'node-schedule';
import config from '../../config/config';
import dandanzan from '../sites/dandanzan';
import commonTask from '../sites/common';
import proxy from "../sites/proxy";
/**
 * Runs a single crawl task and contains any failure so that it cannot
 * escape the scheduler callback or prevent the next task from running.
 *
 * @param {string} label - Task name used in the failure log line.
 * @param {{run: Function}} task - Task module exposing a `run()` method.
 * @returns {Promise<void>} Resolves once the task finished or its failure was logged.
 */
const runTask = async (label, task) => {
  try {
    await task.run();
  } catch (err) {
    // Log the error object itself; a bare message gives nothing to debug with.
    console.error(`error execute ${label}`, err);
  }
};

export default {
  /**
   * Registers the recurring crawl jobs with node-schedule.
   *
   * Two jobs are created:
   *  - one on `config.schedule.website`, which runs the dandanzan task and
   *    then the common task, one after the other;
   *  - one on `config.schedule.proxy`, which runs the proxy task.
   *
   * Each task is isolated by `runTask`, so a failing task is logged and the
   * remaining work of that job still executes.
   */
  scheduleUpdateAll() {
    schedule.scheduleJob(config.schedule.website, async () => {
      // Sequential on purpose: commonTask starts only after dandanzan settled.
      await runTask('dandanzan', dandanzan);
      await runTask('commonTask', commonTask);
    });
    console.log('已添加更新dandanzan电影的任务');

    schedule.scheduleJob(config.schedule.proxy, async () => {
      await runTask('proxy', proxy);
    });
    console.log('已添加更proxy的定时任务');
  },
};

View File

@ -3,6 +3,12 @@ import netUtil from "../utils/net.util";
import cheerio from "cheerio";
import generalQueue from '../utils/general.queue';
import proxyUtil from '../utils/proxy.util';
/**
* 负责抓取开放代理的task
* 1. www.xiladaili.com
* 2. www.xicidaili.com
* 3. www.nimadaili.com
* */
/* *
* 获取www.xiladaili.com的代理数据
@ -171,7 +177,7 @@ const checkAllProxy = async (all) => {
export default {
run: async () => {
try {
// await parseXiladaili(50, 50);
await parseXiladaili(50);
// await parseXicidaili(20);
await parseNimadailidaili(50);
await checkAllProxy(true);

View File

@ -11,7 +11,7 @@ let q = async.queue( async (reqObj, cb) => {
} catch (err) {
cb(err);
}
}, 20);
}, 30);
q.drain = function(){
console.info('all queue done');
console.timeEnd('all');