增加定时抓取的任务
This commit is contained in:
parent
68ad43e4e4
commit
0e6516f8d6
@ -10,6 +10,10 @@ let config = {
|
||||
db: 'mongodb://localhost/ghost-development',
|
||||
logs_path: '/Users/zhl/Documents/logs/spider',
|
||||
download_path: '/Users/zhl/Documents/spider',
|
||||
schedule: {
|
||||
website: '0 0 2 * * *',
|
||||
proxy: '1 0 0 * * *'
|
||||
}
|
||||
};
|
||||
|
||||
module.exports = config;
|
||||
|
@ -30,6 +30,7 @@
|
||||
"mkdirp": "^0.5.1",
|
||||
"mongoose": "^5.2.15",
|
||||
"multi-progress": "^2.0.0",
|
||||
"node-schedule": "^1.3.2",
|
||||
"proxy-lists": "^1.16.0",
|
||||
"random-useragent": "^0.3.1",
|
||||
"request": "^2.88.0",
|
||||
|
@ -10,6 +10,7 @@ import bookChapter from './sites/bookChapter';
|
||||
import dandanzan from './sites/dandanzan';
|
||||
import proxy from './sites/proxy';
|
||||
import commonTask from './sites/common';
|
||||
import websiteSchedule from './schedule/website.schedule';
|
||||
mongoose.Promise = Promise;
|
||||
|
||||
|
||||
@ -27,7 +28,8 @@ db.once('open', async function () {
|
||||
// movie.run();
|
||||
// bookChapter.run();
|
||||
// await dandanzan.run();
|
||||
await commonTask.run();
|
||||
// await commonTask.run();
|
||||
websiteSchedule.scheduleUpdateAll();
|
||||
});
|
||||
mongoose.connect(config.db, {promiseLibrary: Promise, useNewUrlParser: true});
|
||||
|
||||
|
31
src/schedule/website.schedule.js
Normal file
31
src/schedule/website.schedule.js
Normal file
@ -0,0 +1,31 @@
|
||||
import schedule from 'node-schedule';
|
||||
import config from '../../config/config';
|
||||
import dandanzan from '../sites/dandanzan';
|
||||
import commonTask from '../sites/common';
|
||||
import proxy from "../sites/proxy";
|
||||
|
||||
/**
 * Runs one scraping task and reports a failure without rethrowing, so that a
 * failing task never prevents the tasks that follow it in the same job.
 *
 * @param {string} name - Task label used in the failure log line.
 * @param {{run: Function}} task - Object exposing an async `run()` method.
 * @returns {Promise<void>}
 */
const runTask = async (name, task) => {
    try {
        await task.run();
    } catch (err) {
        // Log the error itself as well as the label; the label alone gives
        // no clue about why a scheduled run failed.
        console.error(`error execute ${name}`, err);
    }
};

/**
 * Registers a job with node-schedule and logs the outcome.
 *
 * @param {string} spec - Cron expression taken from `config.schedule`.
 * @param {Function} handler - Async callback invoked on every tick.
 * @param {string} successMessage - Line logged once the job is registered.
 * @returns {void}
 */
const registerJob = (spec, handler, successMessage) => {
    const job = schedule.scheduleJob(spec, handler);
    if (!job) {
        // node-schedule hands back null when the spec cannot be scheduled;
        // announcing success in that case would be misleading.
        console.error(`failed to schedule job, invalid spec: ${spec}`);
        return;
    }
    console.log(successMessage);
};

export default {
    /**
     * Registers the recurring scraping jobs.
     *
     * - website job (`config.schedule.website`): runs `dandanzan` and then
     *   `commonTask`, one after the other; a failure in the first does not
     *   stop the second.
     * - proxy job (`config.schedule.proxy`): runs `proxy`.
     *
     * @returns {void}
     */
    scheduleUpdateAll() {
        registerJob(
            config.schedule.website,
            async () => {
                await runTask('dandanzan', dandanzan);
                await runTask('commonTask', commonTask);
            },
            '已添加更新dandanzan电影的任务',
        );

        registerJob(
            config.schedule.proxy,
            async () => {
                await runTask('proxy', proxy);
            },
            '已添加更proxy的定时任务',
        );
    },
};
|
@ -3,6 +3,12 @@ import netUtil from "../utils/net.util";
|
||||
import cheerio from "cheerio";
|
||||
import generalQueue from '../utils/general.queue';
|
||||
import proxyUtil from '../utils/proxy.util';
|
||||
/**
|
||||
* 负责抓取开放代理的task
|
||||
* 1. www.xiladaili.com
|
||||
* 2. www.xicidaili.com
|
||||
* 3. www.nimadaili.com
|
||||
* */
|
||||
|
||||
/* *
|
||||
* 获取www.xiladaili.com的代理数据
|
||||
@ -171,7 +177,7 @@ const checkAllProxy = async (all) => {
|
||||
export default {
|
||||
run: async () => {
|
||||
try {
|
||||
// await parseXiladaili(50, 50);
|
||||
await parseXiladaili(50);
|
||||
// await parseXicidaili(20);
|
||||
await parseNimadailidaili(50);
|
||||
await checkAllProxy(true);
|
||||
|
@ -11,7 +11,7 @@ let q = async.queue( async (reqObj, cb) => {
|
||||
} catch (err) {
|
||||
cb(err);
|
||||
}
|
||||
}, 20);
|
||||
}, 30);
|
||||
q.drain = function(){
|
||||
console.info('all queue done');
|
||||
console.timeEnd('all');
|
||||
|
Loading…
x
Reference in New Issue
Block a user