增加定时抓取的任务
This commit is contained in:
parent
68ad43e4e4
commit
0e6516f8d6
@ -10,6 +10,10 @@ let config = {
|
|||||||
db: 'mongodb://localhost/ghost-development',
|
db: 'mongodb://localhost/ghost-development',
|
||||||
logs_path: '/Users/zhl/Documents/logs/spider',
|
logs_path: '/Users/zhl/Documents/logs/spider',
|
||||||
download_path: '/Users/zhl/Documents/spider',
|
download_path: '/Users/zhl/Documents/spider',
|
||||||
|
schedule: {
|
||||||
|
website: '0 0 2 * * *',
|
||||||
|
proxy: '1 0 0 * * *'
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
module.exports = config;
|
module.exports = config;
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
"mkdirp": "^0.5.1",
|
"mkdirp": "^0.5.1",
|
||||||
"mongoose": "^5.2.15",
|
"mongoose": "^5.2.15",
|
||||||
"multi-progress": "^2.0.0",
|
"multi-progress": "^2.0.0",
|
||||||
|
"node-schedule": "^1.3.2",
|
||||||
"proxy-lists": "^1.16.0",
|
"proxy-lists": "^1.16.0",
|
||||||
"random-useragent": "^0.3.1",
|
"random-useragent": "^0.3.1",
|
||||||
"request": "^2.88.0",
|
"request": "^2.88.0",
|
||||||
|
@ -10,6 +10,7 @@ import bookChapter from './sites/bookChapter';
|
|||||||
import dandanzan from './sites/dandanzan';
|
import dandanzan from './sites/dandanzan';
|
||||||
import proxy from './sites/proxy';
|
import proxy from './sites/proxy';
|
||||||
import commonTask from './sites/common';
|
import commonTask from './sites/common';
|
||||||
|
import websiteSchedule from './schedule/website.schedule';
|
||||||
mongoose.Promise = Promise;
|
mongoose.Promise = Promise;
|
||||||
|
|
||||||
|
|
||||||
@ -27,7 +28,8 @@ db.once('open', async function () {
|
|||||||
// movie.run();
|
// movie.run();
|
||||||
// bookChapter.run();
|
// bookChapter.run();
|
||||||
// await dandanzan.run();
|
// await dandanzan.run();
|
||||||
await commonTask.run();
|
// await commonTask.run();
|
||||||
|
websiteSchedule.scheduleUpdateAll();
|
||||||
});
|
});
|
||||||
mongoose.connect(config.db, {promiseLibrary: Promise, useNewUrlParser: true});
|
mongoose.connect(config.db, {promiseLibrary: Promise, useNewUrlParser: true});
|
||||||
|
|
||||||
|
31
src/schedule/website.schedule.js
Normal file
31
src/schedule/website.schedule.js
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import schedule from 'node-schedule';
|
||||||
|
import config from '../../config/config';
|
||||||
|
import dandanzan from '../sites/dandanzan';
|
||||||
|
import commonTask from '../sites/common';
|
||||||
|
import proxy from "../sites/proxy";
|
||||||
|
|
||||||
|
/**
 * Scheduler for the recurring crawl jobs.
 *
 * Exposes a single entry point, `scheduleUpdateAll`, which registers the
 * jobs with node-schedule using the cron expressions found in
 * `config.schedule`.
 */
export default {
    /**
     * Registers two recurring jobs:
     *   1. `config.schedule.website` - runs `dandanzan.run()` and then
     *      `commonTask.run()`, one after the other.
     *   2. `config.schedule.proxy`   - runs `proxy.run()`.
     *
     * A failure in one task is logged together with the error that caused
     * it and does not prevent the following task (or later runs of the
     * job) from executing.
     *
     * @returns {void}
     */
    scheduleUpdateAll() {
        // Runs a single task and logs, rather than propagates, any failure,
        // so that one broken site cannot abort the remaining tasks in the
        // same job. The error itself is logged too; the message alone gives
        // no hint about what went wrong.
        const runTask = async (name, task) => {
            try {
                await task.run();
            } catch (err) {
                console.log(`error execute ${name}`, err);
            }
        };

        schedule.scheduleJob(config.schedule.website, async () => {
            // Sequential on purpose: commonTask starts only after dandanzan
            // has finished (or failed).
            await runTask('dandanzan', dandanzan);
            await runTask('commonTask', commonTask);
        });
        console.log('已添加更新dandanzan电影的任务');

        schedule.scheduleJob(config.schedule.proxy, async () => {
            await runTask('proxy', proxy);
        });
        console.log('已添加更新proxy的定时任务');
    },
};
|
@ -3,6 +3,12 @@ import netUtil from "../utils/net.util";
|
|||||||
import cheerio from "cheerio";
|
import cheerio from "cheerio";
|
||||||
import generalQueue from '../utils/general.queue';
|
import generalQueue from '../utils/general.queue';
|
||||||
import proxyUtil from '../utils/proxy.util';
|
import proxyUtil from '../utils/proxy.util';
|
||||||
|
/**
|
||||||
|
* 负责抓取开放代理的task
|
||||||
|
* 1. www.xiladaili.com
|
||||||
|
* 2. www.xicidaili.com
|
||||||
|
* 3. www.nimadaili.com
|
||||||
|
* */
|
||||||
|
|
||||||
/* *
|
/* *
|
||||||
* 获取www.xiladaili.com的代理数据
|
* 获取www.xiladaili.com的代理数据
|
||||||
@ -171,7 +177,7 @@ const checkAllProxy = async (all) => {
|
|||||||
export default {
|
export default {
|
||||||
run: async () => {
|
run: async () => {
|
||||||
try {
|
try {
|
||||||
// await parseXiladaili(50, 50);
|
await parseXiladaili(50);
|
||||||
// await parseXicidaili(20);
|
// await parseXicidaili(20);
|
||||||
await parseNimadailidaili(50);
|
await parseNimadailidaili(50);
|
||||||
await checkAllProxy(true);
|
await checkAllProxy(true);
|
||||||
|
@ -11,7 +11,7 @@ let q = async.queue( async (reqObj, cb) => {
|
|||||||
} catch (err) {
|
} catch (err) {
|
||||||
cb(err);
|
cb(err);
|
||||||
}
|
}
|
||||||
}, 20);
|
}, 30);
|
||||||
q.drain = function(){
|
q.drain = function(){
|
||||||
console.info('all queue done');
|
console.info('all queue done');
|
||||||
console.timeEnd('all');
|
console.timeEnd('all');
|
||||||
|
Loading…
x
Reference in New Issue
Block a user