From 0e6516f8d68db2e3292b96d5d17b15acb432989e Mon Sep 17 00:00:00 2001 From: zhl Date: Thu, 9 May 2019 17:26:04 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=AE=9A=E6=97=B6=E6=8A=93?= =?UTF-8?q?=E5=8F=96=E7=9A=84=E4=BB=BB=E5=8A=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/config.js.example | 4 ++++ package.json | 1 + src/app.js | 4 +++- src/schedule/website.schedule.js | 31 +++++++++++++++++++++++++++++++ src/sites/proxy.js | 8 +++++++- src/utils/general.queue.js | 2 +- 6 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 src/schedule/website.schedule.js diff --git a/config/config.js.example b/config/config.js.example index 9cf41d9..77645fe 100644 --- a/config/config.js.example +++ b/config/config.js.example @@ -10,6 +10,10 @@ let config = { db: 'mongodb://localhost/ghost-development', logs_path: '/Users/zhl/Documents/logs/spider', download_path: '/Users/zhl/Documents/spider', + schedule: { + website: '0 0 2 * * *', + proxy: '1 0 0 * * *' + } }; module.exports = config; diff --git a/package.json b/package.json index ae3c419..593f08a 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ "mkdirp": "^0.5.1", "mongoose": "^5.2.15", "multi-progress": "^2.0.0", + "node-schedule": "^1.3.2", "proxy-lists": "^1.16.0", "random-useragent": "^0.3.1", "request": "^2.88.0", diff --git a/src/app.js b/src/app.js index caddd22..71c3300 100644 --- a/src/app.js +++ b/src/app.js @@ -10,6 +10,7 @@ import bookChapter from './sites/bookChapter'; import dandanzan from './sites/dandanzan'; import proxy from './sites/proxy'; import commonTask from './sites/common'; +import websiteSchedule from './schedule/website.schedule'; mongoose.Promise = Promise; @@ -27,7 +28,8 @@ db.once('open', async function () { // movie.run(); // bookChapter.run(); // await dandanzan.run(); - await commonTask.run(); + // await commonTask.run(); + websiteSchedule.scheduleUpdateAll(); }); mongoose.connect(config.db, {promiseLibrary: Promise, useNewUrlParser: true}); diff --git a/src/schedule/website.schedule.js b/src/schedule/website.schedule.js new file mode 100644 index 0000000..2840b68 --- /dev/null +++ b/src/schedule/website.schedule.js @@ -0,0 +1,31 @@ +import schedule from 'node-schedule'; +import config from '../../config/config'; +import dandanzan from '../sites/dandanzan'; +import commonTask from '../sites/common'; +import proxy from "../sites/proxy"; + +export default { + scheduleUpdateAll() { + schedule.scheduleJob(config.schedule.website, async () => { + try { + await dandanzan.run(); + } catch (err) { + console.log('error execute dandanzan'); + } + try { + await commonTask.run(); + } catch (err) { + console.log('error execute commonTask'); + } + }); + console.log('已添加更新dandanzan电影的任务'); + schedule.scheduleJob(config.schedule.proxy, async () => { + try { + await proxy.run(); + } catch (err) { + console.log('error execute proxy'); + } + }); + console.log('已添加更proxy的定时任务'); + }, +} diff --git a/src/sites/proxy.js b/src/sites/proxy.js index 9bd0ce6..4d9d602 100644 --- a/src/sites/proxy.js +++ b/src/sites/proxy.js @@ -3,6 +3,12 @@ import netUtil from "../utils/net.util"; import cheerio from "cheerio"; import generalQueue from '../utils/general.queue'; import proxyUtil from '../utils/proxy.util'; +/** + * 负载抓取开放代理的task + * 1. www.xiladaili.com + * 2. www.xicidaili.com + * 3. www.nimadaili.com + * */ /* * * 获取www.xiladaili.com的代理数据 @@ -171,7 +177,7 @@ const checkAllProxy = async (all) => { export default { run: async () => { try { - // await parseXiladaili(50, 50); + await parseXiladaili(50); // await parseXicidaili(20); await parseNimadailidaili(50); await checkAllProxy(true); diff --git a/src/utils/general.queue.js b/src/utils/general.queue.js index 36ade86..db5f06b 100644 --- a/src/utils/general.queue.js +++ b/src/utils/general.queue.js @@ -11,7 +11,7 @@ let q = async.queue( async (reqObj, cb) => { } catch (err) { cb(err); } -}, 20); +}, 30); q.drain = function(){ console.info('all queue done'); console.timeEnd('all');