From 102e92b0166052f5ed4dad980b36c27a1db1ce92 Mon Sep 17 00:00:00 2001
From: zhl
Date: Thu, 9 May 2019 13:51:04 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=8A=93=E5=8F=96session=20m?=
 =?UTF-8?q?odel?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Note: CrawlSession.js below was amended in review (`status` schema type
fixed, comments added), so the blob id on its `index` line is stale.

 src/models/spider/CrawlRecord.js  |  1 -
 src/models/spider/CrawlSession.js | 49 +++++++++++++++++++++++++++++++
 src/sites/dandanzan.js            |  4 ++--
 3 files changed, 51 insertions(+), 3 deletions(-)
 create mode 100644 src/models/spider/CrawlSession.js

diff --git a/src/models/spider/CrawlRecord.js b/src/models/spider/CrawlRecord.js
index 9307f22..9c029a2 100644
--- a/src/models/spider/CrawlRecord.js
+++ b/src/models/spider/CrawlRecord.js
@@ -1,5 +1,4 @@
 import mongoose from 'mongoose';
-import MovieModel from "../Movies";
 
 let Schema = mongoose.Schema;
 
diff --git a/src/models/spider/CrawlSession.js b/src/models/spider/CrawlSession.js
new file mode 100644
index 0000000..89a8d6c
--- /dev/null
+++ b/src/models/spider/CrawlSession.js
@@ -0,0 +1,49 @@
+import mongoose from 'mongoose';
+
+let Schema = mongoose.Schema;
+
+// Schema for crawl sessions, stored in the `crawl_session` collection.
+let CrawlSessionSchema = new Schema({
+  beginTime: {type: Date},
+  endTime: {type: Date},
+  name: {type: String},
+  step: {type: Number},
+  // YYYY-MM-DD
+  day: {type: String},
+  // 0: in progress, 1: finished
+  status: {type: Number}
+}, {
+  collection: 'crawl_session',
+  timestamps: true
+});
+
+class CrawlSessionClass {
+  /**
+   * Upserts the document matching `record.url`, incrementing `tryCount`
+   * (and `errCount` when `record.lastStatus` is falsy) and stamping `lastTry`.
+   *
+   * NOTE(review): `url`, `tryCount`, `errCount`, `lastTry` and `lastStatus`
+   * are not declared on CrawlSessionSchema; confirm this method is meant for
+   * sessions rather than carried over from another model.
+   *
+   * @param {Object} record - Update document; mutated in place (`$inc` and
+   *   `lastTry` are set on it) before being sent.
+   * @returns {Promise<void>}
+   */
+  static async updateRecord(record) {
+    const query = {url: record.url};
+    const options = {upsert: true, setDefaultsOnInsert: true};
+    const incObj = {tryCount: 1};
+    if (!record.lastStatus) {
+      incObj.errCount = 1;
+    }
+    record['$inc'] = incObj;
+    record.lastTry = new Date();
+    await CrawlSessionModel.update(query, record, options);
+  }
+}
+CrawlSessionSchema.loadClass(CrawlSessionClass);
+
+let CrawlSessionModel = mongoose.model('CrawlSession', CrawlSessionSchema);
+
+export default CrawlSessionModel;
diff --git a/src/sites/dandanzan.js b/src/sites/dandanzan.js
index 339d7c7..1ed5102 100644 --- a/src/sites/dandanzan.js +++ b/src/sites/dandanzan.js @@ -154,12 +154,12 @@ const parseListPage = async ({idx, category}) => { return 1; } } -const parseAllMovie = async (category) => { +const parseAllMovie = async (category, beginNo = 1) => { console.time('all'); let allPageNo = await parseListPage({idx: 0, category: category}); console.log('app page is', allPageNo); if (allPageNo > 1) { - for (let i = 1; i <= allPageNo; i++) { + for (let i = beginNo; i <= allPageNo; i++) { await parseListPage({idx: i, category: category}); } }