添加抓取session model

This commit is contained in:
zhl 2019-05-09 13:51:04 +08:00
parent 52fa5d7cb9
commit 102e92b016
3 changed files with 38 additions and 3 deletions

View File

@ -1,5 +1,4 @@
import mongoose from 'mongoose';
import MovieModel from "../Movies";
let Schema = mongoose.Schema;

View File

@ -0,0 +1,36 @@
import mongoose from 'mongoose';
let Schema = mongoose.Schema;
/**
 * Mongoose schema for a crawl session record, stored in the
 * `crawl_session` collection.
 *
 * `timestamps: true` asks Mongoose to maintain `createdAt` / `updatedAt`
 * on each document.
 */
let CrawlSessionSchema = new Schema({
  beginTime: {type: Date},
  endTime: {type: Date},
  name: {type: String},
  step: {type: Number},
  // Day string in YYYY-MM-DD form.
  day: {type: String},
  // 0: in progress, 1: finished.
  // The path must be declared with an explicit `type` key: the shorthand
  // `{Number}` expands to `{Number: Number}`, which Mongoose reads as a
  // nested object with a sub-field literally named "Number".
  status: {type: Number}
}, {
  collection: 'crawl_session',
  timestamps: true
});
/**
 * Static helpers that are attached to the CrawlSession model through
 * `CrawlSessionSchema.loadClass`.
 */
class CrawlSessionClass {
  /**
   * Upserts the document whose `url` equals `record.url`.
   *
   * The update writes the fields of `record`, sets `lastTry` to the current
   * time, increments `tryCount` by 1 and, when `record.lastStatus` is falsy,
   * also increments `errCount` by 1.
   *
   * NOTE(review): `url`, `tryCount`, `errCount`, `lastTry` and `lastStatus`
   * are not declared on the schema in this file, so this method looks like
   * it was carried over from another model — confirm which fields a crawl
   * session is supposed to track.
   *
   * @param {Object} record - Fields to write; `record.url` is the match key.
   *   The object is not modified.
   * @returns {Promise<void>} Resolves once the update call has completed.
   */
  static async updateRecord(record) {
    const query = {url: record.url};
    const options = {upsert: true, setDefaultsOnInsert: true};
    const incObj = {tryCount: 1};
    if (!record.lastStatus) {
      incObj.errCount = 1;
    }
    // Build the update document on a shallow copy so the caller's object
    // does not silently gain `$inc` and `lastTry` properties.
    const updateDoc = Object.assign({}, record, {
      $inc: incObj,
      lastTry: new Date()
    });
    await CrawlSessionModel.update(query, updateDoc, options);
  }
}
// Copy CrawlSessionClass's static methods onto the schema. This is done
// before the model is compiled on the next line so the compiled model
// carries them.
CrawlSessionSchema.loadClass(CrawlSessionClass);
// Compile the model; `CrawlSessionClass.updateRecord` refers to this
// binding by name at call time.
let CrawlSessionModel = mongoose.model('CrawlSession', CrawlSessionSchema);
export default CrawlSessionModel;

View File

@ -154,12 +154,12 @@ const parseListPage = async ({idx, category}) => {
return 1;
}
}
const parseAllMovie = async (category) => {
const parseAllMovie = async (category, beginNo = 1) => {
console.time('all');
let allPageNo = await parseListPage({idx: 0, category: category});
console.log('app page is', allPageNo);
if (allPageNo > 1) {
for (let i = 1; i <= allPageNo; i++) {
for (let i = beginNo; i <= allPageNo; i++) {
await parseListPage({idx: i, category: category});
}
}