spider/src/sites/book.js
2019-04-03 09:34:04 +08:00

113 lines
3.3 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import netUtil from '../utils/net.util';
import SpiderCategory from '../models/SpiderCategory';
import SpiderData from '../models/SpiderData';
/**
 * Builds the header object that this module passes as the second argument
 * to `netUtil.getData` for every request.
 *
 * A new object is created on each call, so callers may mutate the result.
 *
 * NOTE(review): the token and user id are hard-coded credentials checked into
 * source — consider loading them from configuration.
 *
 * @returns {Object} header name/value pairs
 */
const generateHeader = () => ({
  'User-Agent': 'quanminxiaoshuo_apple/1.0.9 CFNetwork/976 Darwin/18.2.0',
  ver: '5.0',
  pt: 'ios',
  package: 'quanminxiaoshuo',
  token: 'tc3ahpxco5ads8utnsbhxfuypkstc2oi',
  user: '2300097',
});
/**
 * Fetches the category list for one sex and stores each category as a
 * `SpiderCategory` record of type 'book'.
 *
 * Each category object returned by the API is tagged with the requested
 * `sex` before being saved. Records are saved one at a time, in API order.
 *
 * @param {number} sex Sex selector sent to the API (1 = male; 2 is presumably
 *   female — TODO confirm against the API).
 * @returns {Promise<void>} resolves once every category has been saved
 * */
const categoryList = async (sex) => {
  // The requested sex must be part of the query; the previous code always
  // sent `sex=0` and then labelled the results with the caller's value.
  const url = `https://bc.dzjgmp.com/core/book/type.api?sex=${sex}`;
  const body = await netUtil.getData(url, generateHeader());
  const payload = JSON.parse(body);
  for (const category of payload.data) {
    category.sex = sex;
    const record = new SpiderCategory({
      data: category,
      type: 'book',
    });
    // Sequential on purpose: one write in flight at a time.
    await record.save();
  }
};
/**
 * Fetches one page of the book listing for a category and returns the
 * parsed JSON response.
 *
 * The sub-type and status filters are fixed at -1 and the page size at 20.
 *
 * @param {number} sex Sex selector (1 = male; 2 is presumably female — TODO confirm)
 * @param {number} ltype Major category id
 * @param {number} page Page number, starting at 1
 * @returns {Promise<Object>} the whole parsed response body
 * */
const bookList = async (sex, ltype, page) => {
  console.log(`get book list, sex: ${sex}, ltype: ${ltype}, page: ${page}`);
  const query = [
    `sex=${sex}`,
    `ltype=${ltype}`,
    'stype=-1',
    'status=-1',
    `page=${page}`,
    'limit=20',
  ].join('&');
  const body = await netUtil.getData(
    `https://bc.dzjgmp.com/core/book/type_all.api?${query}`,
    generateHeader(),
  );
  return JSON.parse(body);
};
/**
 * Fetches the detail record for a single book.
 *
 * @param {number|string} bookId Book identifier placed in the `book_id` query parameter
 * @returns {Promise<*>} the `data` member of the parsed response
 */
const bookInfo = async (bookId) => {
  console.log(`parse book: ${bookId}`);
  const endpoint = `https://cache.dzjgmp.com/book.api?book_id=${bookId}`;
  const body = await netUtil.getData(endpoint, generateHeader());
  const { data: detail } = JSON.parse(body);
  return detail;
};
/**
 * Stores every item of a single listing page as a `SpiderData` record of
 * type 'book'.
 *
 * Items are saved strictly one after another, in the order given.
 *
 * @param {Iterable<Object>} dataArr Items of one listing page
 * @returns {Promise<void>}
 */
const parsePageObj = async (dataArr) => {
  for (const item of dataArr) {
    const doc = new SpiderData({ data: item, type: 'book' });
    await doc.save();
  }
};
/**
 * Walks every stored 'book' category and saves all of its listing pages.
 *
 * For each category: page 1 is fetched, its `total` is written back onto the
 * category record, page 1 is stored, and then pages 2..last_page are fetched
 * and stored in order. Any error is logged and ends the whole run; it is not
 * rethrown.
 *
 * @returns {Promise<void>}
 */
const parseAllCategory = async () => {
  try {
    const categories = await SpiderCategory.find({ type: 'book' });
    for (const category of categories) {
      // Page 1 also carries the pagination metadata for this category.
      const opening = await bookList(category.data.sex, category.data.ltype_id, 1);
      const lastPage = opening.data.last_page;

      category.data.total = opening.data.total;
      // Nested field was changed in place, so flag it explicitly before saving
      // (presumably a Mongoose mixed-type path — TODO confirm against the model).
      category.markModified('data');
      await category.save();

      console.log(`parse ltype: ${category.data.ltype_id}, totalPage: ${lastPage}`);
      await parsePageObj(opening.data.list);

      let pageNo = 2;
      while (pageNo <= lastPage) {
        const following = await bookList(category.data.sex, category.data.ltype_id, pageNo);
        await parsePageObj(following.data.list);
        pageNo += 1;
      }
    }
  } catch (err) {
    console.log(err);
  }
};
/**
 * Enriches pending `SpiderData` records with their full book details.
 *
 * Selects records with `status: 0`, ordered by `data.book_id` ascending and
 * limited to one per call. For each, the detail record is fetched, its own
 * properties are copied over the stored `data`, and the record is saved.
 * The fetch-and-merge step is timed under the 'oneBook' console timer.
 *
 * NOTE(review): nothing here changes `status`, so the same record looks
 * eligible on the next call — confirm whether that is handled elsewhere.
 *
 * @returns {Promise<void>}
 */
const parseAllBookList = async () => {
  const pending = await SpiderData.find({ status: 0 })
    .sort({ 'data.book_id': 1 })
    .limit(1);

  for (const entry of pending) {
    console.time('oneBook');
    const detail = await bookInfo(entry.data.book_id);
    // Copy own enumerable fields only; a missing detail record copies nothing.
    Object.keys(detail || {}).forEach((field) => {
      entry.data[field] = detail[field];
    });
    entry.markModified('data');
    console.timeEnd('oneBook');
    await entry.save();
  }
};
export default {
  /**
   * Entry point of the book spider.
   *
   * Only the detail-enrichment stage is active. The earlier stages are run
   * by hand and are left disabled here:
   *   categoryList(1); categoryList(2);   // store the category lists
   *   bookList(1, 0, 1);                  // fetch a single listing page
   *   parseAllCategory();                 // store the listings of every category
   *
   * @returns {Promise<void>}
   */
  async run() {
    await parseAllBookList();
  },
};