diff --git a/docs/qmxs.md b/docs/qmxs.md new file mode 100644 index 0000000..5176b43 --- /dev/null +++ b/docs/qmxs.md @@ -0,0 +1,78 @@ +# 全民小说 + +## 分类列表 + +https://bc.dzjgmp.com/core/book/type.api?sex=1 + +| 字段 | 类型 | 说明 | +|----|----|----| +|sex| Number | 性别 1:男, 2:女 | + +Response: + +``` +{ code: 1, + data: + [ { ltype_id: 0, + ltype_image: 'https://res.jnmzsd.com/create/15457022763611.png', + ltype_name: '玄幻', + ltype_list: [Array] }, + ] +} +``` + +## 列表查询接口 + +https://bc.dzjgmp.com/core/book/type_all.api?sex=1&ltype=0&stype=-1&status=-1&page=1&limit=20 + +| 字段 | 类型 | 说明 | +|----|----|----| +|sex| Number | 性别 0:未指定,1:男, 2:女 | +|ltype| Number | 分类的id,从0开始,具体含义从分类列表获取 | +|stype| Number | 子分类的id,-1:所有,其他:从分类列表的ltype_list获取 | +|status| Number |连载状态 -1:所有,1: 完结,2:连载 | +|page| Number | 分页页码,从1开始 | +|limit| Number | 每页数量 | + +Response: + +``` +{ code: 1, + data: + { total: 34248, + current_page: 1, + limit: 20, + from: 1, + to: 20, + last_page: 1713, + list: [] + } +} +``` + +单本书的结构 +``` +{ book_id: 668, + name: '逆天邪神', + author: '火星引力', + image: 'https://res.jnmzsd.com/zssq/308/542a5838a5ae10f815039a7f.jpeg', + sex: 1, + ltype: '玄幻', + stype: '东方玄幻', + remark: '掌天毒之珠,承邪神之血,修逆天之力,一代邪神,君临天下!【添加微信公众号:火星引力】【我们的yy频道:49554】......各位书友要是觉得《逆天邪神》还不错的话请不要忘记向您QQ群和微博里的朋友推荐哦!', + status: 2, + last_chapter: '第1385章 极怒凤炎', + updated_at: '2019-04-02 12:20:00' } +``` + +## 书籍详情 + +https://cache.dzjgmp.com/book.api?book_id=10887 + +## 数据章节列表 + +https://cache.dzjgmp.com/book_chapter.api?book_id=10887&site_id=wsg + +## 章节详情 + +https://cache.dzjgmp.com/chapter.api?chapter_id=19873&crawl_book_id=839413&id=418&site_id=wsg diff --git a/src/app.js b/src/app.js index d41840b..1751892 100644 --- a/src/app.js +++ b/src/app.js @@ -4,6 +4,7 @@ import logger from './utils/logger'; import glob from 'glob'; import config from '../config/config'; import hoh8 from './sites/hoh8'; +import book from './sites/book'; mongoose.Promise = Promise; @@ -16,7 +17,8 @@ db.on('error', function (err) { }); 
db.once('open', function () { logger.info('Connected to db.'); - hoh8.run(); + // hoh8.run(); + book.run().catch((err) => logger.error(err)); }); mongoose.connect(config.db, {promiseLibrary: Promise, useNewUrlParser: true}); diff --git a/src/models/SpiderCategory.js b/src/models/SpiderCategory.js new file mode 100644 index 0000000..9283f79 --- /dev/null +++ b/src/models/SpiderCategory.js @@ -0,0 +1,23 @@ +import mongoose from 'mongoose'; + +let Schema = mongoose.Schema; + + +let SpiderCategorySchema = new Schema({ + data: {type: Schema.Types.Mixed}, + type: {type: String}, + status: {type: Number, default: 0} +}, { + collection: 'spider_category', + timestamps: true +}); + +class SpiderCategoryClass { +} + +SpiderCategorySchema.loadClass(SpiderCategoryClass); + + +let SpiderCategoryModel = mongoose.model('SpiderCategory', SpiderCategorySchema); + +export default SpiderCategoryModel; diff --git a/src/models/SpiderData.js b/src/models/SpiderData.js index a2a081d..5f06c2c 100644 --- a/src/models/SpiderData.js +++ b/src/models/SpiderData.js @@ -4,7 +4,6 @@ let Schema = mongoose.Schema; let SpiderDataSchema = new Schema({ - id: {type: Number}, data: {type: Schema.Types.Mixed}, type: {type: String}, status: {type: Number, default: 0} diff --git a/src/sites/book.js b/src/sites/book.js new file mode 100644 index 0000000..8c411bf --- /dev/null +++ b/src/sites/book.js @@ -0,0 +1,112 @@ +import netUtil from '../utils/net.util'; +import SpiderCategory from '../models/SpiderCategory'; +import SpiderData from '../models/SpiderData'; + +const generateHeader = () => { // Build the fixed request headers passed to every netUtil.getData call in this module + return { + 'User-Agent': 'quanminxiaoshuo_apple/1.0.9 CFNetwork/976 Darwin/18.2.0', + 'ver': '5.0', + 'pt': 'ios', + 'package': 'quanminxiaoshuo', + 'token': 'tc3ahpxco5ads8utnsbhxfuypkstc2oi', + 'user': '2300097' + } +} +/** + * Fetch the category list for one gender and save each entry (tagged with that gender) as a SpiderCategory record of type 'book'. + * @param {number} sex Gender: 1 = male, 2 = female + * */ +const categoryList = async (sex) => { + let url = `https://bc.dzjgmp.com/core/book/type.api?sex=${sex}` + let data = await netUtil.getData(url, generateHeader()); + data = 
JSON.parse(data); + for (let obj of data.data) { + obj.sex = sex; + let record = new SpiderCategory({ + data: obj, + type: 'book' + }); + await record.save(); + } +} +/** + * Fetch one page (20 entries) of the book list for a top-level category; stype and status are fixed to -1. + * @param {number} sex Gender: 1 = male, 2 = female + * @param {number} ltype Top-level category id + * @param {number} page Page number, starting at 1 + * */ +const bookList = async (sex, ltype, page) => { + console.log(`get book list, sex: ${sex}, ltype: ${ltype}, page: ${page}`); + let url = `https://bc.dzjgmp.com/core/book/type_all.api?sex=${sex}&ltype=${ltype}&stype=-1&status=-1&page=${page}&limit=20`; + let data = await netUtil.getData(url, generateHeader()); + data = JSON.parse(data); + return data; +} + +const bookInfo = async (bookId) => { // Fetch book.api for one book id and return the `data` field of the parsed JSON response + console.log(`parse book: ${bookId}`); + let url = `https://cache.dzjgmp.com/book.api?book_id=${bookId}`; + let data = await netUtil.getData(url, generateHeader()); + data = JSON.parse(data); + return data.data; +} +// Save every entry of one page of list data as a SpiderData record of type 'book' +const parsePageObj = async (dataArr) => { + for(let data of dataArr) { + let record = new SpiderData({ + data: data, + type: 'book' + }) + await record.save(); + } +} +// Crawl the book list of every stored 'book' category, page by page; errors are logged and end the crawl +const parseAllCategory = async () => { + try { + let cateList = await SpiderCategory.find({type: 'book'}); + for(const cate of cateList) { + let firstPage = await bookList(cate.data.sex, cate.data.ltype_id, 1); + let totalPage = firstPage.data.last_page; + cate.data.total = firstPage.data.total; + cate.markModified('data'); + await cate.save(); + console.log(`parse ltype: ${cate.data.ltype_id}, totalPage: ${totalPage}`); + await parsePageObj(firstPage.data.list); + for(let i = 2; i <= totalPage; i ++) { + let pageObj = await bookList(cate.data.sex, cate.data.ltype_id, i); + await parsePageObj(pageObj.data.list); + } + } + } catch (err) { + console.log(err); + } +} + +const parseAllBookList = async () => { // Merge book.api details into SpiderData records with status 0; NOTE(review): limit(1) and status is never changed here, so repeated runs appear to pick the same record - confirm intended + let bookList = await SpiderData.find({status: 0}).sort({'data.book_id': 1}).limit(1); + for(let book of bookList) { + console.time('oneBook'); + let record = await 
bookInfo(book.data.book_id); + for (const key in record) { + if ({}.hasOwnProperty.call(record, key)) { + book.data[key] = record[key]; + } + } + book.markModified('data'); + console.timeEnd('oneBook') + await book.save(); + } +} + + + +export default { + run: async () => { + // await categoryList(1); + // await categoryList(2); + // await bookList(1, 0, 1); + // Crawl the book lists of all stored categories + // await parseAllCategory(); + await parseAllBookList(); + } +} diff --git a/src/utils/net.util.js b/src/utils/net.util.js index 0c82a76..b766b70 100644 --- a/src/utils/net.util.js +++ b/src/utils/net.util.js @@ -12,16 +12,28 @@ const requestData = (options) => { }); } export default { - postForm(url, data) { + postForm(url, data, header) { + header = header || { + 'Cache-Control': 'no-cache', + 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8', + } const options = { method: 'POST', url: url, - headers: { - 'Cache-Control': 'no-cache', - 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8', - }, + headers: header, form: data, }; return requestData(options); + }, + getData(url, header) { + header = header || { + 'Cache-Control': 'no-cache', + } + const options = { + method: 'GET', + url: url, + headers: header, + }; + return requestData(options); } }