import netUtil from '../utils/net.util';
import SpiderCategory from '../models/SpiderCategory';
import SpiderData from '../models/SpiderData';

/**
 * Build the HTTP headers sent with every request to the book API.
 *
 * NOTE(review): the token and user id are hard-coded credentials; consider
 * loading them from configuration instead of committing them to source.
 *
 * @returns {Object} a fresh header object on each call
 */
const generateHeader = () => ({
  'User-Agent': 'quanminxiaoshuo_apple/1.0.9 CFNetwork/976 Darwin/18.2.0',
  'ver': '5.0',
  'pt': 'ios',
  'package': 'quanminxiaoshuo',
  'token': 'tc3ahpxco5ads8utnsbhxfuypkstc2oi',
  'user': '2300097',
});

/**
 * Fetch a URL with the standard headers and parse the response body as JSON.
 *
 * @param {string} url absolute URL to request
 * @returns {Promise<*>} the parsed JSON payload
 * @throws if the request rejects or the body is not valid JSON
 */
const fetchJson = async (url) => {
  const body = await netUtil.getData(url, generateHeader());
  return JSON.parse(body);
};

/**
 * Fetch the category list for one sex and store each category as a
 * SpiderCategory record of type 'book'.
 *
 * @param {number} sex 1: male, 2: female
 * @returns {Promise<void>}
 */
const categoryList = async (sex) => {
  // The query previously interpolated the literal 0, so the `sex` argument
  // never reached the server; pass the requested value through.
  const url = `https://bc.dzjgmp.com/core/book/type.api?sex=${sex}`;
  const payload = await fetchJson(url);

  for (const category of payload.data) {
    // Tag the category with the sex it was requested for before saving.
    category.sex = sex;
    const record = new SpiderCategory({
      data: category,
      type: 'book',
    });
    await record.save();
  }
};

/**
 * Fetch one page of the book list for a category.
 *
 * @param {number} sex 1: male, 2: female
 * @param {number} ltype top-level category id
 * @param {number} page page number, starting at 1
 * @returns {Promise<Object>} the parsed response
 */
const bookList = async (sex, ltype, page) => {
  console.log(`get book list, sex: ${sex}, ltype: ${ltype}, page: ${page}`);
  // `&ltype=` had been mangled into `<ype=` (HTML-entity decoding of `&lt`),
  // which dropped the category filter from the query string.
  const url = `https://bc.dzjgmp.com/core/book/type_all.api?sex=${sex}&ltype=${ltype}&stype=-1&status=-1&page=${page}&limit=20`;
  return fetchJson(url);
};

/**
 * Fetch the detail record of a single book.
 *
 * @param {number|string} bookId id of the book to look up
 * @returns {Promise<*>} the `data` field of the parsed response
 */
const bookInfo = async (bookId) => {
  console.log(`parse book: ${bookId}`);
  const url = `https://cache.dzjgmp.com/book.api?book_id=${bookId}`;
  const payload = await fetchJson(url);
  return payload.data;
};

/**
 * Store every entry of a single list page as a SpiderData record of type
 * 'book'. Records are saved one after another.
 *
 * @param {Array<Object>} dataArr entries of one list page
 * @returns {Promise<void>}
 */
const parsePageObj = async (dataArr) => {
  for (const entry of dataArr) {
    const record = new SpiderData({
      data: entry,
      type: 'book',
    });
    await record.save();
  }
};

/**
 * Walk every stored 'book' category, record its total on the category
 * document, and store all pages of its book list.
 *
 * Any error aborts the walk and is logged rather than rethrown.
 *
 * @returns {Promise<void>}
 */
const parseAllCategory = async () => {
  try {
    const categories = await SpiderCategory.find({ type: 'book' });

    for (const cate of categories) {
      const firstPage = await bookList(cate.data.sex, cate.data.ltype_id, 1);
      const totalPage = firstPage.data.last_page;

      // `data` is mutated in place, so it is flagged explicitly before saving.
      cate.data.total = firstPage.data.total;
      cate.markModified('data');
      await cate.save();

      // The line break inside the message is kept exactly as the original emits it.
      console.log(`parse ltype: \n${cate.data.ltype_id}, totalPage: ${totalPage}`);

      await parsePageObj(firstPage.data.list);
      // Page 1 is already handled above; continue from page 2.
      for (let page = 2; page <= totalPage; page++) {
        const pageObj = await bookList(cate.data.sex, cate.data.ltype_id, page);
        await parsePageObj(pageObj.data.list);
      }
    }
  } catch (err) {
    console.log(err);
  }
};

/**
 * Enrich stored books with their detail record: pick the SpiderData
 * documents with `status: 0` (lowest `data.book_id` first, limited to one
 * per call), merge the fetched detail fields into `data`, and save.
 *
 * NOTE(review): nothing visible here changes `status`, so unless a model
 * hook does, the same document will be selected again on the next call.
 *
 * @returns {Promise<void>}
 */
const parseAllBookList = async () => {
  // Named `books` so it no longer shadows the module-level `bookList` function.
  const books = await SpiderData.find({ status: 0 })
    .sort({ 'data.book_id': 1 })
    .limit(1);

  for (const book of books) {
    console.time('oneBook');
    const detail = await bookInfo(book.data.book_id);
    // Copy the detail's own enumerable fields onto the stored data.
    Object.assign(book.data, detail);
    book.markModified('data');
    // The timer covers fetch + merge only; the save below is not included.
    console.timeEnd('oneBook');
    await book.save();
  }
};

export default {
  /**
   * Entry point of the spider. Only the book-detail stage is active; the
   * earlier stages are left below, disabled, for manual runs.
   *
   * @returns {Promise<void>}
   */
  run: async () => {
    // await categoryList(1);
    // await categoryList(2);
    // await bookList(1, 0, 1);
    // Fetch the book lists of every category:
    // await parseAllCategory();
    await parseAllBookList();
  },
};