增加处理列表时的错误处理

This commit is contained in:
zhl 2019-04-03 13:21:30 +08:00
parent fd405e54ce
commit 875994edf9
4 changed files with 107 additions and 29 deletions

View File

@ -5,3 +5,6 @@
sites/hoh8.js sites/hoh8.js
## 20190403 全民小说抓取
sites/book.js

View File

@ -10,7 +10,7 @@ https://bc.dzjgmp.com/core/book/type.api?sex=1
Response: Response:
``` ```json
{ code: 1, { code: 1,
data: data:
[ { ltype_id: 0, [ { ltype_id: 0,
@ -36,7 +36,7 @@ https://bc.dzjgmp.com/core/book/type_all.api?sex=1&ltype=0&stype=-1&status=-1&pa
Response: Response:
``` ```json
{ code: 1, { code: 1,
data: data:
{ total: 34248, { total: 34248,
@ -51,7 +51,7 @@ Response:
``` ```
单本书的结构 单本书的结构
``` ```json
{ book_id: 668, { book_id: 668,
name: '逆天邪神', name: '逆天邪神',
author: '火星引力', author: '火星引力',
@ -73,6 +73,31 @@ https://cache.dzjgmp.com/book.api?book_id=10887
https://cache.dzjgmp.com/book_chapter.api?book_id=10887&site_id=wsg https://cache.dzjgmp.com/book_chapter.api?book_id=10887&site_id=wsg
Response:
```json
{ code: 1,
data:
[ { lists_id: 328,
chapter_id: 9011,
num: 0,
name: '第一章 小镇少年',
crawl_book_id: 739477,
url: 'http://www.2kxs.com/xiaoshuo/22/22295/6632341.html' },
]
}
```
## 章节详情 ## 章节详情
https://cache.dzjgmp.com/chapter.api?chapter_id=19873&crawl_book_id=839413&id=418&site_id=wsg https://cache.dzjgmp.com/chapter.api?chapter_id=19873&crawl_book_id=839413&id=418&site_id=wsg
Response:
```json
{ chapter_id: 9011,
name: '第一章 小镇少年',
crawl_book_id: 739477,
content: ''
}
```

View File

@ -38,19 +38,57 @@ const categoryList = async (sex) => {
const bookList = async (sex, ltype, page) => { const bookList = async (sex, ltype, page) => {
console.log(`get book list, sex: ${sex}, ltype: ${ltype}, page: ${page}`); console.log(`get book list, sex: ${sex}, ltype: ${ltype}, page: ${page}`);
let url = `https://bc.dzjgmp.com/core/book/type_all.api?sex=${sex}&ltype=${ltype}&stype=-1&status=-1&page=${page}&limit=20`; let url = `https://bc.dzjgmp.com/core/book/type_all.api?sex=${sex}&ltype=${ltype}&stype=-1&status=-1&page=${page}&limit=20`;
try {
let data = await netUtil.getData(url, generateHeader());
data = JSON.parse(data);
return data;
} catch (err) {
console.log(err);
return {data: {list: []}};
}
}
/**
 * Fetch the details of a single book.
 *
 * Also requests the book's chapter list and logs it; the chapter list is not
 * part of the returned value.
 * @param {string} bookId book id
 * @returns {Promise<*>} the `data` field of the parsed book.api response
 * */
const bookInfo = async (bookId) => {
  console.log(`parse book: ${bookId}`);
  // Book details
  let url = `https://cache.dzjgmp.com/book.api?book_id=${bookId}`;
  let data = await netUtil.getData(url, generateHeader());
  // Chapter list. bookChapterList already returns the parsed response object,
  // so it must not be passed through JSON.parse a second time: parsing an
  // object coerces it to "[object Object]" and throws a SyntaxError.
  // NOTE(review): the site id is hard-coded to 'kxs2'; presumably it should be
  // taken from the book detail's `sites` field — confirm against the API.
  let chapterListData = await bookChapterList(bookId, 'kxs2');
  console.log(chapterListData);
  data = JSON.parse(data);
  return data.data;
}
/**
* 获取书本章节列表
* @param {string} bookId 书本id
* @param {string} siteId 线路id从书本详情返回的data.sites中获取
* */
const bookChapterList = async (bookId, siteId) => {
let url = `https://cache.dzjgmp.com/book_chapter.api?book_id=${bookId}&site_id=${siteId}`;
let data = await netUtil.getData(url, generateHeader()); let data = await netUtil.getData(url, generateHeader());
data = JSON.parse(data); data = JSON.parse(data);
return data; return data;
} }
/**
const bookInfo = async (bookId) => { * 获取单个章节的详情
console.log(`parse book: ${bookId}`); * @param {number} chapterId 章节id
let url = `https://cache.dzjgmp.com/book.api?book_id=${bookId}`; * @param {number} crawlBookId 从章节列表返回数据中的 data.crawl_book_id 获取
* @param {number} listId 从章节列表返回数据中的 data.lists_id 获取
* @param {string} siteId 线路id
* */
const bookChapterInfo = async (chapterId, crawlBookId, listId, siteId) => {
let url = `https://cache.dzjgmp.com/chapter.api?chapter_id=${chapterId}&crawl_book_id=${crawlBookId}&id=${listId}&site_id=${siteId}`;
let data = await netUtil.getData(url, generateHeader()); let data = await netUtil.getData(url, generateHeader());
data = JSON.parse(data); data = JSON.parse(data);
return data.data; return data;
} }
// 处理单页的列表数据 // 处理单页的列表数据,保存进库
const parsePageObj = async (dataArr) => { const parsePageObj = async (dataArr) => {
for(let data of dataArr) { for(let data of dataArr) {
let record = new SpiderData({ let record = new SpiderData({
@ -61,25 +99,30 @@ const parsePageObj = async (dataArr) => {
} }
} }
// 获取所有分类下的数据列表 // 获取所有分类下的数据列表
const parseAllCategory = async () => { const parseAllCategory = async (sex, ltype) => {
try { sex = sex || 1;
let cateList = await SpiderCategory.find({type: 'book'}); ltype = ltype || 0;
for(const cate of cateList) { try {
let firstPage = await bookList(cate.data.sex, cate.data.ltype_id, 1); let cateList = await SpiderCategory.find({type: 'book'});
let totalPage = firstPage.data.last_page; for(const cate of cateList) {
cate.data.total = firstPage.data.total; if (cate.data.sex < sex || cate.data.ltype_id < ltype) {
cate.markModified('data'); continue;
await cate.save(); }
console.log(`parse ltype: ${cate.data.ltype_id}, totalPage: ${totalPage}`); let firstPage = await bookList(cate.data.sex, cate.data.ltype_id, 1);
await parsePageObj(firstPage.data.list); let totalPage = firstPage.data.last_page;
for(let i = 2; i <= totalPage; i ++) { cate.data.total = firstPage.data.total;
let pageObj = await bookList(cate.data.sex, cate.data.ltype_id, i); cate.markModified('data');
await parsePageObj(pageObj.data.list); await cate.save();
} console.log(`parse ltype: ${cate.data.ltype_id}, totalPage: ${totalPage}`);
await parsePageObj(firstPage.data.list);
for(let i = 2; i <= totalPage; i ++) {
let pageObj = await bookList(cate.data.sex, cate.data.ltype_id, i);
await parsePageObj(pageObj.data.list);
} }
} catch (err) {
console.log(err);
} }
} catch (err) {
console.log(err);
}
} }
const parseAllBookList = async () => { const parseAllBookList = async () => {
@ -102,11 +145,15 @@ const parseAllBookList = async () => {
export default { export default {
run: async () => { run: async () => {
//step 1: 获取所有的分类
// await categoryList(1); // await categoryList(1);
// await categoryList(2); // await categoryList(2);
// await bookList(1, 0, 1); // step 2: 获取所有分类下的书籍列表
// 获取所有分类下的数据列表 // 获取所有分类下的数据列表
// await parseAllCategory(); await parseAllCategory(2, 0);
await parseAllBookList(); // 获取所有数据的详情
// await parseAllBookList();
// let data = await bookChapterInfo(9011, 739477, 328, 'kxs2');
// console.log(data.data);
} }
} }

View File

@ -7,6 +7,9 @@ const requestData = (options) => {
if (err) { if (err) {
return reject(err); return reject(err);
} }
if (response.statusCode >= 300) {
return reject(new Error('server response code: ' + response.statusCode));
}
resolve(body); resolve(body);
}); });
}); });