增加处理列表时的错误处理

This commit is contained in:
zhl 2019-04-03 13:21:30 +08:00
parent fd405e54ce
commit 875994edf9
4 changed files with 107 additions and 29 deletions

View File

@ -5,3 +5,6 @@
sites/hoh8.js
## 20190403 全民小说抓取
sites/book.js

View File

@ -10,7 +10,7 @@ https://bc.dzjgmp.com/core/book/type.api?sex=1
Response:
```
```json
{ code: 1,
data:
[ { ltype_id: 0,
@ -36,7 +36,7 @@ https://bc.dzjgmp.com/core/book/type_all.api?sex=1&ltype=0&stype=-1&status=-1&pa
Response:
```
```json
{ code: 1,
data:
{ total: 34248,
@ -51,7 +51,7 @@ Response:
```
单本书的结构
```
```json
{ book_id: 668,
name: '逆天邪神',
author: '火星引力',
@ -73,6 +73,31 @@ https://cache.dzjgmp.com/book.api?book_id=10887
https://cache.dzjgmp.com/book_chapter.api?book_id=10887&site_id=wsg
Response:
```json
{ code: 1,
data:
[ { lists_id: 328,
chapter_id: 9011,
num: 0,
name: '第一章 小镇少年',
crawl_book_id: 739477,
url: 'http://www.2kxs.com/xiaoshuo/22/22295/6632341.html' },
]
}
```
## 章节详情
https://cache.dzjgmp.com/chapter.api?chapter_id=19873&crawl_book_id=839413&id=418&site_id=wsg
Response:
```json
{ chapter_id: 9011,
name: '第一章 小镇少年',
crawl_book_id: 739477,
content: ''
}
```

View File

@ -38,19 +38,57 @@ const categoryList = async (sex) => {
/**
 * Fetch one page of the book listing for a category.
 *
 * Never rejects: a request failure, a body that is not valid JSON, or a
 * parsed payload without a `data.list` array is logged and replaced by an
 * empty page, so callers can always read `result.data.list`.
 *
 * @param {number} sex value sent as the `sex` query parameter
 * @param {number} ltype value sent as the `ltype` query parameter
 * @param {number} page value sent as the `page` query parameter
 * @returns {Promise<object>} the parsed response, or `{data: {list: []}}` on failure
 */
const bookList = async (sex, ltype, page) => {
  console.log(`get book list, sex: ${sex}, ltype: ${ltype}, page: ${page}`);
  const url = `https://bc.dzjgmp.com/core/book/type_all.api?sex=${sex}&ltype=${ltype}&stype=-1&status=-1&page=${page}&limit=20`;
  try {
    const body = await netUtil.getData(url, generateHeader());
    const parsed = JSON.parse(body);
    // A body can parse fine and still not be a listing (e.g. `null` or an
    // error envelope without `data`); treat that like any other failure
    // instead of letting the caller crash on `result.data.list`.
    const list = parsed && parsed.data && parsed.data.list;
    if (!Array.isArray(list)) {
      console.log(`unexpected book list payload, sex: ${sex}, ltype: ${ltype}, page: ${page}`);
      return {data: {list: []}};
    }
    return parsed;
  } catch (err) {
    console.log(err);
    return {data: {list: []}};
  }
}
/**
 * Fetch the details of a single book.
 *
 * The book's chapter list is fetched and logged as well, but it is not part
 * of the return value.
 *
 * @param {string} bookId book id
 * @param {string} [siteId='kxs2'] site id used for the chapter list request
 * @returns {Promise<*>} the `data` field of the parsed book.api response
 * */
const bookInfo = async (bookId, siteId = 'kxs2') => {
  console.log(`parse book: ${bookId}`);
  // Book details.
  const url = `https://cache.dzjgmp.com/book.api?book_id=${bookId}`;
  const body = await netUtil.getData(url, generateHeader());
  // Chapter list. bookChapterList already returns the parsed response, so it
  // must not go through JSON.parse again: parsing an object coerces it to
  // "[object Object]" and throws a SyntaxError.
  const chapterListData = await bookChapterList(bookId, siteId);
  console.log(chapterListData);
  return JSON.parse(body).data;
}
/**
 * Fetch the chapter list of a book.
 *
 * @param {string} bookId book id
 * @param {string} siteId site id, taken from `data.sites` of the book detail response
 * @returns {Promise<*>} the whole parsed book_chapter.api response
 * */
const bookChapterList = async (bookId, siteId) => {
  const endpoint = 'https://cache.dzjgmp.com/book_chapter.api';
  const query = `book_id=${bookId}&site_id=${siteId}`;
  const rawBody = await netUtil.getData(`${endpoint}?${query}`, generateHeader());
  return JSON.parse(rawBody);
}
const bookInfo = async (bookId) => {
console.log(`parse book: ${bookId}`);
let url = `https://cache.dzjgmp.com/book.api?book_id=${bookId}`;
/**
 * Fetch the details of a single chapter.
 * @param {number} chapterId chapter id
 * @param {number} crawlBookId taken from `data.crawl_book_id` in the chapter list response
 * @param {number} listId taken from `data.lists_id` in the chapter list response
 * @param {string} siteId site id
 * */
const bookChapterInfo = async (chapterId, crawlBookId, listId, siteId) => {
let url = `https://cache.dzjgmp.com/chapter.api?chapter_id=${chapterId}&crawl_book_id=${crawlBookId}&id=${listId}&site_id=${siteId}`;
let data = await netUtil.getData(url, generateHeader());
// The response body is parsed as JSON before anything is returned.
data = JSON.parse(data);
// NOTE(review): two return statements appear back to back here, so only the
// first can run as written. This looks like a replaced line shown next to its
// replacement; confirm which one applies.
return data.data;
return data;
}
// 处理单页的列表数据
// 处理单页的列表数据,保存进库
const parsePageObj = async (dataArr) => {
for(let data of dataArr) {
let record = new SpiderData({
@ -61,25 +99,30 @@ const parsePageObj = async (dataArr) => {
}
}
// Fetch the item lists under every category.
// NOTE(review): two versions of parseAllCategory are interleaved below (one
// without parameters, one taking `sex` and `ltype`); confirm which lines are
// current before relying on this text.
const parseAllCategory = async () => {
try {
let cateList = await SpiderCategory.find({type: 'book'});
for(const cate of cateList) {
let firstPage = await bookList(cate.data.sex, cate.data.ltype_id, 1);
let totalPage = firstPage.data.last_page;
cate.data.total = firstPage.data.total;
cate.markModified('data');
await cate.save();
console.log(`parse ltype: ${cate.data.ltype_id}, totalPage: ${totalPage}`);
await parsePageObj(firstPage.data.list);
for(let i = 2; i <= totalPage; i ++) {
let pageObj = await bookList(cate.data.sex, cate.data.ltype_id, i);
await parsePageObj(pageObj.data.list);
}
const parseAllCategory = async (sex, ltype) => {
// Falsy arguments fall back to sex = 1 and ltype = 0.
sex = sex || 1;
ltype = ltype || 0;
try {
let cateList = await SpiderCategory.find({type: 'book'});
for(const cate of cateList) {
// Skip categories whose sex or ltype_id is below the given values.
// NOTE(review): the two comparisons are OR-ed, so a category with a higher
// sex but a lower ltype_id is skipped too; confirm that is intended.
if (cate.data.sex < sex || cate.data.ltype_id < ltype) {
continue;
}
// Page 1 supplies both the page count and the total for the category.
let firstPage = await bookList(cate.data.sex, cate.data.ltype_id, 1);
let totalPage = firstPage.data.last_page;
// Store the reported total on the category record and save it.
cate.data.total = firstPage.data.total;
cate.markModified('data');
await cate.save();
console.log(`parse ltype: ${cate.data.ltype_id}, totalPage: ${totalPage}`);
await parsePageObj(firstPage.data.list);
// Remaining pages are fetched and processed one at a time.
for(let i = 2; i <= totalPage; i ++) {
let pageObj = await bookList(cate.data.sex, cate.data.ltype_id, i);
await parsePageObj(pageObj.data.list);
}
} catch (err) {
console.log(err);
}
} catch (err) {
console.log(err);
}
}
const parseAllBookList = async () => {
@ -102,11 +145,15 @@ const parseAllBookList = async () => {
// Default export: an object whose `run` method drives the crawl steps.
export default {
run: async () => {
// step 1: fetch all categories
// await categoryList(1);
// await categoryList(2);
// await bookList(1, 0, 1);
// step 2: fetch the book list under every category
// Fetch the item lists under every category
// await parseAllCategory();
// NOTE(review): both calls below are live in this view; the first looks like
// a line that was replaced by the second. Confirm which one applies.
await parseAllBookList();
await parseAllCategory(2, 0);
// Fetch the details of every item
// await parseAllBookList();
// let data = await bookChapterInfo(9011, 739477, 328, 'kxs2');
// console.log(data.data);
}
}

View File

@ -7,6 +7,9 @@ const requestData = (options) => {
if (err) {
return reject(err);
}
if (response.statusCode >= 300) {
return reject(new Error('server response code: ' + response.statusCode));
}
resolve(body);
});
});