修改蛋蛋赞电影抓取规则，如果当前数据已存在，则只更新播放链接

This commit is contained in:
zhl 2019-05-07 21:43:24 +08:00
parent 45d6c70775
commit 593bd3e758
2 changed files with 31 additions and 43 deletions

View File

@ -19,7 +19,8 @@ const parseOnePage = async (subLink, category) => {
console.log(`>>>>>>>>>>>> ${url} not found`);
} else {
let resourceStr = stringUtil.getContentByReg(html,/links='(.+?)\|'/);
let idx = parseInt(subLink.replace('/dianying/', '').replace('.html', '')
let idx = parseInt(subLink.replace('/dianying/', '')
.replace('.html', '')
.replace('/dongman/', '')
.replace('/dianshiju/', '')
.replace('/zongyi/', ''));
@ -36,22 +37,27 @@ const parseOnePage = async (subLink, category) => {
let daoYan = $('meta[property="og:video:director"]').attr('content');
let zhuYan = $('meta[property="og:video:actor"]').attr('content').replace(/ \/ /g, ',');
let region = $('meta[property="og:video:area"]').attr('content');
let record = {
gid: idx,
name: $('meta[property="og:title"]').attr('content'),
type: typeArr,
resources: resourceArr,
daoYan: daoYan ? daoYan.split(',') : [],
zhuYan: zhuYan ? zhuYan.split(',') : [],
score: Number($('meta[property="og:video:score"]').attr('content')),
img: $('meta[property="og:image"]').attr('content'),
introduce: $('meta[property="og:description"]').attr('content'),
nameAlias: $('meta[property="og:video:alias"]').attr('content'),
region: region ? region.split(',') : [],
year: Number($('meta[property="og:video:release_date"]').attr('content')),
category: category,
let record = await Movie.findByGid(idx);
if (!record) {
record = {
gid: idx,
name: $('meta[property="og:title"]').attr('content'),
type: typeArr,
resources: resourceArr,
daoYan: daoYan ? daoYan.split(',') : [],
zhuYan: zhuYan ? zhuYan.split(',') : [],
score: Number($('meta[property="og:video:score"]').attr('content')),
img: $('meta[property="og:image"]').attr('content'),
introduce: $('meta[property="og:description"]').attr('content'),
nameAlias: $('meta[property="og:video:alias"]').attr('content'),
region: region ? region.split(',') : [],
year: Number($('meta[property="og:video:release_date"]').attr('content')),
category: category,
}
} else {
record.resources = resourceArr;
}
await Movie.updateOne(idx, record);
await record.save();
console.log(`@@@@@ ${subLink} @ ${record.name} saved`);
}
} catch (err) {
@ -124,34 +130,12 @@ const parseAllMovie = async (category) => {
export default {
run: async () => {
// await parseAllMovie('movie');
await parseAllMovie('movie');
// await parseAllMovie('tv');
// await parseAllMovie('show');
// await parseAllMovie('cartoon');
// console.log('all done');
let html = await netUtil.getData('https://wechat-test.kingsome.cn/', {})
console.log(html);
// var ProxyLists = require('proxy-lists');
//
// var options = {
// countries: ['cn'],
// protocols: ['https'],
// };
//
// var gettingProxies = ProxyLists.getProxies(options);
// gettingProxies.on('data', function(proxies) {
// // Received some proxies.
// console.log(proxies);
// });
//
// gettingProxies.on('error', function(error) {
// // Some error has occurred.
// // console.error(error);
// });
//
// gettingProxies.once('end', function() {
// // Done getting proxies.
// console.log('finish get proxy');
// });
// let html = await netUtil.getData('https://wechat-test.kingsome.cn/', {})
// console.log(html);
}
}

View File

@ -11,8 +11,12 @@ export default {
removeHtml(content, replceEnter) {
if (replceEnter) {
return content.replace(/<.+?>/g, '').replace(/\r\n/g, '<br/>').replace(/\s/g, '')
.replace(/(<br\/>)+/g, '<br/>').replace(/^<br\/>/, '').replace(/<br\/>$/, '');
return content.replace(/<.+?>/g, '')
.replace(/\r\n/g, '<br/>')
.replace(/\s/g, '')
.replace(/(<br\/>)+/g, '<br/>')
.replace(/^<br\/>/, '')
.replace(/<br\/>$/, '');
} else {
return content.replace(/<.+?>/g, '').replace(/\s/g, '');
}