修改蛋蛋赞电影抓取规则,如果当前数据已存在,则只更新播放链接

This commit is contained in:
zhl 2019-05-07 21:43:24 +08:00
parent 45d6c70775
commit 593bd3e758
2 changed files with 31 additions and 43 deletions

View File

@ -19,7 +19,8 @@ const parseOnePage = async (subLink, category) => {
console.log(`>>>>>>>>>>>> ${url} not found`); console.log(`>>>>>>>>>>>> ${url} not found`);
} else { } else {
let resourceStr = stringUtil.getContentByReg(html,/links='(.+?)\|'/); let resourceStr = stringUtil.getContentByReg(html,/links='(.+?)\|'/);
let idx = parseInt(subLink.replace('/dianying/', '').replace('.html', '') let idx = parseInt(subLink.replace('/dianying/', '')
.replace('.html', '')
.replace('/dongman/', '') .replace('/dongman/', '')
.replace('/dianshiju/', '') .replace('/dianshiju/', '')
.replace('/zongyi/', '')); .replace('/zongyi/', ''));
@ -36,7 +37,9 @@ const parseOnePage = async (subLink, category) => {
let daoYan = $('meta[property="og:video:director"]').attr('content'); let daoYan = $('meta[property="og:video:director"]').attr('content');
let zhuYan = $('meta[property="og:video:actor"]').attr('content').replace(/ \/ /g, ','); let zhuYan = $('meta[property="og:video:actor"]').attr('content').replace(/ \/ /g, ',');
let region = $('meta[property="og:video:area"]').attr('content'); let region = $('meta[property="og:video:area"]').attr('content');
let record = { let record = await Movie.findByGid(idx);
if (!record) {
record = {
gid: idx, gid: idx,
name: $('meta[property="og:title"]').attr('content'), name: $('meta[property="og:title"]').attr('content'),
type: typeArr, type: typeArr,
@ -51,7 +54,10 @@ const parseOnePage = async (subLink, category) => {
year: Number($('meta[property="og:video:release_date"]').attr('content')), year: Number($('meta[property="og:video:release_date"]').attr('content')),
category: category, category: category,
} }
await Movie.updateOne(idx, record); } else {
record.resources = resourceArr;
}
await record.save();
console.log(`@@@@@ ${subLink} @ ${record.name} saved`); console.log(`@@@@@ ${subLink} @ ${record.name} saved`);
} }
} catch (err) { } catch (err) {
@ -124,34 +130,12 @@ const parseAllMovie = async (category) => {
export default { export default {
run: async () => { run: async () => {
// await parseAllMovie('movie'); await parseAllMovie('movie');
// await parseAllMovie('tv'); // await parseAllMovie('tv');
// await parseAllMovie('show'); // await parseAllMovie('show');
// await parseAllMovie('cartoon'); // await parseAllMovie('cartoon');
// console.log('all done'); // console.log('all done');
let html = await netUtil.getData('https://wechat-test.kingsome.cn/', {}) // let html = await netUtil.getData('https://wechat-test.kingsome.cn/', {})
console.log(html); // console.log(html);
// var ProxyLists = require('proxy-lists');
//
// var options = {
// countries: ['cn'],
// protocols: ['https'],
// };
//
// var gettingProxies = ProxyLists.getProxies(options);
// gettingProxies.on('data', function(proxies) {
// // Received some proxies.
// console.log(proxies);
// });
//
// gettingProxies.on('error', function(error) {
// // Some error has occurred.
// // console.error(error);
// });
//
// gettingProxies.once('end', function() {
// // Done getting proxies.
// console.log('finish get proxy');
// });
} }
} }

View File

@ -11,8 +11,12 @@ export default {
removeHtml(content, replceEnter) { removeHtml(content, replceEnter) {
if (replceEnter) { if (replceEnter) {
return content.replace(/<.+?>/g, '').replace(/\r\n/g, '<br/>').replace(/\s/g, '') return content.replace(/<.+?>/g, '')
.replace(/(<br\/>)+/g, '<br/>').replace(/^<br\/>/, '').replace(/<br\/>$/, ''); .replace(/\r\n/g, '<br/>')
.replace(/\s/g, '')
.replace(/(<br\/>)+/g, '<br/>')
.replace(/^<br\/>/, '')
.replace(/<br\/>$/, '');
} else { } else {
return content.replace(/<.+?>/g, '').replace(/\s/g, ''); return content.replace(/<.+?>/g, '').replace(/\s/g, '');
} }