添加抓取hoh8小程序数据的方法

This commit is contained in:
zhl 2019-04-02 17:03:01 +08:00
parent 66acaf1cc5
commit 19468a8ad4
8 changed files with 452 additions and 0 deletions

14
config/config.js.example Normal file
View File

@ -0,0 +1,14 @@
let path = require('path');
// Parent directory of the folder this config file lives in.
const rootPath = path.normalize(`${__dirname}/..`);

/**
 * Application settings.
 *
 * root      - normalized path of the directory above this file's folder
 * app.name  - application name
 * db        - MongoDB connection string
 * logs_path - directory path for log files
 */
const config = {
  root: rootPath,
  app: {
    name: 'spider',
  },
  db: 'mongodb://localhost/ghost-development',
  logs_path: '/Users/zhl/Documents/logs/spider',
};

module.exports = config;

View File

@ -16,7 +16,12 @@
"license": "ISC",
"dependencies": {
"bluebird": "^3.5.3",
"bunyan": "^1.8.12",
"file-stream-rotator": "^0.4.1",
"fs-extra": "^7.0.1",
"glob": "^7.1.3",
"mongoose": "^5.2.15",
"request": "^2.88.0",
"request-promise": "^4.2.4",
"ws": "^6.1.2"
},

View File

@ -0,0 +1,27 @@
import mongoose from 'mongoose';
import Promise from 'bluebird';
import logger from './utils/logger';
import glob from 'glob';
import config from '../config/config';
import hoh8 from './sites/hoh8';
// Have mongoose hand out bluebird promises.
mongoose.Promise = Promise;

const db = mongoose.connection;

// A connection-level error is treated as fatal: log it and stop the process.
db.on('error', (err) => {
  logger.error(err);
  process.exit(1);
});

// Start the crawler only once the database connection is open.
db.once('open', () => {
  logger.info('Connected to db.');
  hoh8.run();
});

mongoose.connect(config.db, { promiseLibrary: Promise, useNewUrlParser: true });

// Load every model module so each one registers itself with mongoose.
// The pattern is anchored on this file's directory: the previous
// `config.root + './models/*.js'` concatenated to "<root>./models/*.js",
// a path that does not exist, so the loop never loaded anything.
const modelFiles = glob.sync(`${__dirname}/models/*.js`);
modelFiles.forEach((modelFile) => {
  require(modelFile);
});

53
src/models/SpiderData.js Normal file
View File

@ -0,0 +1,53 @@
import mongoose from 'mongoose';
const { Schema } = mongoose;

/**
 * Schema of one document in the `spider_data` collection.
 *
 * id     - numeric identifier
 * data   - free-form payload (Mixed)
 * type   - string label for the kind of item
 * status - numeric state flag, 0 when not specified
 *
 * The `timestamps` option is enabled for this schema.
 */
const SpiderDataSchema = new Schema(
  {
    id: { type: Number },
    data: { type: Schema.Types.Mixed },
    type: { type: String },
    status: { type: Number, default: 0 },
  },
  {
    collection: 'spider_data',
    timestamps: true,
  }
);
/**
 * Static persistence helpers for scraped items. They read and write through
 * `SpiderDataModel`, which is resolved when a method is called.
 */
class SpiderDataClass {
  /**
   * Replace the payload of the stored record whose `data.id` equals
   * `data.id` and set its status to 1.
   *
   * Failures are logged and never thrown. When no record matches, a message
   * is logged and nothing is written.
   *
   * @param {Object} data - new payload; its `id` selects the record
   * @returns {Promise<void>}
   */
  static async updateData(data) {
    try {
      const record = await SpiderDataModel.findOne({ 'data.id': data.id });
      if (!record) {
        // findOne yields null when nothing matches; report it explicitly
        // instead of failing on a property write to null.
        console.log(`SpiderData.updateData: no record found for data.id=${data.id}`);
        return;
      }
      record.data = data;
      record.status = 1;
      await record.save();
    } catch (err) {
      console.log(err);
    }
  }

  /**
   * Store each list entry that has a positive `id` as a new record with
   * status 0. Entries are saved one after another; a failed save is logged
   * and the remaining entries are still processed.
   *
   * @param {Array<Object>} list - items to store
   * @param {string} type - label written to each record's `type`
   * @returns {Promise<void>}
   */
  static async saveList(list, type) {
    for (const item of list) {
      if (!item || !(item.id > 0)) {
        continue;
      }
      const doc = new SpiderDataModel({
        // Also fill the top-level `id`, so code reading `record.id` gets the
        // item id rather than undefined.
        id: item.id,
        type: type,
        data: item,
        status: 0
      });
      try {
        await doc.save();
      } catch (err) {
        console.log(err);
      }
    }
  }
}
// Copy the members of SpiderDataClass onto the schema.
SpiderDataSchema.loadClass(SpiderDataClass);

/**
 * Query helper: restrict the current query to documents of the given `type`
 * whose status is 0.
 */
SpiderDataSchema.query.byType = function byType(type) {
  const criteria = { type: type, status: 0 };
  return this.where(criteria);
};

const SpiderDataModel = mongoose.model('SpiderData', SpiderDataSchema);

export default SpiderDataModel;

129
src/sites/hoh8.js Normal file
View File

@ -0,0 +1,129 @@
import socketClient from '../socket/client';
import SpiderData from '../models/SpiderData';
import netUtil from '../utils/net.util';
// Local alias for the imported socket client; the request helpers in this
// module send through it and `run` installs its handlers on it.
let client = socketClient;
/**
 * Send the request for one page of the movie list.
 *
 * @param {number} id - page number; also appended to the request id
 */
const getMovieList = (id) => {
  // The body is itself JSON, embedded as an escaped string in the envelope.
  const escapedBody = `{\\"pageNum\\":${id},\\"kw\\":null,\\"type\\":\\"所有类型\\",\\"region\\":\\"所有地区\\",\\"date\\":\\"所有年代\\"}`;
  const reqId = `movielist000${id}`;
  client.send(`{"path":"/video/searchVideoList","body":"${escapedBody}","reqId":"${reqId}"}`);
};
/**
 * Send the movie detail request for the record at `list[current]`.
 * The body carries the record's `data.id`; the request id carries the
 * record's top-level `id`.
 */
const getMovieInfo = () => {
  const entry = list[current];
  const escapedBody = `{\\"id\\":\\"${entry.data.id}\\"}`;
  const reqId = `movieinfo${entry.id}`;
  client.send(`{"path":"/video/getVideoDtl.page","body":"${escapedBody}","reqId":"${reqId}"}`);
};
/**
 * Send the request for one page of the game list.
 *
 * @param {number} id - page number; also appended to the request id
 */
const getGameList = (id) => {
  const escapedBody = `{\\"pageNum\\":${id},\\"kw\\":\\"\\"}`;
  const reqId = `gamelist000${id}`;
  client.send(`{"path":"/hg/getGameList","body":"${escapedBody}","reqId":"${reqId}"}`);
};
/**
 * Send the game detail request for the record at `list[current]`.
 *
 * The game id is the record's top-level `id` when it is set. Records that
 * carry the id only inside their payload fall back to `data.id`, so the
 * request no longer goes out with the literal text "undefined".
 */
const getGameInfo = () => {
  const record = list[current];
  const payloadId = record.data ? record.data.id : undefined;
  // `!= null` covers both null and undefined.
  const gameId = record.id != null ? record.id : payloadId;
  const escapedBody = `{\\"gameId\\":\\"${gameId}\\"}`;
  client.send(`{"path":"/hg/getGameDtl.page","body":"${escapedBody}","reqId":"gameinfo${gameId}"}`);
};
/**
 * Send a video-ad token request.
 *
 * @param {number} id - suffix appended to the request id
 */
const getVideoToken = (id) => {
  const reqId = `videotoken${id}`;
  const envelope = `{"path":"/deal/getVideoADToken","body":"{}","reqId":"${reqId}"}`;
  client.send(envelope);
};
/**
 * POST a video-ad reward request as a form and return whatever
 * `netUtil.postForm` resolves with.
 *
 * @param {*} appId - sent as `appId` in the JSON payload
 * @param {*} token - sent as `token` in the JSON payload
 * @param {*} appName - sent as `appName` in the JSON payload
 * @returns {Promise<*>}
 */
const updateMoney = async (appId, token, appName) => {
  const payload = JSON.stringify({
    appId: appId,
    appName: appName,
    token: token,
    userId: uid,
  });
  const form = { deviceType: 5, appVersion: 1, jsonData: payload };
  return netUtil.postForm('https://game.hoh8.cn/hh/deal/getVideoADReward.anon', form);
};
/**
 * Load every stored record of `type` with status 0 into `list` and send the
 * detail request for the first one.
 *
 * `current` is reset to 0 because it indexes into the freshly loaded list;
 * keeping an old value would skip records. When no records match, nothing is
 * requested. Errors are logged and never thrown.
 *
 * @param {string} type - 'movie' or 'game'; any other value only loads the list
 * @returns {Promise<void>}
 */
const parseAllRecord = async (type) => {
  try {
    list = await SpiderData.find({type: type, status: 0});
    current = 0;
    console.log(list.length);
    if (list.length === 0) {
      // Nothing to fetch; the detail helpers would dereference list[0].
      return;
    }
    if (type === 'movie') {
      getMovieInfo();
    } else if (type === 'game') {
      getGameInfo();
    }
  } catch (err) {
    console.log(err);
  }
};
// Index into `list` of the record whose detail request was sent last; the
// video-token flow in `run` also increments it as its request counter.
let current = 0;
// Records loaded by parseAllRecord and read by getMovieInfo / getGameInfo.
let list = [];
// User id placed in the WebSocket URL and in the reward request payload.
let uid = '2407044';
/**
 * hoh8 crawler entry point.
 *
 * `run()` opens the WebSocket and installs the handlers that drive the
 * crawl. Responses are dispatched on the prefix of their `reqId`:
 *
 *   movielist / gamelist - save the page, then request the next page
 *   movieinfo / gameinfo - store the detail, then request the next record;
 *                          the process exits once every record is done
 *   videotoken           - claim the reward, then request another token
 *                          15 s later while the counter is below 10
 */
const hoh8Site = {
  run() {
    // NOTE(review): the session token is hard-coded in the URL below;
    // consider moving it to configuration.
    const url = `wss://game.hoh8.cn/hh/websocket?userId=${uid}&token=c402cdea067bb6d1ff51090edbe0d645&roomId=0&groupId=global`;
    client.open(url, '');

    client.onopen = () => {
      console.log('socket on open');
      // Other crawl modes start from one of these calls instead:
      //   getMovieList(0), getGameList(0), parseAllRecord('movie')
      getVideoToken(0);
    };

    client.onmessage = async (data) => {
      console.log(data);

      let message;
      try {
        message = JSON.parse(data);
      } catch (err) {
        // A frame that is not JSON must not reject the handler.
        console.log(err);
        return;
      }
      if (!message || typeof message.reqId !== 'string') {
        return;
      }

      const reqId = message.reqId;
      const payload = message.data;
      const isReply = (prefix) => reqId.indexOf(prefix) === 0;
      const hasItems = Array.isArray(payload) && payload.length > 0;

      // Page number parsed from the part of reqId after `idPrefix`, plus one.
      // Each list type passes its own prefix; stripping the movie prefix from
      // a game reqId produced NaN and an invalid follow-up request.
      const requestNextPage = (requestPage, idPrefix) => {
        const nextPage = Number.parseInt(reqId.slice(idPrefix.length), 10) + 1;
        if (Number.isNaN(nextPage)) {
          console.log(`unexpected reqId: ${reqId}`);
          return;
        }
        requestPage(nextPage);
      };

      // Move to the next record, or exit once the whole list is done.
      const advance = (requestNext, startLog, doneLog) => {
        console.log(startLog);
        current++;
        if (current < list.length) {
          requestNext();
        } else {
          console.log(doneLog);
          // Normal completion, so exit with the success code.
          process.exit(0);
        }
      };

      try {
        if (isReply('movielist')) {
          // An empty page ends the listing; nothing more is requested.
          if (hasItems) {
            await SpiderData.saveList(payload, 'movie');
            requestNextPage(getMovieList, 'movielist000');
          }
        } else if (isReply('movieinfo')) {
          await SpiderData.updateData(payload);
          advance(getMovieInfo, 'begin next movie', 'finish parse movie info');
        } else if (isReply('gamelist')) {
          if (hasItems) {
            await SpiderData.saveList(payload, 'game');
            requestNextPage(getGameList, 'gamelist000');
          }
        } else if (isReply('gameinfo')) {
          await SpiderData.updateData(payload);
          advance(getGameInfo, 'begin next game', 'finish parse game info');
        } else if (isReply('videotoken')) {
          const { appId, token: videoToken, appName } = payload;
          console.log(`appId: ${appId}, token: ${videoToken}`);
          const result = await updateMoney(appId, videoToken, appName);
          console.log(result);
          setTimeout(() => {
            if (current++ < 10) {
              getVideoToken(current);
            }
          }, 15000);
        }
      } catch (err) {
        console.log(err);
      }
    };
  },
};

export default hoh8Site;

98
src/socket/client.js Normal file
View File

@ -0,0 +1,98 @@
import WebSocket from 'ws';
import logger from '../utils/logger';
/**
 * Small wrapper around a WebSocket connection with a fixed-delay reconnect.
 *
 * Callers replace `onopen`, `onmessage`, `onerror` and `onclose` on the
 * instance to receive events; the defaults only log. A close with any code
 * other than 1000, or an error whose code is ECONNREFUSED, schedules a
 * reconnect to the same URL.
 */
class WebSocketClient {
  constructor() {
    this.number = 0; // count of messages received
    this.autoReconnectInterval = 5 * 1000; // ms
  }

  /**
   * Open a connection and wire its events to this client's handlers.
   *
   * @param {string} url - WebSocket URL; remembered for reconnects
   * @param {*} id - stored on the client and passed again on reconnect
   */
  open(url, id) {
    this.url = url;
    this.id = id;
    this.instance = new WebSocket(this.url);
    this.instance.on('open', () => {
      this.onopen();
    });
    this.instance.on('message', (data, flags) => {
      this.number++;
      this.onmessage(data, flags, this.number);
    });
    this.instance.on('close', (e) => {
      if (e === 1000) {
        // CLOSE_NORMAL
        logger.info("WebSocket: closed");
      } else {
        // Abnormal closure
        this.reconnect(e);
      }
      this.onclose(e);
    });
    this.instance.on('error', (e) => {
      if (e && e.code === 'ECONNREFUSED') {
        this.reconnect(e);
      } else {
        this.onerror(e);
      }
    });
  }

  /**
   * Send raw data. A failure is reported through the socket's 'error' event
   * instead of being thrown.
   *
   * @param {*} data - payload handed to the socket
   * @param {*} [option] - passed through to the socket's send()
   */
  send(data, option) {
    this.deliver(() => data, option);
  }

  /**
   * Send `data` wrapped in the fixed text-message envelope.
   *
   * @param {*} data - message content
   * @param {*} [option] - passed through to the socket's send()
   */
  sendTestText(data, option) {
    this.deliver(() => {
      const msg = {
        msg_type: 'text',
        content: data
      };
      const msgObj = {
        type: 'm',
        bot_id: '591bf8c21d2860389057fafa',
        group: '6334778342@chatroom',
        messages: [msg]
      };
      return JSON.stringify(msgObj);
    }, option);
  }

  /**
   * Shared send path: build the payload and hand it to the socket.
   *
   * Without a socket (send before open) the error goes to `onerror`;
   * emitting on the missing socket used to throw a TypeError from inside
   * the catch block.
   *
   * @param {Function} buildPayload - returns the data to send
   * @param {*} [option] - passed through to the socket's send()
   */
  deliver(buildPayload, option) {
    if (!this.instance) {
      this.onerror(new Error('WebSocketClient: cannot send before open() has been called'));
      return;
    }
    try {
      this.instance.send(buildPayload(), option);
    } catch (e) {
      this.instance.emit('error', e);
    }
  }

  /**
   * Detach from the current socket and open a new one after
   * `autoReconnectInterval` milliseconds.
   *
   * @param {*} e - close code or error that triggered the reconnect; logged
   */
  reconnect(e) {
    logger.info(`WebSocketClient: retry in ${this.autoReconnectInterval}ms`, e);
    this.instance.removeAllListeners();
    setTimeout(() => {
      logger.info("WebSocketClient: reconnecting...");
      this.open(this.url, this.id);
    }, this.autoReconnectInterval);
  }

  // Default handlers. `arguments` is logged as-is so the logged record keeps
  // its existing form.
  onopen(e) {
    logger.info(arguments, "WebSocketClient: open");
  }

  onmessage(data, flags, number) {
    logger.info(arguments, "WebSocketClient: message");
  }

  onerror(e) {
    logger.info(arguments, "WebSocketClient: error");
  }

  onclose(e) {
    logger.info(arguments, "WebSocketClient: closed");
  }

  /** Return the single shared client, creating it on first use. */
  static getClient() {
    if (this.client === null) {
      this.client = new WebSocketClient();
    }
    return this.client;
  }
}

WebSocketClient.client = null;

export default WebSocketClient.getClient();

99
src/utils/logger.js Normal file
View File

@ -0,0 +1,99 @@
import fs from 'fs-extra';
import FileStreamRotator from 'file-stream-rotator';
import bunyan from 'bunyan';
import config from '../../config/config';
// Development is assumed when NODE_ENV is not set.
const env = process.env.NODE_ENV || 'development';
const isDev = env === 'development';
const logDir = config.logs_path;
// ensureDirSync also creates missing parent directories and does nothing
// when the directory already exists; a plain mkdirSync fails when the
// parent of logs_path is missing.
fs.ensureDirSync(logDir);
// Shared logger instance, created on first use.
let logger = null;
/**
 * Build a bunyan logger that writes 'info' and above to a daily-rotated file
 * under `logDir` and, in development, 'debug' and above to stdout.
 *
 * @param {string} [appName] - logger name and log-file prefix; falls back to
 *   `config.app.name` when empty
 * @returns {Object} the value returned by `bunyan.createLogger`
 */
const createLogger = (appName) => {
  const name = appName || config.app.name;

  const rotatingFile = FileStreamRotator.getStream({
    date_format: 'YYYYMMDD',
    filename: `${logDir}/${name}-%DATE%.log`,
    frequency: 'daily',
    verbose: false,
  });

  const streams = [{ level: 'info', stream: rotatingFile }];
  if (isDev) {
    streams.push({ level: 'debug', stream: process.stdout });
  }

  return bunyan.createLogger({
    name: name,
    serializers: bunyan.stdSerializers,
    streams: streams,
    src: false,
  });
};
/**
 * Forward one log call to the shared logger at the given level, creating the
 * logger on first use. `msg` is only passed along when it is truthy.
 */
const writeLog = (level, obj, msg) => {
  if (!logger) {
    logger = createLogger(global.app_name);
  }
  if (msg) {
    logger[level](obj, msg);
  } else {
    logger[level](obj);
  }
};

/**
 * Logging facade with one method per level. Each method takes
 * `(obj, msg)` and forwards to the logger method of the same name.
 */
export default {
  info(obj, msg) {
    writeLog('info', obj, msg);
  },
  error(obj, msg) {
    writeLog('error', obj, msg);
  },
  warn(obj, msg) {
    writeLog('warn', obj, msg);
  },
  debug(obj, msg) {
    writeLog('debug', obj, msg);
  },
  trace(obj, msg) {
    writeLog('trace', obj, msg);
  },
  fatal(obj, msg) {
    writeLog('fatal', obj, msg);
  },
};

27
src/utils/net.util.js Normal file
View File

@ -0,0 +1,27 @@
import request from 'request';
import Promise from 'bluebird';
/**
 * Promise wrapper around the callback-style `request` call.
 *
 * @param {Object} options - options passed to `request` unchanged
 * @returns {Promise<*>} resolves with the response body; rejects with the
 *   error reported by `request`
 */
const requestData = (options) => new Promise((resolve, reject) => {
  const onComplete = (err, response, body) => {
    if (err) {
      reject(err);
      return;
    }
    resolve(body);
  };
  request(options, onComplete);
});
export default {
  /**
   * POST `data` to `url` as a URL-encoded form.
   *
   * @param {string} url - target URL
   * @param {Object} data - form fields
   * @returns {Promise<*>} resolves with the response body
   */
  postForm(url, data) {
    const headers = {
      'Cache-Control': 'no-cache',
      'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
    };
    return requestData({
      method: 'POST',
      url: url,
      headers: headers,
      form: data,
    });
  },
};