datacollect/taptap/taptap_data_collect.py
2019-11-14 10:56:53 +08:00

155 lines
6.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
from ops.mtga import FromTga
from ops.plog import define_logger
from ops.mmysql import MysqlBase
import logging
import datetime
import pdb
import sys
from bson.objectid import ObjectId
import json
# Configure logging for this script via the project helper; presumably this
# routes log records to the given file -- confirm against ops.plog.
define_logger("/data/logs/ops/taptap_collect.log")
# Module-level logger shared by the report class and main() below.
log = logging.getLogger(__name__)
import sys
class TapTapReport:
    """Build the per-game TapTap ranking report for one day and store it in MySQL.

    The report is computed for ``bday`` (the day before ``day``). For every
    game/category pair present on ``bday`` it records the descriptive fields
    (title, cate, score, tags) and, for each numeric metric, the current value
    plus its difference to the previous day.
    """

    def __init__(self, day):
        """Prepare connection settings and the three dates the report works on.

        Args:
            day: Reference date as a ``%Y-%m-%d`` string.

        Raises:
            ValueError: If ``day`` does not match ``%Y-%m-%d``.
        """
        # NOTE(review): credentials and endpoints are hard-coded in source;
        # they should be loaded from configuration or the environment.
        self.db_conf = {'user': 'mytga', 'pswd': 'gzVwh4HGR68G', 'host': '10.10.3.5', 'db': 'external_data'}
        self.url = "http://10.10.3.17:8992/querySql"
        self.api_secret = "n9H4R32ZcjtSeN89ljCY6ESzTmOlnwwnOB3r4YsggnP5M1AXLtKtiS4sS1KKLOEQ"
        self.tga = FromTga(url=self.url, token=self.api_secret)
        self.day = day
        # Parse once and derive the two preceding days from the same value.
        reference = datetime.datetime.strptime(self.day, "%Y-%m-%d")
        self.bday = (reference - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
        self.bbday = (reference - datetime.timedelta(days=2)).strftime('%Y-%m-%d')

    def clear_noice(self):
        """Placeholder; not implemented yet."""
        # Intended: use the data of the surrounding two days to clean out
        # entries whose diff_order is greater than 200.
        pass

    @staticmethod
    def _has_value(info, key, item):
        """Return True when ``info[key][item]`` is present, not None and >= 0."""
        value = info.get(key, {}).get(item, -1)
        # A NULL metric is stored as None; comparing None with an int raises
        # TypeError on Python 3, so treat it as "no data" explicitly.
        return value is not None and value >= 0

    def _first_available(self, key, field, default):
        """Return ``field`` for ``key`` from bday, then day, then bbday (first truthy)."""
        return (
            self.gameid_info_1.get(key, {}).get(field, default)
            or self.gameid_info_0.get(key, {}).get(field, default)
            or self.gameid_info_2.get(key, {}).get(field, default)
        )

    def diff_item(self, all, item):
        """Fill ``all`` with the value of ``item`` and its day-over-day difference.

        Reads ``self.gameid_info_0/1/2`` (data for day, bday and bbday) and
        ``self.bday``. For every key of ``gameid_info_1``:

        * bday and bbday both have data: diff is ``bday - bbday``.
        * only bday and day have data: diff is the marker ``"新晋榜"``.
        * only day and bbday have data: diff is ``day - bbday`` and the value
          of ``day`` is used.
        * otherwise neither the value nor the diff is written.

        For ``item == "order"`` a numeric diff is negated.

        Args:
            all: Mapping ``key -> row dict``; updated in place.
            item: Name of the metric, e.g. ``"order"`` or ``"watch"``.

        Returns:
            The same ``all`` mapping that was passed in.
        """
        day0, day1, day2 = self.gameid_info_0, self.gameid_info_1, self.gameid_info_2
        diff_key = f"diff_{item}"
        for key in day1:
            entry = all.setdefault(key, {})
            key_parts = key.split("#")
            entry['gameid'] = key_parts[0]
            entry['catename'] = key_parts[1]
            entry['date'] = self.bday
            if self._has_value(day1, key, item) and self._has_value(day2, key, item):
                entry[diff_key] = int(day1[key][item] - day2[key][item])
                entry[item] = day1[key][item]
            elif self._has_value(day1, key, item) and self._has_value(day0, key, item):
                entry[diff_key] = "新晋榜"
                entry[item] = day1[key][item]
            elif self._has_value(day0, key, item) and self._has_value(day2, key, item):
                entry[diff_key] = int(day0[key][item] - day2[key][item])
                entry[item] = day0[key][item]
            # else: two of the three days have no data for this metric, so the
            # entry is left without a value/diff for it.
            if item == "order":
                try:
                    entry[diff_key] = -int(entry[diff_key])
                except (KeyError, ValueError, TypeError):
                    # No diff was computed, or it is the non-numeric marker.
                    pass
        return all

    def build_report(self):
        """Load three days of data, compute the report and insert it into MySQL.

        Rows are inserted into the ``taptap_collect`` table only when their
        ``order`` value is at least 1. A failing row is logged and skipped;
        the number of inserted rows is logged once at the end.
        """
        mydb = MysqlBase(**self.db_conf)
        report = dict()
        # Fetch the game info for day, bday and bbday.
        self.gameid_info_0 = self.get_gameid_info(self.day)
        self.gameid_info_1 = self.get_gameid_info(self.bday)
        self.gameid_info_2 = self.get_gameid_info(self.bbday)
        for key in self.gameid_info_1:
            entry = report.setdefault(key, {})
            key_parts = key.split("#")
            entry['gameid'] = key_parts[0]
            entry['catename'] = key_parts[1]
            entry['date'] = self.bday
            try:
                # Descriptive fields: take the first day that has a truthy value.
                entry['title'] = self._first_available(key, 'title', None)
                entry['cate'] = self._first_available(key, 'cate', None)
                entry['score'] = self._first_available(key, 'score', 0)
                entry['tags'] = self._first_available(key, 'tags', None)
            except Exception:
                log.error(f"split {report[key]} error ", exc_info=True)
        items = ("order", "watch", "download", "sell", "review", "reserve", "topic")
        for item in items:
            report = self.diff_item(report, item)
        inserted = 0
        for line in report.values():
            try:
                if line.get("order", 0) >= 1:
                    mydb.insert("taptap_collect", line)
                    inserted += 1
            except Exception:
                log.error(f"install 2 db failed ,values={line}", exc_info=True)
        # Log the summary once, after all rows were processed.
        log.info(f"insert {inserted} 2 mysql!")

    def get_gameid_info(self, day):
        """Query the game info of one day and index it by ``"<gameid>#<catename>"``.

        Args:
            day: Partition date, interpolated into the SQL text as-is.

        Returns:
            Dict mapping ``"<gameid>#<catename>"`` to a dict with the keys
            ``key``, ``catename``, ``title``, ``cate``, ``order``, ``topic``,
            ``score``, ``reserve``, ``watch``, ``download``, ``sell``,
            ``review`` and ``tags``. Rows that cannot be unpacked are logged
            and skipped. Empty when the query returns nothing.
        """
        all_data = dict()
        # NOTE(review): `day` is interpolated directly into the SQL text; only
        # pass trusted, validated date strings.
        sql = f"""SELECT
gameid,
catename,
title,
cate,
"order",
topic,
score,
reserve,
watch,
download,
sell,
review ,
tags
FROM
v_event_25
where
"$part_date"='{day}'"""
        data = self.tga.get_data(sql)
        if not data:
            return all_data
        for line in data:
            try:
                (gameid, catename, title, cate, order, topic, score,
                 reserve, watch, download, sell, review, tags) = line
                key = f"{int(gameid)}#{catename}"
                all_data.setdefault(key, {}).update({
                    "key": key,
                    "catename": catename,
                    "title": title,
                    "cate": cate,
                    "order": order,
                    "topic": topic,
                    "score": score,
                    "reserve": reserve,
                    "watch": watch,
                    "download": download,
                    "sell": sell,
                    "review": review,
                    "tags": tags,
                })
            except Exception:
                log.error(f"split {line} failed", exc_info=True)
        return all_data
def main():
    """Run the report for the date given on the command line.

    With exactly one command-line argument it is used as the reference day;
    in every other case yesterday's date (``%Y-%m-%d``) is used.
    """
    argv = sys.argv
    if len(argv) != 2:
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        day = yesterday.strftime('%Y-%m-%d')
    else:
        day = argv[1]
    TapTapReport(day).build_report()


if __name__ == "__main__":
    main()