# -*- coding: utf-8 -*-
"""Collect TapTap ranking data for three consecutive days and diff them.

Run as a script with an optional ``YYYY-MM-DD`` argument; without one the
report is anchored on yesterday's date.
"""
from ops.mtga import FromTga
from ops.plog import define_logger
from ops.mmysql import MysqlBase
import logging
import datetime
import pdb
import sys
from bson.objectid import ObjectId
import json

define_logger("/data/logs/ops/taptap_report.log")
log = logging.getLogger(__name__)


class TapTapReport:
    """Build per-game report rows from three daily snapshots.

    The snapshots are taken for ``day``, ``bday`` (one day earlier) and
    ``bbday`` (two days earlier). Report rows are dated ``bday``; the other
    two days are used to compute differences and to fill gaps.
    """

    _DATE_FORMAT = "%Y-%m-%d"
    # Metrics for which diff_item() computes a "diff_<metric>" column.
    _DIFF_ITEMS = ("order", "watch", "download", "sell", "review", "reserve", "topic")
    # Descriptive fields copied from the first day that has a truthy value.
    _INFO_FIELDS = ("title", "cate", "score", "tags")

    def __init__(self, day):
        """Store connection settings and derive the two preceding dates.

        Args:
            day: Anchor date as a ``YYYY-MM-DD`` string.

        Raises:
            ValueError: If ``day`` does not match ``YYYY-MM-DD``.
        """
        # NOTE(review): credentials and the API token are hard-coded in source;
        # they should be moved to configuration / a secret store and rotated.
        self.db_conf = {'user': 'mytga', 'pswd': 'gzVwh4HGR68G', 'host': '10.10.3.5', 'db': 'external_data'}
        self.url = "http://10.10.3.17:8992/querySql"
        self.api_secret = "n9H4R32ZcjtSeN89ljCY6ESzTmOlnwwnOB3r4YsggnP5M1AXLtKtiS4sS1KKLOEQ"
        self.tga = FromTga(url=self.url, token=self.api_secret)
        self.day = day
        # Parse once; strptime also rejects anything that is not a plain date,
        # which matters because the dates are later interpolated into SQL.
        anchor = datetime.datetime.strptime(self.day, self._DATE_FORMAT)
        self.bday = (anchor - datetime.timedelta(days=1)).strftime(self._DATE_FORMAT)
        self.bbday = (anchor - datetime.timedelta(days=2)).strftime(self._DATE_FORMAT)
        # Daily snapshots keyed by "<gameid>#<catename>"; filled by build_report().
        self.gameid_info_0 = {}
        self.gameid_info_1 = {}
        self.gameid_info_2 = {}

    def clear_noice(self):
        """Placeholder, not implemented.

        Intended to use the data of the surrounding two days to drop rows
        whose ``diff_order`` is greater than 200.
        """
        pass

    def _first_value(self, key, field):
        """Return ``field`` for ``key`` from bday, then day, then bbday.

        The first truthy value wins. A day that has no entry for ``key`` is
        skipped instead of raising ``KeyError``. If no day has a truthy value,
        the last value looked at (possibly ``None``) is returned.
        """
        value = None
        for snapshot in (self.gameid_info_1, self.gameid_info_0, self.gameid_info_2):
            value = snapshot.get(key, {}).get(field)
            if value:
                return value
        return value

    def diff_item(self, all, item):
        """Fill ``item`` and ``diff_<item>`` for every game present on ``bday``.

        Args:
            all: Report rows keyed by "<gameid>#<catename>"; updated in place.
                (The name shadows the builtin but is kept so keyword callers
                keep working.)
            item: Metric name, e.g. ``"order"``.

        Returns:
            The same ``all`` mapping.
        """
        diff_key = f"diff_{item}"
        for key, bday_info in self.gameid_info_1.items():
            # gameid is numeric, so the first "#" is always the separator even
            # when the category name itself contains "#".
            gameid, _, catename = key.partition("#")
            row = all.setdefault(key, {})
            row['gameid'] = gameid
            row['catename'] = catename
            row['date'] = self.bday

            # A game may be absent from an adjacent day; treat that as "no data"
            # rather than letting a KeyError abort the whole report.
            value_day = self.gameid_info_0.get(key, {}).get(item, 0)
            value_bday = bday_info.get(item, 0)
            value_bbday = self.gameid_info_2.get(key, {}).get(item, 0)

            if value_bday and value_bbday:
                row[diff_key] = value_bbday - value_bday
                row[item] = value_bday
            elif value_bday and value_day:
                # Present on bday and day but not on bbday.
                row[diff_key] = "新晋榜"
                row[item] = value_bday
            elif value_day and value_bbday:
                # bday itself is missing; fall back to the two neighbours.
                row[diff_key] = value_bbday - value_day
                row[item] = value_day
            else:
                # Two of the three days have no data: log and leave the row as is.
                log.error(f"{key} missing 2 days data")
        return all

    def build_report(self):
        """Load the three daily snapshots and assemble the report rows.

        Rows are printed; rows without a positive ``order`` are logged as
        errors. The database insert is currently disabled in the code.
        """
        mydb = MysqlBase(**self.db_conf)
        report = {}
        # Fetch the snapshots for day, bday and bbday.
        self.gameid_info_0 = self.get_gameid_info(self.day)
        self.gameid_info_1 = self.get_gameid_info(self.bday)
        self.gameid_info_2 = self.get_gameid_info(self.bbday)

        for key in self.gameid_info_1:
            gameid, _, catename = key.partition("#")
            row = report.setdefault(key, {})
            row['gameid'] = gameid
            row['catename'] = catename
            row['date'] = self.bday
            for field in self._INFO_FIELDS:
                row[field] = self._first_value(key, field)

        for item in self._DIFF_ITEMS:
            report = self.diff_item(report, item)

        for line in report.values():
            try:
                print(line)
                # diff_item() stores the rank under 'order'; rows where it could
                # not be determined have no such key and are reported below.
                if line.get('order', 0) > 0:
                    pass  # mydb.insert("taptap_data_new", line)
                else:
                    log.error(f"some value is zore ,{line}!")
            except Exception:
                log.error(f"install 2 db failed ,values={line}", exc_info=True)

    def get_gameid_info(self, day):
        """Query one day of data and index it by "<gameid>#<catename>".

        Args:
            day: Partition date. It is interpolated directly into the SQL
                text, so callers must pass a validated date string (as
                ``__init__`` produces).

        Returns:
            dict mapping "<gameid>#<catename>" to a dict of the selected
            columns plus ``"key"``. Empty when the query returns nothing.
            Rows that cannot be unpacked are logged and skipped.
        """
        sql = f"""SELECT
            gameid,
            catename,
            title,
            cate,
            "order",
            topic,
            score,
            reserve,
            watch,
            download,
            sell,
            review ,
            tags
        FROM
            v_event_25
        where
            "$part_date"='{day}'"""
        all_data = {}
        data = self.tga.get_data(sql)
        if not data:
            return all_data

        for line in data:
            try:
                (gameid, catename, title, cate, order, topic, score, reserve,
                 watch, download, sell, review, tags) = line
                key = f"{int(gameid)}#{catename}"
            except Exception:
                # Best effort: a malformed row must not abort the whole load.
                log.error(f"split {line} failed", exc_info=True)
                continue
            all_data[key] = {
                "key": key,
                "catename": catename,
                "title": title,
                "cate": cate,
                "order": order,
                "topic": topic,
                "score": score,
                "reserve": reserve,
                "watch": watch,
                "download": download,
                "sell": sell,
                "review": review,
                "tags": tags,
            }
        return all_data


def main():
    """Run the report for the date given on the command line, else yesterday."""
    if len(sys.argv) == 2:
        # argv[0] is the script name; the single user argument is argv[1].
        day = sys.argv[1]
    else:
        day = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

    tap = TapTapReport(day)
    tap.build_report()


if __name__ == "__main__":
    main()