tools/scripts/md_csv/convert_newjson.py
aozhiwei 923263ad93 1
2018-11-08 14:17:05 +08:00

82 lines
2.8 KiB
Python

# -*- coding: utf-8 -*-
import os
import re
import json
import pprint
# Root directories of the old-format input logs and of the converted output.
# convert_newjson() appends a game id to each to get the per-game directory.
OLD_PATH = '/root/opensource/tools/scripts/md_csv/data/old/'
NEW_PATH = '/root/opensource/tools/scripts/md_csv/data/new/'

# Event schema, read once at import time from dict.json in the current working
# directory.  convertOldEvent() looks nodes up by '<part2>-<part3>' of the
# event name; each node maps 'strN' / 'numN' to a dict carrying 'en_name'.
# The file is opened in a `with` block so the handle is closed deterministically.
with open('dict.json', 'r') as _dict_file:
    log_dict = json.load(_dict_file)
def getFiles(path):
    """Return the names of the upload log files directly inside *path*.

    Args:
        path: Directory to scan.  Sub-directories are not descended into.

    Returns:
        A list of bare file names (without the directory part), in the
        order ``os.listdir`` yields them, for every regular file whose name
        contains ``'upload_prod-bj'``.

    Raises:
        OSError: If *path* cannot be listed.
    """
    # The cheap substring test runs first so that non-matching entries are
    # never stat()ed.
    return [
        name
        for name in os.listdir(path)
        if 'upload_prod-bj' in name
        and os.path.isfile(os.path.join(path, name))
    ]
def convertOldEvent(oldJson, event_dict=None):
    """Rename the generic fields of an old-format event, in place.

    The event name is split on ``'_'`` into exactly three parts; the second
    and third parts joined by ``'-'`` select a schema node.  Every ``strN`` /
    ``numN`` property (N in 1..19) is moved to the key given by
    ``node[key]['en_name']``.  An ``ext`` property is serialised to text
    (``json.dumps`` unless it already is a ``str``) and moved to ``ext1``,
    or to ``ext1`` + ``ext2`` when its UTF-8 form exceeds 2 KiB.

    Args:
        oldJson: Decoded event holding ``'#event_name'`` and ``'properties'``.
            The ``'properties'`` dict is modified in place.
        event_dict: Optional schema mapping to use instead of the
            module-level ``log_dict`` (the default when ``None``).

    Returns:
        None.

    Raises:
        AssertionError: If the event name is empty or does not have three
            parts, the schema node is ``None``, a present ``strN``/``numN``
            property is not described by the node, or ``ext`` is larger than
            4 KiB / cannot be split into two chunks of at most 2 KiB each.
        KeyError: If ``'#event_name'`` or ``'properties'`` is missing, the
            event is not in the schema, or a node entry lacks ``'en_name'``.
    """
    chunk_bytes = 1024 * 2  # UTF-8 byte budget of a single extN field
    chunk_chars = 2000      # split point (in characters) used for ASCII text

    # Only touch the module-level dictionary when no override is supplied.
    schema = log_dict if event_dict is None else event_dict

    # Validation raises explicitly (rather than using `assert`) so that it
    # still runs under `python -O`; the exception type is unchanged.
    event_name = oldJson['#event_name']
    if event_name == '':
        raise AssertionError('#event_name is empty')
    strings = event_name.split('_')
    if len(strings) != 3:
        raise AssertionError(
            "#event_name {!r} must have exactly three '_'-separated "
            "parts".format(event_name))
    node = schema[strings[1] + '-' + strings[2]]
    if node is None:
        raise AssertionError(
            'schema node for {!r} is None'.format(event_name))

    properties = oldJson['properties']
    for i in range(1, 20):
        strKey = 'str' + str(i)
        numKey = 'num' + str(i)
        if strKey in properties:
            if strKey not in node:
                raise AssertionError(
                    '{} is not described for event {!r}'.format(
                        strKey, event_name))
            # pop() before assigning, so a field whose en_name equals its
            # old key is kept instead of being deleted right after the copy.
            properties[node[strKey]['en_name']] = properties.pop(strKey)
        if numKey in properties:
            if numKey not in node:
                # Dump the full context before failing.
                print(numKey, node, oldJson)
                raise AssertionError(
                    '{} is not described for event {!r}'.format(
                        numKey, event_name))
            properties[node[numKey]['en_name']] = properties.pop(numKey)

    if 'ext' in properties:
        ext = properties['ext']
        if not isinstance(ext, str):
            ext = json.dumps(ext)
        rawdata = ext.encode('utf8')
        if len(rawdata) <= chunk_bytes:
            properties['ext1'] = ext
        else:
            if len(rawdata) > chunk_bytes * 2:
                raise AssertionError(
                    'ext is {} bytes, more than the {} byte limit'.format(
                        len(rawdata), chunk_bytes * 2))
            # Number of whole characters that fit into one chunk.  Decoding
            # the truncated bytes with errors='ignore' drops only a trailing
            # partial multi-byte sequence.
            fitting_chars = len(
                rawdata[:chunk_bytes].decode('utf8', errors='ignore'))
            # Keep the 2000-character split for text that fits (all ASCII
            # input); split earlier when multi-byte characters would push
            # the first chunk over the byte budget.
            split_at = min(chunk_chars, fitting_chars)
            ext1 = ext[:split_at].encode('utf8')
            ext2 = ext[split_at:].encode('utf8')
            print(len(ext1), len(ext2), len(rawdata))
            if len(ext2) > chunk_bytes:
                raise AssertionError(
                    'ext cannot be split into two chunks of at most {} '
                    'bytes'.format(chunk_bytes))
            properties['ext1'] = ext1.decode('utf8')
            properties['ext2'] = ext2.decode('utf8')
        del properties['ext']
def convert_newjson(gameid):
    """Convert every old-format log file of one game to the new format.

    Each file selected by ``getFiles(OLD_PATH + gameid)`` is read line by
    line as JSON and rewritten under the same name in ``NEW_PATH + gameid``:

    * ``track`` records go through ``convertOldEvent`` and are re-serialised;
    * ``user_add`` records lose their ``account_id`` property and are
      re-serialised;
    * ``user_set`` / ``user_setOnce`` records are copied through unchanged.

    Every output record ends with exactly one newline.  Blank input lines
    are skipped.

    Args:
        gameid: Game identifier appended to both root paths, e.g. ``'1004'``.

    Returns:
        None.

    Raises:
        AssertionError: If a record's ``#type`` is not one of the four
            supported values (or ``convertOldEvent`` rejects a record).
        KeyError: If a record lacks ``#type``, or a ``user_add`` record lacks
            ``properties`` / ``account_id``.
        ValueError: If a non-blank line is not valid JSON.
        OSError: If a directory or file cannot be opened.
    """
    known_types = ('track', 'user_add', 'user_set', 'user_setOnce')
    old_dir = OLD_PATH + gameid
    new_dir = NEW_PATH + gameid
    for f in getFiles(old_dir):
        print(f)
        # Both handles are closed (and the output flushed) even when a
        # record fails validation half-way through the file.
        with open(os.path.join(old_dir, f), 'r') as old_file, \
                open(os.path.join(new_dir, f), 'w') as new_file:
            for line in old_file:
                if not line.strip():
                    # Blank lines carry no record; json.loads would fail.
                    continue
                jsonObj = json.loads(line)
                record_type = jsonObj['#type']
                if record_type not in known_types:
                    raise AssertionError(
                        'unexpected #type {!r} in {}'.format(record_type, f))
                if record_type == 'track':
                    convertOldEvent(jsonObj)
                    new_file.write(json.dumps(jsonObj) + '\n')
                elif record_type == 'user_add':
                    del jsonObj['properties']['account_id']
                    new_file.write(json.dumps(jsonObj) + '\n')
                else:
                    # `line` already ends with '\n' (except possibly the
                    # last one); normalise instead of appending a second
                    # newline, which used to emit an empty line per record.
                    new_file.write(line.rstrip('\n') + '\n')
# Script entry: convert the logs of each listed game id, in this order.
for game_id in ('1004', '1011'):
    convert_newjson(game_id)