# -*- coding: utf-8 -*-
"""Convert a Markdown log-format description into a JSON field dictionary.

The Markdown input is scanned line by line. A line starting with
``<digits>-<digits>.<title>`` opens a new log section, and every following
four-column table row adds one field to that section. The resulting nested
dictionary is printed to stdout as pretty-printed JSON.
"""
import os
import re
import json
import pprint

# Section heading: two integers joined by "-", then "." and the title text,
# e.g. "12-3.Some title".
TOKEN_REGEX = r"""(\d+)(-)(\d+)(\.)(.+)"""
# Table row with four "|"-prefixed cells. Each capture group is a run of
# non-whitespace characters, so only the first whitespace-delimited token of
# a cell is captured and a cell may be captured as an empty string.
TOKEN_REGEX2 = r"""(?:\|\s*)([^\s]*)(?:\s*)(?:\|\s*)([^\s]*)(?:\s*)(?:\|\s*)([^\s]*)(?:\s*)(?:\|\s*)([^\s]*)(?:\s*)"""

# Compiled once at import time instead of being looked up for every line.
_SECTION_RE = re.compile(TOKEN_REGEX)
_ROW_RE = re.compile(TOKEN_REGEX2)

# First-column text of the table header row; that row is not a field.
_HEADER_CELL = '字段'


def _parse_log_markdown(lines):
    """Build the nested section/field dictionary from Markdown lines.

    Args:
        lines: Iterable of text lines (for example an open text file).

    Returns:
        dict: ``{'__keys__': [section keys in file order], '<a>-<b>': node}``
        where each node is ``{'__keys__': [names in file order],
        name: {'short_name', 'en_name', 'desc'}}``. A field is reachable
        under both its short name and its English name (same dict object).

    Raises:
        AssertionError: If a section key occurs twice, or if a field whose
            short name is ``ext`` has an English name other than ``ext``.
    """
    log_dict = {'__keys__': []}
    curr_node = None  # Section currently being filled; None before the first heading.

    for line in lines:
        section = _SECTION_RE.match(line)
        if section:
            log_key = section.group(1) + '-' + section.group(3)
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O`` while keeping the exception type unchanged.
            if log_key in log_dict:
                raise AssertionError('duplicate log section: ' + log_key)
            curr_node = {'__keys__': []}
            log_dict[log_key] = curr_node
            log_dict['__keys__'].append(log_key)
            continue

        # Table rows that appear before any section heading are ignored.
        if curr_node is None:
            continue

        row = _ROW_RE.match(line)
        if not row:
            continue

        field1, field2, _, field4 = row.groups()
        # Skip rows with an empty first cell (indexing it used to raise
        # IndexError), separator rows ("---") and the header row.
        if not field1 or field1.startswith('-') or field1 == _HEADER_CELL:
            continue

        # Drop Markdown escape backslashes, e.g. "user\_id" -> "user_id".
        field_name = field1.replace('\\', '')
        en_name = field2.replace('\\', '')

        if field_name == 'ext' and en_name != 'ext':
            raise AssertionError(
                "field 'ext' must use 'ext' as its English name, got: " + en_name)

        entry = {
            'short_name': field_name,
            'en_name': en_name,
            'desc': field4,
        }
        curr_node[field_name] = entry
        curr_node['__keys__'].append(field_name)
        # Alias the entry under its English name unless that key is taken.
        if en_name not in curr_node:
            curr_node['__keys__'].append(en_name)
            curr_node[en_name] = entry

    return log_dict


def md2csv(mdfile):
    """Parse the Markdown log description *mdfile* and print it as JSON.

    Despite the name, the output is JSON: keys sorted, 4-space indent,
    written to stdout.

    Args:
        mdfile: Path of the Markdown file; it is read as UTF-8.

    Raises:
        AssertionError: On a duplicate section key or an inconsistent
            ``ext`` field (see ``_parse_log_markdown``).
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file; the explicit encoding keeps the
    # comparison with the Chinese header cell independent of the locale.
    with open(mdfile, encoding='utf-8') as handle:
        log_dict = _parse_log_markdown(handle)
    print(json.dumps(log_dict, sort_keys=True, indent=4, separators=(',', ': ')))


# Run the conversion only when executed as a script, not on import.
if __name__ == '__main__':
    md2csv('/root/opensource/doc/api说明/log_normal.md')