# -*- coding: utf-8 -*-
import sys
import copy


def read_files(filename):
    """Parse a file of ``URL======title`` lines into a ``{title: URL}`` dict.

    The text before the first ``======`` separator on a line becomes the
    value and the text after the last separator becomes the key. When the
    same key occurs more than once in the file, the last occurrence wins.

    Lines that do not contain the separator (including blank lines) are
    skipped, so an empty file yields an empty dict.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each title to its URL.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    records = dict()
    with open(filename, 'r') as f:
        data = f.read().strip()
    for line in data.split("\n"):
        # Malformed lines carry no (URL, title) pair; ignore them instead of
        # recording the whole line as both key and value.
        if "======" not in line:
            continue
        fields = line.split("======")
        records[fields[-1]] = fields[0]
    return records


def main(filenames=("a.txt", "b.txt")):
    """Split the records of several files into shared and unique titles.

    Every file in ``filenames`` is parsed with ``read_files``. A title seen
    in more than one file is appended to ``out1.txt``; every other title is
    appended to ``out.txt``. Both outputs use ``URL====title`` lines, and the
    URL written is the one from the first file that contained the title.
    The lines written to ``out.txt`` are also printed as a list.

    Both output files are opened in append mode, so earlier content is kept.

    Args:
        filenames: Iterable of input file paths. Add further paths to compare
            more files; separate them with an ASCII comma, not a full-width
            one.
    """
    # All records: the title is the key and the URL is the value. (If a price
    # is available, a (url, price) tuple could be stored as the value.)
    new_data = dict()
    # Titles found in more than one file, in first-seen order. They are
    # removed from the unique set once all files have been read.
    intersection = list()
    # Guards against listing a title twice when it occurs in 3+ files, which
    # would otherwise duplicate its output line and pop a missing key.
    already_shared = set()
    for filename in filenames:
        data = read_files(filename)
        for key, value in data.items():
            if key not in new_data:
                new_data[key] = value
            elif key not in already_shared:
                already_shared.add(key)
                intersection.append(key)

    # Keep new_data intact as a full backup; other uses may need it.
    all_data = copy.deepcopy(new_data)
    with open('out1.txt', "a+") as f:
        for item in intersection:
            all_data.pop(item, None)
            # Write the duplicated records to out1.txt.
            f.write(f"{new_data[item]}===={item}\n")

    out = list()
    with open('out.txt', "a+") as f:
        for key, value in all_data.items():
            # f-strings need Python 3.6+; on older versions use
            # "{0}===={1}\n".format(value, key) instead.
            temp = f"{value}===={key}\n"
            out.append(temp)
            f.write(temp)
    print(out)


if __name__ == "__main__":
    main()