5、Python 读取大型 JSON 文件，输出键值
<pre><code>import os
import json
import logging
import numpy as np
from collections import Counter
import hashlib
def get_json_key(dic_json):
    """Return the top-level key layout of a parsed JSON object.

    Args:
        dic_json: A value produced by ``json.loads``.

    Returns:
        A dict that maps every top-level key of ``dic_json`` to ``''``,
        preserving the key order of the input. An empty dict is returned
        when ``dic_json`` is not a dict.
    """
    if not isinstance(dic_json, dict):
        return {}
    # A nested dict is represented by its own key only; its contents are not
    # descended into, so the result is always a flat {key: ''} mapping.
    return {key: '' for key in dic_json}
# raw
# Scan the line-delimited JSON file and keep the first sample seen for every
# distinct top-level key layout.
num = 0  # number of distinct key layouts collected so far
keys_set = set()  # MD5 hex digests of the key layouts already seen
keys_list = []  # key layouts, in first-seen order
sample_list = []  # first sample encountered for each key layout
# The encoding is passed explicitly so that decoding does not depend on the
# platform's locale default.
with open('search.train.json', encoding='utf-8') as fin:
    for line in fin:
        # Stop once more than 1000 distinct layouts have been collected.
        if num > 1000:
            break
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline) is not a JSON document.
            continue
        sample = json.loads(line)
        key = get_json_key(sample)
        # Fingerprint the layout so membership checks use a short string.
        hash_key = hashlib.md5(str(key).encode(encoding='UTF-8')).hexdigest()
        if hash_key in keys_set:
            continue
        keys_list.append(key)
        keys_set.add(hash_key)
        sample_list.append(sample)
        num = num + 1
print(keys_set)
print(keys_list)
print(sample_list)</code></pre>