I have a huge number of JSON files (4000) and I need to check every single one of them for a specific object. My code looks like the following:
import os
import json

files = sorted(os.listdir("my files path"))
for f in files:
    if f.endswith(".json"):
        myFile = open("my path\\" + f)
        myJson = json.load(myFile)
        if myJson["something"]["something"]["what im looking for"] == "ACTION":
            pass  # do stuff
        myFile.close()
As you can imagine, this takes a lot of execution time, and I was wondering if there is a quicker way...?
CodePudding user response:
Here's a multithreaded approach that may help you:
from glob import glob
import json
from concurrent.futures import ThreadPoolExecutor
import os

BASEDIR = 'myDirectory'  # the directory containing the json files

def process(filename):
    with open(filename) as infile:
        data = json.load(infile)
        # Adapt this lookup to your nested keys,
        # e.g. data["something"]["something"]["what im looking for"]
        if data.get('foo', '') == 'ACTION':
            pass  # do stuff

def main():
    with ThreadPoolExecutor() as executor:
        executor.map(process, glob(os.path.join(BASEDIR, '*.json')))

if __name__ == '__main__':
    main()
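One thing to keep in mind: threads mostly help here by overlapping file I/O, because json.load itself is CPU-bound and subject to the GIL. If profiling shows that parsing (rather than disk access) is the bottleneck, a ProcessPoolExecutor variant may scale better across cores. Below is a minimal sketch under the same assumptions as above; BASEDIR and the 'foo' lookup are placeholders you'd adapt to your actual directory and nested keys:

from glob import glob
from concurrent.futures import ProcessPoolExecutor
import json
import os

BASEDIR = 'myDirectory'  # placeholder: the directory containing the json files

def matches(filename):
    # Parse one file and report whether it contains the value we're after.
    with open(filename) as infile:
        data = json.load(infile)
    # Placeholder lookup: adapt to your nested keys,
    # e.g. data["something"]["something"]["what im looking for"]
    return data.get('foo', '') == 'ACTION'

def main():
    files = glob(os.path.join(BASEDIR, '*.json'))
    # Processes sidestep the GIL, so json.load runs on multiple cores.
    with ProcessPoolExecutor() as executor:
        for filename, hit in zip(files, executor.map(matches, files)):
            if hit:
                pass  # do stuff with filename

if __name__ == '__main__':
    main()

If the files live on a slow disk rather than a fast SSD, the ThreadPoolExecutor version above is likely already close to optimal, so it's worth timing both on a subset of your 4000 files before committing to either.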