How to save multiple values to a CSV file from a loop in Python? - CodePudding

I have around 20000 files. I want to calculate the peak for each file based on a threshold (I have 10 thresholds) and then save the results in a CSV file. I am confused about how to save the values for each file and each threshold in the CSV file.

# For every threshold, count how many values in each file exceed it.
for threshold in np.arange(1,10,1):
    threshold_p=calculate_th(n,m, threshold)
    for root, dirs, files in os.walk(dir_path, topdown=False):
        for name in files:
            allfiles.append(os.path.join(root, name))
        for filename in tqdm.tqdm(allfiles, desc= "files progress"):
            output_g = np.load(filename)
            filtered=np.sum(output_g > threshold_p)
            # `result` is rebuilt on every iteration; nothing is stored or written yet
            result= [filename,filtered, threshold_p, threshold]

I want to save each "result" value as a row in a CSV file with 4 columns, but I am not able to write them to the CSV file without overwriting the previous rows.

CodePudding user response：

You didn't show how you write the file, but there are several ways to do it.

First. There is a standard rule: if you use a for-loop, then use a list to keep all the results.

This way you create an empty list before the loop, append each result to this list inside the loop, and write all the results after the loop.

To keep all the versions similar, I will NOT use with open(..) as fh - but you could use it instead.

# --- before loop ---

import csv  # standard-library module (note the spelling: `csv`, not `cvs`)

all_results = []  # <-- list for all results

# --- loop ---

for threshold in np.arange(1, 10):
    threshold_p = calculate_th(n, m, threshold)

    for root, dirs, files in os.walk(dir_path, topdown=False):
        for name in tqdm.tqdm(files, desc="files progress"):
            filename = os.path.join(root, name)
            output_g = np.load(filename)
            # count the elements of `output_g` that are greater than `threshold_p`
            filtered = np.sum(output_g > threshold_p)
            result = [filename, filtered, threshold_p, threshold]

            all_results.append(result)  # <-- keep result

# --- after loop ---

# `newline=''` lets the csv module control the line endings
# (without it you can get an empty line after every row on Windows)
fh = open('output.csv', 'w', newline='')
csv_writer = csv.writer(fh)

# write one row with headers (using `writerow` without `s` at the end)
csv_writer.writerow(["filename", "filtered", "threshold_p", "threshold"])

# write many rows with results (using `writerows` with `s` at the end)
csv_writer.writerows(all_results)

fh.close()

Second. You can open the file before the loop, write one row inside the loop on every iteration, and close the file after the loop.

# --- before loop ---

import csv  # standard-library module (note the spelling: `csv`, not `cvs`)

# `newline=''` lets the csv module control the line endings
# (without it you can get an empty line after every row on Windows)
fh = open('output.csv', 'w', newline='')
csv_writer = csv.writer(fh)

# write one row with headers (using `writerow` without `s` at the end)
csv_writer.writerow(["filename", "filtered", "threshold_p", "threshold"])

# --- loop ---

for threshold in np.arange(1, 10):
    threshold_p = calculate_th(n, m, threshold)

    for root, dirs, files in os.walk(dir_path, topdown=False):
        for name in tqdm.tqdm(files, desc="files progress"):
            filename = os.path.join(root, name)
            output_g = np.load(filename)
            # count the elements of `output_g` that are greater than `threshold_p`
            filtered = np.sum(output_g > threshold_p)
            result = [filename, filtered, threshold_p, threshold]

            # write one row with result (using `writerow` without `s` at the end)
            csv_writer.writerow(result)

# --- after loop ---

# the file stays open during the whole loop, so close it only once, here
fh.close()

Third. Before the loop, create the file with only the headers - and close it. Inside the loop, open the file in append mode to add each new row at the end of the file.

With this method, all the results written so far are kept even if the code crashes.

# --- before loop ---

import csv  # standard-library module (note the spelling: `csv`, not `cvs`)

# create (or truncate) the file so that it holds only the header row
# `newline=''` lets the csv module control the line endings
# (without it you can get an empty line after every row on Windows)
fh = open('output.csv', 'w', newline='')
csv_writer = csv.writer(fh)

# write one row with headers (using `writerow` without `s` at the end)
csv_writer.writerow(["filename", "filtered", "threshold_p", "threshold"])

fh.close()

# --- loop ---

for threshold in np.arange(1, 10):
    threshold_p = calculate_th(n, m, threshold)

    for root, dirs, files in os.walk(dir_path, topdown=False):
        for name in tqdm.tqdm(files, desc="files progress"):
            filename = os.path.join(root, name)
            output_g = np.load(filename)
            # count the elements of `output_g` that are greater than `threshold_p`
            filtered = np.sum(output_g > threshold_p)
            result = [filename, filtered, threshold_p, threshold]

            # reopen the file for every row, so each row is written and
            # the file is closed before the next file is processed
            fh = open('output.csv', 'a', newline='')  # `a` for `append mode`
            csv_writer = csv.writer(fh)

            # write one row with result (using `writerow` without `s` at the end)
            csv_writer.writerow(result)

            fh.close()

# --- after loop ---

# nothing