Add one more value to csv from text file in python-CodePudding

I am converting multiple text files to a csv file. My text file looks like this:

ITEM: TIMESTEP
55000
ITEM: NUMBER OF ATOMS
4365
ITEM: BOX BOUNDS ff ff ff
-0.3 0.3
-0.6 0.6
-0.6 0.6
ITEM: ATOMS id type x y z vx vy vz fx fy fz omegax omegay omegaz radius 
4356 1 -0.0885288 -0.0101421 -0.48871 -0.000941682 0.778688 -0.0153902 -0.00720861 -0.0533703 0.0104717 0.35581 -0.0601358 -0.436049 0.01 
4227 1 0.0157977 0.00542603 -0.488429 -0.00996111 0.784119 0.00813807 -0.000491847 0.0144889 -0.0120111 1.08208 -0.0671177 0.369492 0.01 
3973 1 0.0179724 0.0256167 -0.48799 -0.00582994 0.772455 0.0394544 0.0109589 -0.0187232 -0.00111718 -0.0586513 -0.162943 1.12784 0.01 
4300 1 0.0900919 0.0248592 -0.488025 -0.000455483 0.769978 0.0388239 -0.00364509 0.0409803 -0.00269227 3.94355 -0.0249566 -0.223111 0.01 
4200 1 -0.0230223 0.0329911 -0.483108 -0.00238 0.778547 0.0500186 0.0421189 -0.021588 0.05607 0.112989 -0.0813771 -1.09981 0.015 
4339 1 0.00143577 0.0368542 -0.488107 0.000587848 0.784672 0.0593572 0.00385562 -0.00475113 -0.00710483 -0.201196 0.158512 -5.63826 0.01 
4106 1 0.0648392 0.0269728 -0.483248 -0.00365836 0.766081 0.0395827 0.0418642 0.1802 0.0547313 -0.0578358 0.124205 -0.96464 0.015 
4104 1 -0.084453 0.0507114 -0.482726 -0.000596577 0.75636 0.0806599 0.000817826 0.0119286 -0.0150014 -0.0864852 -0.103877 0.198773 0.015

Right now my CSV file contains only the values after line 9 of the text file (line 8 in the Python code). I also want to include line 2 (the value under the `ITEM: TIMESTEP` header) in the CSV, along with all the values after line 9.

I tried to edit my code but couldn't get it to work. Could I get some help? My code is here:

import numpy as np
import pandas as pd
import csv
import glob
import time


def main():
    """Concatenate the atom rows of every dump file in ``./all/`` into ``all.csv``.

    The column names are taken from line 9 of ``all/dump46000.data``
    (``ITEM: ATOMS id type ...`` with the two leading tokens dropped).
    For every file matching ``./all/*dump*.data`` the header line is
    appended to ``all.csv``, followed by that file's rows (everything after
    line 9) with spaces turned into commas.

    The TIMESTEP value on line 2 of each file is not written; that is the
    missing piece this question asks about.
    """
    start = time.time()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))
    # get header from one of the files
    with open('all/dump46000.data', 'r') as f:
        for _ in range(8):
            next(f)  # skip first 8 lines
        # Line 9 is "ITEM: ATOMS id type ..."; keep only the column names.
        header = ','.join(f.readline().split()[2:]) + '\n'
        print(header)
    for file in files:
        with open(file, 'r') as f, open('all.csv', 'a') as g:  # note the 'a'
            g.write(header)  # write the header
            for _ in range(9):
                next(f)  # skip first 9 lines
            for line in f:
                # rstrip() removes the trailing blank and newline of each row
                g.write(line.rstrip().replace(' ', ',') + '\n')
    print(time.time() - start)


if __name__ == "__main__":
    main()

My folder all contains more than 600 files:

['./all/dump501000.data', 
'./all/dump307000.data',
'./all/dump612000.data',
'./all/dump369000.data',
'./all/dump23000.data',
'./all/dump470000.data',
'./all/dump235000.data',
'./all/dump6000.data',
'./all/dump568000.data',
'./all/dump506000.data',
'./all/dump623000.data',
'./all/dump329000.data',
'./all/dump220000.data', 
.....................
....................

I want this csv file from text file:

id type x y z vx vy vz fx fy fz omegax omegay omegaz radius TIMESTEP

But I am getting this csv

id type x y z vx vy vz fx fy fz omegax omegay omegaz radius

Thank you

CodePudding user response：

Based on what you want, here's what should work

import numpy as np
import pandas as pd
import csv
import glob
import time


def main():
    """Merge every dump file in ``./all/`` into ``all.csv`` with a TIMESTEP column.

    For each file matching ``./all/*dump*.data``:

    * line 2 holds the timestep value,
    * line 9 (``ITEM: ATOMS id type ...``) holds the column names,
    * every following line is one atom row.

    ``all.csv`` receives a single header row (the column names followed by
    ``TIMESTEP``) and then one comma-separated row per atom, each ending with
    the timestep of the file it came from.  The output is opened in append
    mode; the header is only written when the file is still empty.
    """
    start = time.perf_counter()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))
    for file in files:
        with open(file, 'r') as f, open('all.csv', 'a') as g:  # note the 'a'
            f.readline()  # line 1: "ITEM: TIMESTEP"
            timestep = f.readline().strip()  # line 2: the timestep value
            for _ in range(6):
                f.readline()  # lines 3-8: atom count and box bounds
            # line 9: "ITEM: ATOMS id type x ..." -> keep the column names
            columns = f.readline().split()[2:]
            # In append mode tell() reports the current size, so 0 means
            # nothing has been written yet and the header is still needed.
            if g.tell() == 0:
                g.write(','.join(columns + ['TIMESTEP']) + '\n')
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # ignore blank lines, e.g. at the end of a file
                g.write(','.join(fields + [timestep]) + '\n')
    print(time.perf_counter() - start)


if __name__ == "__main__":
    main()

Let me know if you need any other syntax or something else in the final CSV. Also, to time something, always use `time.perf_counter()`; it is more accurate than `time.time()` for measuring elapsed time.

CodePudding user response：

Here is something you can try to add TIMESTEP to your data in the CSV. I am not sure whether you need to print the header for each file; my understanding is that the header can be written once at the top. If you want it written for every file, move that code into the for loop.

import numpy as np
import pandas as pd
import csv
import glob
import time


def main():
    """Merge every dump file in ``./all/`` into ``all.csv``, TIMESTEP first.

    The header is built from ``all/dump46000.data``: ``TIMESTEP`` (from line 1,
    ``ITEM: TIMESTEP``) followed by the column names on line 9
    (``ITEM: ATOMS id type ...``).  It is written once, to a freshly
    truncated ``all.csv``.

    Then, for every file matching ``./all/*dump*.data``, each atom row (the
    lines after line 9) is appended to ``all.csv`` with that file's timestep
    value (line 2) inserted as the first field.
    """
    start = time.time()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))
    # get header from one of the files
    header = []

    # 'w' so the header ends up exactly once, at the top of a fresh all.csv
    with open('all/dump46000.data', 'r') as f, open('all.csv', 'w') as g:
        header.extend(f.readline().split()[1:])  # "ITEM: TIMESTEP" -> TIMESTEP
        for _ in range(7):
            next(f)  # skip lines 2-8
        header.extend(f.readline().split()[2:])  # column names from line 9
        print(header)
        header_line = ','.join(header)
        g.write(header_line + '\n')  # write the header

    for file in files:
        with open(file, 'r') as f, open('all.csv', 'a') as g:  # note the 'a'
            next(f)  # line 1: "ITEM: TIMESTEP"
            timeStep = f.readline().split()  # line 2: the timestep value

            for _ in range(7):
                next(f)  # skip lines 3-9

            for line in f:
                file_line = line.split()
                if not file_line:
                    continue  # a blank line would yield a timestep-only row
                file_line.insert(0, timeStep[0])
                data = ','.join(file_line)
                g.write(data + '\n')

    print(time.time() - start)


if __name__ == "__main__":
    main()