How do I add the directory path as another Column to my dataframe?-CodePudding

I am using this Python script to find CSV files with a particular name in the sub-directories of my folder and read them into dataframes. I have been trying to add each file's directory path as another column in its dataframe, but I keep encountering errors.

import pandas as pd
import glob
import os

path = 'main/directory/path'
file_extension = '.csv'

# Gather the full path of every file below `path` whose name begins with
# "FileName.csv", searching all sub-directories.
csv_file_list = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(path)
    for filename in filenames
    if filename.startswith("FileName.csv")
]

# One DataFrame per matching file, in the same order as csv_file_list.
dfs = list(map(pd.read_csv, csv_file_list))

print(dfs)

How can I add the directory path where each of these files was found as another column in the dataframe?

CodePudding user response：

Read and append the path to the df as a new column in the loop itself.

import pandas as pd
import glob
import os

path = 'main/directory/path'
file_extension = '.csv'
dfs = []
# Walk every sub-directory of `path`, load each matching CSV and tag all of
# its rows with the path of the file they came from.
for root, dirs, files in os.walk(path):
    for name in files:
        if name.startswith("FileName.csv"):
            file_path = os.path.join(root, name)
            df = pd.read_csv(file_path)
            # Assigning a scalar broadcasts it to every row of the frame.
            df["file_path"] = file_path
            dfs.append(df)
# pd.concat raises ValueError when given an empty list, so fall back to an
# empty frame (with the expected column) when no file matched.
dfs = pd.concat(dfs) if dfs else pd.DataFrame(columns=["file_path"])
print(dfs)

CodePudding user response：

Hope this is what you're looking for

import pandas as pd
import glob
import os

path = 'main/directory/path'
file_extension = '.csv'
csv_file_list = []
# Collect the full path of every file below `path` whose name begins with
# "FileName.csv", searching all sub-directories.
for root, dirs, files in os.walk(path):
    for name in files:
        if name.startswith("FileName.csv"):
            csv_file_list.append(os.path.join(root, name))

# Load each file and record where it came from as the first column.
dfs = []
for file_path in csv_file_list:
    df = pd.read_csv(file_path)
    # DataFrame.insert modifies the frame in place and returns None, so its
    # result must not be collected; keep the frame itself instead.
    df.insert(0, "file_path", file_path)
    dfs.append(df)

print(dfs)

You can also concatenate the dataframes into a single one:

# Stack the per-file frames into one. pd.concat raises ValueError on an empty
# list, so fall back to an empty DataFrame when `dfs` holds no frames.
df_concated = pd.concat(dfs) if dfs else pd.DataFrame()
print(df_concated)