Below is the code that I have tried and the image is the output when I try running this.
There are 3 csv's I am loading with the index column name key
import numpy as np
import pandas as pd
from pathlib import Path
import glob
import os
cwd = os.getcwd()
directory = './csvdir'
output_filename = "output.csv"
def combine_csv_files(directory, output_filename):
for f in Path(directory).glob("*.csv"):
try:
df = pd.read_csv(f, index_col="key")
concat1 = pd.concat([df], axis=0)
print(concat1)
except ValueError as e:
print(f"{f.name} does not have a key column")
combine_csv_files(directory, output_filename)
CodePudding user response:
Your function is printing the dataframe, one at the time, instead of concatenating them. Try:
def combine_csv_files(directory, output_filename):
dfs = []
for f in Path(directory).glob("*.csv"):
try:
df = pd.read_csv(f, index_col="key")
dfs.append(df)
except ValueError as e:
print(f"{f.name} does not have a key column")
return pd.concat(dfs)
combine_csv_files(directory, output_filename)
CodePudding user response:
You didn't concat
the new data to the old one.
Try this :
def combine_csv_files(directory, output_filename):
for i, f in enumerate(Path(directory).glob("*.csv")):
try:
df = pd.read_csv(f, index_col="key")
if i == 0 :
concat1 = df
else :
concat1 = pd.concat([concat1, df], axis = 0)
print(concat1)
except ValueError as e:
print(f"{f.name} does not have a key column")
combine_csv_files(directory, output_filename)
CodePudding user response:
You should first define a container outside of the loop and then use np.append
or pd.concat
method. You can also use pd.merge
method, but note that if you don't define a global variable(i.e., out of the loop), your stored data will be lost after each iteration.