I have a series of .txt files in a folder and I want to load them into a pandas DataFrame.
So far I have only managed to do this by first writing the data to a CSV file and then reading that file back. How can I build the DataFrame directly, without saving an intermediate CSV file?
My code is below:
import os
import csv
import pandas as pd
# Folder that holds the .txt files to load. A raw string keeps the Windows
# backslashes literal instead of relying on unrecognized escape sequences.
main_folder = r'F:\PROJETOS\LOTE45\ARQUIVOS\RISK\RISK_CUSTOM_FUND_N1'
def get_filename(path):
    """Return the names of the regular files directly inside *path*.

    Sub-directories are skipped and the listing is not recursive. Only the
    base names are returned (no directory component), sorted so the result
    is deterministic across runs and platforms.

    Args:
        path: Directory to scan.

    Returns:
        A list of file names (str).

    Raises:
        OSError: If *path* cannot be scanned (e.g. it does not exist).
    """
    # ``entry.name`` is already the base name, so no os.path.basename pass is
    # needed; the ``with`` block closes the scandir iterator promptly.
    with os.scandir(path) as entries:
        return sorted(entry.name for entry in entries if entry.is_file())
files = get_filename(main_folder)

# Dump one (file name, file content) row per input file into an intermediate
# CSV file.
with open('some.csv', 'w', encoding='utf8', newline='') as csv_file:
    # One writer serves the whole file; it does not need rebuilding per row.
    writer = csv.writer(csv_file)
    for _file in files:
        # get_filename() returns bare names, so re-attach the folder.
        with open(os.path.join(main_folder, _file), 'r') as f:
            text = f.read()
        writer.writerow([_file, text])

# The CSV has no header row: without header=None pandas would consume the
# first file's row as the column labels.
df = pd.read_csv('some.csv', header=None, names=['file_name', 'text'],
                 encoding='utf8')
CodePudding user response:
You can try to run something like this:
# Read each file into its own frame and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the accumulated rows on every iteration.
# os.path.join re-attaches the folder to the bare file names (and leaves an
# already-absolute path unchanged).
frames = [pd.read_csv(os.path.join(main_folder, _file)) for _file in files]
# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(frames, ignore_index=True, sort=False) if frames else pd.DataFrame()
CodePudding user response:
You can try to use a dictionary and turn it into a DataFrame.
# your code to obtain the files
# Collect both columns in plain lists and build the DataFrame once at the
# end, so no intermediate CSV file is needed.
data = {"filename": [], "text": []}
for file in files:
    # Re-attach the folder to the bare file name; os.path.join leaves `file`
    # unchanged when it is already an absolute path.
    with open(os.path.join(main_folder, file), "r") as file_object:
        content = file_object.read()
    data["filename"].append(file)
    data["text"].append(content)
dataframe = pd.DataFrame(data)