I am trying to read log files from different folders and store the data in an Excel sheet.
import os
import pandas as pd
import bs4

path = "D:\\logfolder"

# collect every History.txt found under the log folders
filelist = []
for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith("History.txt"):
            filelist.append(os.path.join(root, file))

for name in filelist:
    print(name, "\n")   # D:\logfolder\logfolder_1\History.txt
                        # D:\logfolder\logfolder_2\History.txt
                        # D:\logfolder\logfolder_3\History.txt
                        # D:\logfolder\logfolder_4\History.txt

# parse each log file and write it to Excel
for name in filelist:
    with open(name, "r") as f:
        soupObj = bs4.BeautifulSoup(f, "lxml")
        df = pd.DataFrame([(x["uri"], *x["t"].split("T"), x["u"], x["desc"])
                           for x in soupObj.find_all("log")],
                          columns=["Database", "Date", "Time", "User", "Description"])
        df.to_excel("logfile.xlsx", index=False)
The expected output is one final Excel sheet containing the data from every History.txt in all the log folders, but I only get the data from the last file (logfolder_4) in the resulting logfile.xlsx. What am I doing wrong?
CodePudding user response:
I believe you need pandas.concat.

Replace the last for-loop block with this:
dfList = []
for name in filelist:
    with open(name, "r") as f:
        soupObj = bs4.BeautifulSoup(f, "lxml")
        df = pd.DataFrame([(x["uri"], *x["t"].split("T"), x["u"], x["desc"])
                           for x in soupObj.find_all("log")],
                          columns=["Database", "Date", "Time", "User", "Description"])
        dfList.append(df)            # keep one DataFrame per log file

# concatenate all per-file DataFrames and write them out once
pd.concat(dfList).to_excel("logfile.xlsx", index=False)
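In your original loop, df.to_excel("logfile.xlsx", ...) runs once per file and overwrites the workbook each time, which is why only logfolder_4 survives; collecting the frames and writing once avoids that.

If you also want to see which History.txt each row came from, or keep each file on its own sheet as well, here is a minimal sketch along the same lines (it reuses filelist from your code; the extra "Source" column and the sheet-naming scheme are my own assumptions, not part of the original):

dfList = []
with pd.ExcelWriter("logfile.xlsx") as writer:
    for name in filelist:
        with open(name, "r") as f:
            soupObj = bs4.BeautifulSoup(f, "lxml")
            df = pd.DataFrame([(x["uri"], *x["t"].split("T"), x["u"], x["desc"])
                               for x in soupObj.find_all("log")],
                              columns=["Database", "Date", "Time", "User", "Description"])
            df["Source"] = name      # assumed extra column: which History.txt the row came from
            dfList.append(df)
            # Excel limits sheet names to 31 characters, so use the parent folder name
            sheet = os.path.basename(os.path.dirname(name))[:31]
            df.to_excel(writer, sheet_name=sheet, index=False)
    # combined view of all files on one extra sheet
    pd.concat(dfList).to_excel(writer, sheet_name="All", index=False)

pd.ExcelWriter lets you write several sheets into the same workbook in one pass instead of overwriting the file on every iteration.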