I try to convert a .xlsx file into .csv file whenever a new file is added into the Inputfolder and put the conversion .csv file in the OutputFolder.
import glob
import time
import os
import pandas as pd
#Get timestamp
timestr = time.strftime("%Y%m%d_%H%M%S")
#Input file path
input_filepath = 'C:/Documents/InputFile'
folderSize = 0
#Function to convert file
def format_csv(latest_file):
#Output file path
filenamepath = 'C:/Documents/OutputFile/' timestr '.csv'
read_Excelfile = pd.read_excel(latest_file)
read_Excelfile.to_csv(filenamepath, index=None, header=True)
while True:
checkFolder = folderSize
folderSize = 0
#Check the size of the Input Folder
for path, dirs, files in os.walk(input_filepath):
for f in files:
fp = os.path.join(path, f)
folderSize = os.path.getsize(fp)
print(folderSize)
#Create new .csv file if the Input folder has new file added
if(folderSize > checkFolder):
list_of_files = glob.glob('C:/Documents/InputFile/*.xlsx')
latest_file = max(list_of_files, key=os.path.getctime)
format_csv(latest_file)
print(latest_file)
time.sleep(15)
Right now the program will only convert the first .xlsx file only. If I add a new .xlsx file into InputFolder, the file is not converted.
CodePudding user response:
You could try something like reading through the folder for all .xlsx files if it finds one convert that to .csv
Here we are reading through the directory for all xlsx files, converting them by creating copies in csv version and then deleting the original xlsx version
import pandas as pd
import os
path = 'C:/Documents/InputFile'
files = os.listdir(path)
for file in files:
if '.xlsx' in file:
filename = file[:-5]
new_filename = path "/" filename ".csv"
if filename ".csv" in files:
pass
else:
df = pd.read_excel(file)
df.to_csv(new_filename)
CodePudding user response:
I already improvise my original code. So, whenever I put a new excel file into InputFolder, the program will convert the file to .csv format and insert the formatted file in OutputFolder
import glob
import time
import os
import pandas as pd
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
#Function if new file is created in the folder
def on_created(event):
list_of_files = glob.glob('C:/Users/Documents/InputFolder/*.xlsx')
latest_file = max(list_of_files, key=os.path.getctime)
format_csv(latest_file)
#Function to convert .xlsx to .csv
def format_csv(latest_file):
# Get timestamp
timestr = time.strftime("%d%m%Y_%H%M%S")
#Output file path
filenamepath = 'C:/Users/Documents/OutputFolder/' timestr '.csv'
read_Excelfile = pd.read_excel(latest_file)
read_Excelfile.to_csv(filenamepath, index=None, header=True)
print(filenamepath)
if __name__ == "__main__":
event_handler = FileSystemEventHandler()
#Calling function for file insertion
event_handler.on_created = on_created
#Input Folder
path = 'C:/Users/Documents/InputFolder'
#Function to observe file
observer = Observer()
observer.schedule(event_handler, path, recursive=True)
observer.start()
try:
#Check every one second
while True:
time.sleep(1)
except KeyboardInterrupt:
#Program stop if keyboard interupt
observer.stop()
observer.join()