Home > Enterprise >  XSLX Conversion to CSV file whenever a new file is added into the folder
XSLX Conversion to CSV file whenever a new file is added into the folder

Time:09-22

I try to convert a .xlsx file into .csv file whenever a new file is added into the Inputfolder and put the conversion .csv file in the OutputFolder.

import glob
import time
import os
import pandas as pd

#Get timestamp
timestr = time.strftime("%Y%m%d_%H%M%S")

#Input file path
input_filepath = 'C:/Documents/InputFile'
folderSize = 0


#Function to convert file
def format_csv(latest_file):
#Output file path
filenamepath = 'C:/Documents/OutputFile/'   timestr   '.csv'
read_Excelfile = pd.read_excel(latest_file)
read_Excelfile.to_csv(filenamepath, index=None, header=True)

while True:
  checkFolder = folderSize
  folderSize = 0

  #Check the size of the Input Folder
  for path, dirs, files in os.walk(input_filepath):
      for f in files:
          fp = os.path.join(path, f)
          folderSize  = os.path.getsize(fp)
  print(folderSize)

  #Create new .csv file if the Input folder has new file added
  if(folderSize > checkFolder):
      list_of_files = glob.glob('C:/Documents/InputFile/*.xlsx')
      latest_file = max(list_of_files, key=os.path.getctime)
      format_csv(latest_file)
      print(latest_file)

  time.sleep(15)

Right now the program will only convert the first .xlsx file only. If I add a new .xlsx file into InputFolder, the file is not converted.

CodePudding user response:

You could try something like reading through the folder for all .xlsx files if it finds one convert that to .csv

Here we are reading through the directory for all xlsx files, converting them by creating copies in csv version and then deleting the original xlsx version

import pandas as pd
import os

path = 'C:/Documents/InputFile'

files = os.listdir(path)
for file in files:
    if '.xlsx' in file:
        filename = file[:-5]
        new_filename = path   "/"   filename   ".csv"
        if filename   ".csv" in files:
            pass
        else:
            df = pd.read_excel(file)
            df.to_csv(new_filename)
        

CodePudding user response:

I already improvise my original code. So, whenever I put a new excel file into InputFolder, the program will convert the file to .csv format and insert the formatted file in OutputFolder

import glob
import time
import os
import pandas as pd
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

#Function if new file is created in the folder
def on_created(event):
    list_of_files = glob.glob('C:/Users/Documents/InputFolder/*.xlsx')
    latest_file = max(list_of_files, key=os.path.getctime)
    format_csv(latest_file)

#Function to convert .xlsx to .csv
def format_csv(latest_file):

  # Get timestamp
  timestr = time.strftime("%d%m%Y_%H%M%S")

  #Output file path
  filenamepath = 'C:/Users/Documents/OutputFolder/'   timestr   '.csv'
  read_Excelfile = pd.read_excel(latest_file)
  read_Excelfile.to_csv(filenamepath, index=None, header=True)
  print(filenamepath)

if __name__ == "__main__":
  event_handler = FileSystemEventHandler()

  #Calling function for file insertion
  event_handler.on_created = on_created

  #Input Folder
  path = 'C:/Users/Documents/InputFolder'

  #Function to observe file
  observer = Observer()
  observer.schedule(event_handler, path, recursive=True)
  observer.start()

  try:
      #Check every one second
      while True:
          time.sleep(1)
  except KeyboardInterrupt:
      #Program stop if keyboard interupt
      observer.stop()
  observer.join()
  • Related