I created this code to find all Excel files in a folder and write a CSV file for every sheet in every file. The script works fine, but sometimes the last Excel file converted remains locked by Python on the file system. Can anyone help me understand what's happening?
import sys
from os import listdir
from os.path import isfile, join
import pandas as pd
import csv
import re
def removeEspecialCharacters(obj):
    """Strip unwanted control characters and line breaks from a string value.

    Removes the characters ``\\x90`` and ``\\x8F`` as well as carriage
    returns and line feeds. Values that are not ``str`` are returned
    unchanged, so the function can be applied to every cell of a DataFrame.

    Args:
        obj: Any value; only ``str`` instances are modified.

    Returns:
        The cleaned string, or ``obj`` itself when it is not a string.
    """
    if not isinstance(obj, str):
        return obj
    # Inside a character class '(', '|' and ')' are literal characters, so
    # they must not be listed: only the two control characters and the line
    # breaks are meant to be removed.
    return re.sub(r'[\x90\x8F\r\n]', '', obj)
# Folder scanned for Excel workbooks; the CSV files are written into the same
# folder, one per sheet, named '<workbook name>_<sheet name>.csv'.
myFolder = r'C:\Users\myuser\Downloads\ConvertFilesToCsv'
myFiles = [f for f in listdir(myFolder) if isfile(join(myFolder, f))]

for excelName in myFiles:
    # Split on the last dot only: the base name keeps any earlier dots and the
    # extension is removed regardless of its letter case.
    baseName, dot, extension = excelName.rpartition('.')
    extension = extension.lower()
    if not dot or extension not in ('xls', 'xlsx', 'xlsb'):
        continue

    print('Converting file: ' + excelName)

    # .xlsb workbooks are opened with the pyxlsb engine; every other format
    # uses the engine pandas selects by default.
    engineArgs = {'engine': 'pyxlsb'} if extension == 'xlsb' else {}

    # The context manager closes the workbook when the block is left, even if
    # converting a sheet raises, instead of relying on a close() call that is
    # skipped whenever an exception occurs.
    with pd.ExcelFile(join(myFolder, excelName), **engineArgs) as workbook:
        for mySheetName in workbook.sheet_names:
            df = pd.read_excel(workbook, sheet_name=mySheetName)
            df = df.applymap(removeEspecialCharacters)
            csvFileName = join(myFolder, baseName + '_' + mySheetName + '.csv')
            df.to_csv(
                csvFileName,
                encoding='utf-8-sig',
                index=False,
                sep=",",
                quoting=csv.QUOTE_NONNUMERIC,
                quotechar="\"",
                escapechar="\"",
                decimal=".",
                date_format='%Y-%m-%d',
            )
CodePudding user response:
Note: this is really a comment, posted here as an answer so that the code can be formatted.
Your code looks fine to me. I would advise you to use a context manager, as the pandas documentation does, like this:
for filename in myFiles:
    # Lower-case the extension so names such as 'REPORT.XLSX' are matched too.
    extension = filename.rsplit('.', 1)[-1].lower()
    # skip everything that is not an Excel workbook
    if extension not in ['xls', 'xlsx', 'xlsb']:
        continue
    # only .xlsb files are opened with the pyxlsb engine
    kwargs = {'engine': 'pyxlsb'} if extension == 'xlsb' else {}
    with pd.ExcelFile(join(myFolder, filename), **kwargs) as file:
        # do other stuff with file
        ...
    # no explicit file.close() is needed here: leaving the with block has
    # already closed the workbook