I'm working on this script that first takes all .csv's and converts them .xlsx's in a separate folder. I'm getting the first file to output exactly how I want in the 'Script files' folder, but then it throws a Traceback error before it does the second one.
Script code below, Traceback error below that. Some path data removed for privacy:
import pandas as pd
import matplotlib.pyplot as plt
import os
# Assign current directory and list files there
f_path = os.path.dirname(__file__)
rd_path = f_path '\\Raw Data'
sc_path = f_path '\\Script files'
# Create /Script files folder
if os.path.isdir(sc_path) == False:
os.mkdir(sc_path)
print("\nCreating new Script files path here...",sc_path)
else:
print("\nScript files directory exists!")
# List files in Raw Data directory
print("\nRaw Data files in the directory:\n",rd_path,"\n")
for filename in os.listdir(rd_path):
f = os.path.join(rd_path,filename)
if os.path.isfile(f):
print(filename)
print("\n\n\n")
# Copy and edit data files to /Script files folder
for filename in os.listdir(rd_path):
src = os.path.join(rd_path,filename)
if os.path.isfile(src):
name = os.path.splitext(filename)[0]
read_file = pd.read_csv(src)
result = sc_path "\\" name '.xlsx'
read_file.to_excel(result)
print(src,"\nconverted and written to: \n",result,"\n\n")
Traceback (most recent call last):
File "C:\Users\_________________\Graph.py", line 32, in <module>
read_file = pd.read_csv(src)
File "C:\Users\_____________\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\Users\______________\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\parsers\readers.py", line 680, in read_csv
return _read(filepath_or_buffer, kwds)
File "C:\Users\_____________\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\parsers\readers.py", line 581, in _read
return parser.read(nrows)
File "C:\Users\_____________\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\parsers\readers.py", line 1250, in read
index, columns, col_dict = self._engine.read(nrows)
File "C:\Users\_____________\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py", line 225, in read
chunks = self._reader.read_low_memory(nrows)
File "pandas\_libs\parsers.pyx", line 805, in pandas._libs.parsers.TextReader.read_low_memory
File "pandas\_libs\parsers.pyx", line 861, in pandas._libs.parsers.TextReader._read_rows
File "pandas\_libs\parsers.pyx", line 847, in pandas._libs.parsers.TextReader._tokenize_rows
File "pandas\_libs\parsers.pyx", line 1960, in pandas._libs.parsers.raise_parser_error
pandas.errors.ParserError: Error tokenizing data. C error: Expected 2 fields in line 47, saw 8
CodePudding user response:
Have you tried to convert to xlsx the second file in the folder? I'm not sure but it seems like there's a problem when Pandas reads the csv.
CodePudding user response:
I think this is because os.listdir
contains new excel files you converted.
You can either put excel files elsewhere outside rd_path
, or exclude input files not ends with .csv
.
for filename in os.listdir(rd_path):
# exclude non csv files
if not filename.endswith('.csv'):
continue