I'm following an online tutorial from two years ago. (Its age mattered the last time I asked a question, because the syntax had apparently already changed in the latest version of Python.) Anyway, here's the code I am using:
# Code exactly as posted in the question; kept verbatim so it matches the
# lines quoted in the traceback that follows.
# Collect the entry names returned by os.listdir for ./Sales_Data.
files = [file for file in os.listdir ('./Sales_Data')]
# Start from an empty DataFrame; each file's rows are concatenated onto it.
all_months_data=pd.DataFrame()
for file in files:
# NOTE(review): the directory string has no trailing "/" before the file name;
# the traceback reports the resulting path './Sales_DataSales_April_2019.csv'.
# The "+" between the literal and `file`, and the loop-body indentation, appear
# to have been lost when the post was extracted -- confirm against the original.
df= pd.read_csv("./Sales_Data" file)
all_months_data= pd.concat ([all_months_data, df])
# Write the combined data to all_data.csv without the index column.
all_months_data.to_csv("all_data.csv",index= False)
And here's the error(s) I am getting:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_13984/1832777700.py in <module>
2 all_months_data=pd.DataFrame()
3 for file in files:
----> 4 df= pd.read_csv("./Sales_Data" file)
5 all_months_data= pd.concat ([all_months_data, df])
6 all_months_data.to_csv("all_data.csv",index= False)
~\anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
~\anaconda3\lib\site-packages\pandas\io\parsers\readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
584 kwds.update(kwds_defaults)
585
--> 586 return _read(filepath_or_buffer, kwds)
587
588
~\anaconda3\lib\site-packages\pandas\io\parsers\readers.py in _read(filepath_or_buffer, kwds)
480
481 # Create the parser.
--> 482 parser = TextFileReader(filepath_or_buffer, **kwds)
483
484 if chunksize or iterator:
~\anaconda3\lib\site-packages\pandas\io\parsers\readers.py in __init__(self, f, engine, **kwds)
809 self.options["has_index_names"] = kwds["has_index_names"]
810
--> 811 self._engine = self._make_engine(self.engine)
812
813 def close(self):
~\anaconda3\lib\site-packages\pandas\io\parsers\readers.py in _make_engine(self, engine)
1038 )
1039 # error: Too many arguments for "ParserBase"
-> 1040 return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
1041
1042 def _failover_to_python(self):
~\anaconda3\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py in __init__(self, src, **kwds)
49
50 # open handles
---> 51 self._open_handles(src, kwds)
52 assert self.handles is not None
53
~\anaconda3\lib\site-packages\pandas\io\parsers\base_parser.py in _open_handles(self, src, kwds)
220 Let the readers open IOHandles after they are done with their potential raises.
221 """
--> 222 self.handles = get_handle(
223 src,
224 "r",
~\anaconda3\lib\site-packages\pandas\io\common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
700 if ioargs.encoding and "b" not in ioargs.mode:
701 # Encoding
--> 702 handle = open(
703 handle,
704 ioargs.mode,
FileNotFoundError: [Errno 2] No such file or directory: './Sales_DataSales_April_2019.csv'
1
2
I tried checking for and correcting spelling/syntax errors, which fixed one of the errors.
I also tried adding the last line of code, but I think it only added more errors.
CodePudding user response:
The answer is obvious if you keep reading the stack trace:
FileNotFoundError: [Errno 2] No such file or directory: './Sales_DataSales_April_2019.csv'
A forward slash (`/`) is missing when building the path of the file in the `for` loop.
So, to fix it:
# Minimal fix: the directory prefix now ends with "/", so concatenating the
# file name yields "./Sales_Data/<name>" instead of "./Sales_Data<name>".
files = [file for file in os.listdir('./Sales_Data')]
all_months_data = pd.DataFrame()
for file in files:
    df = pd.read_csv("./Sales_Data/" + file)  # This line!
    all_months_data = pd.concat([all_months_data, df])
# Write the combined frame once, after every file has been appended.
all_months_data.to_csv("all_data.csv", index=False)
A cleaner solution would be to use `os.path.join`; see this other StackOverflow answer: "Create file path from variables".
If we use `os.path.join`, the code becomes:
# Same loop, but os.path.join inserts the path separator itself, so the
# directory and file name can never be glued together without one.
files = [file for file in os.listdir('./Sales_Data')]
all_months_data = pd.DataFrame()
for file in files:
    df = pd.read_csv(os.path.join("./Sales_Data", file))  # This line!
    all_months_data = pd.concat([all_months_data, df])
# Write the combined frame once, after every file has been appended.
all_months_data.to_csv("all_data.csv", index=False)
My last tip is this: always read and understand the error messages before asking!
CodePudding user response:
Hi