Home > database >  Empty body response when reading csv from s3 using pandas : FileNotFoundError: [Errno 2]
Empty body response when reading csv from s3 using pandas : FileNotFoundError: [Errno 2]

Time:11-20

I am using boto3 and pandas to read a CSV file from S3. I get a response containing a stream of bytes; however, when I attempt to load it with pandas, I get an error (either a FileNotFoundError or an EmptyDataError, depending on how I pass the data).

import boto3
import pandas as pd

client = boto3.client(
    's3',
    aws_access_key_id="xxx",
    aws_secret_access_key="xxx",
)

key = 'Dir/filename.csv' 

result = client.get_object(Bucket="buck-1", Key=key)
print(result['Body'])

<botocore.response.StreamingBody at 0x7f44ecdd9220>

df = pd.read_csv(result['Body'].read().decode('utf-8'))

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
/tmp/ipykernel_9157/3467576557.py in <cell line: 4>()
      2 #df = pd.read_csv(io.BytesIO(result['Body'].read()))
      3 
----> 4 df = pd.read_csv(result['Body'].read().decode('utf-8'))

~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    309                     stacklevel=stacklevel,
    310                 )
--> 311             return func(*args, **kwargs)
    312 
    313         return wrapper

~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/io/parsers/readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    584     kwds.update(kwds_defaults)
    585 
--> 586     return _read(filepath_or_buffer, kwds)
    587 
    588 

~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds)
    480 
    481     # Create the parser.
--> 482     parser = TextFileReader(filepath_or_buffer, **kwds)
    483 
    484     if chunksize or iterator:

~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/io/parsers/readers.py in __init__(self, f, engine, **kwds)
    809             self.options["has_index_names"] = kwds["has_index_names"]
    810 
--> 811         self._engine = self._make_engine(self.engine)
    812 
    813     def close(self):

~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/io/parsers/readers.py in _make_engine(self, engine)
   1038             )
   1039         # error: Too many arguments for "ParserBase"
-> 1040         return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
   1041 
   1042     def _failover_to_python(self):

~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/io/parsers/c_parser_wrapper.py in __init__(self, src, **kwds)
     49 
     50         # open handles
---> 51         self._open_handles(src, kwds)
     52         assert self.handles is not None
     53 

~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/io/parsers/base_parser.py in _open_handles(self, src, kwds)
    220         Let the readers open IOHandles after they are done with their potential raises.
    221         """
--> 222         self.handles = get_handle(
    223             src,
    224             "r",

~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    700         if ioargs.encoding and "b" not in ioargs.mode:
    701             # Encoding
--> 702             handle = open(
    703                 handle,
    704                 ioargs.mode,

FileNotFoundError: [Errno 2] No such file or directory: ''

df = pd.read_csv(io.BytesIO(result['Body'].read()))

    ~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/io/parsers/readers.py in _make_engine(self, engine)
   1038             )
   1039         # error: Too many arguments for "ParserBase"
-> 1040         return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
   1041 
   1042     def _failover_to_python(self):

~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/io/parsers/c_parser_wrapper.py in __init__(self, src, **kwds)
     67         kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None))
     68         try:
---> 69             self._reader = parsers.TextReader(self.handles.handle, **kwds)
     70         except Exception:
     71             self.handles.close()

~/anaconda3/envs/python3/lib/python3.8/site-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

EmptyDataError: No columns to parse from file

I have tried different decoding values without any luck.

CodePudding user response:

The correct way is:

df = pd.read_csv(io.BytesIO(result['Body'].read()))

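So it's unclear why you commented this out, as this is how a CSV file should be read from S3 (remember to `import io` first). Two details explain the errors above. First, `pd.read_csv` interprets a plain `str` argument as a file path or URL, not as CSV content, so passing the decoded text makes pandas try to open a file with that name; to parse text directly, wrap it in `io.StringIO(...)`. Second, a `StreamingBody` can only be read once: after the first `.read()`, later calls return empty bytes. That is most likely why the path in the FileNotFoundError is `''` and why the `io.BytesIO` attempt raised EmptyDataError. Call `client.get_object(...)` again to obtain a fresh body before each read.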

  • Related