I'm using pandas to read Dataframe()...
After converting to datetime64, I need to use the max() method on that datetime column. But the problem is that I can't convert :(...
When I go to read my Excel Dataframe it is presenting the dates this way 2020-03-10T00:00:00.000Z
. I need to convert to datetime64 type but it is reading as string. Here's the example below:
df['Date'].dtype
Output: dtype('O')
df['Date'] = pd.to_datetime(df['Date'] )
Output:
ERROR:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
c:\Users\XXXXXX\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
2084 try:
-> 2085 values, tz_parsed = conversion.datetime_to_datetime64(data)
2086 # If tzaware, these values represent unix timestamps, so we
pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.datetime_to_datetime64()
TypeError: Unrecognized value type: <class 'str'>
During handling of the above exception, another exception occurred:
ParserError Traceback (most recent call last)
<ipython-input-66-917cb86929c6> in <module>
----> 1 df['Date'] = pd.to_datetime(df['Date'] )
c:\Users\XXXXXX\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)
799 result = result.tz_localize(tz)
800 elif isinstance(arg, ABCSeries):
--> 801 cache_array = _maybe_cache(arg, format, cache, convert_listlike)
802 if not cache_array.empty:
803 result = arg.map(cache_array)
c:\Users\XXXXXX\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in _maybe_cache(arg, format, cache, convert_listlike)
176 unique_dates = unique(arg)
177 if len(unique_dates) < len(arg):
--> 178 cache_dates = convert_listlike(unique_dates, format)
179 cache_array = Series(cache_dates, index=unique_dates)
180 return cache_array
c:\Users\XXXXXX\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in _convert_listlike_datetimes(arg, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)
463 assert format is None or infer_datetime_format
464 utc = tz == "utc"
--> 465 result, tz_parsed = objects_to_datetime64ns(
466 arg,
467 dayfirst=dayfirst,
c:\Users\XXXXXX\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
2088 return values.view("i8"), tz_parsed
2089 except (ValueError, TypeError):
-> 2090 raise e
2091
2092 if tz_parsed is not None:
c:\Users\XXXXXX\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
2073
2074 try:
-> 2075 result, tz_parsed = tslib.array_to_datetime(
2076 data,
2077 errors=errors,
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime_object()
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime_object()
pandas\_libs\tslibs\parsing.pyx in pandas._libs.tslibs.parsing.parse_datetime_string()
c:\Users\XXXXXX\Anaconda3\lib\site-packages\dateutil\parser\_parser.py in parse(timestr, parserinfo, **kwargs)
1372 return parser(parserinfo).parse(timestr, **kwargs)
1373 else:
-> 1374 return DEFAULTPARSER.parse(timestr, **kwargs)
1375
1376
c:\Users\XXXXXX\Anaconda3\lib\site-packages\dateutil\parser\_parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
650
651 if len(res) == 0:
--> 652 raise ParserError("String does not contain a date: %s", timestr)
653
654 try:
ParserError: String does not contain a date: -
CodePudding user response:
Please see if this snippet helps you out, I have worked with some sample data and added some values to reproduce the error:
import numpy as np
df = pd.DataFrame({'Date': ['2023-03-15 00:00:00', '2020-03-10T00:00:00.000Z', '-', '23', 33, np.nan, '', None, '2020-03-10T00:00:00.000Z', '2026-01-15 00:00:00']})
df.head(10)
Date
0 2023-03-15 00:00:00
1 2020-03-10T00:00:00.000Z
2 -
3 23
4 33
5 NaN
6
7 None
8 2020-03-10T00:00:00.000Z
9 2026-01-15 00:00:00
df['Date_new'] = pd.to_datetime(df['Date'], errors='coerce')
df.head(10)
Date Date_new
0 2023-03-15 00:00:00 2023-03-15 00:00:00.000000000
1 2020-03-10T00:00:00.000Z 2020-03-10 00:00:00.000000000
2 - NaT
3 23 NaT
4 33 1970-01-01 00:00:00.000000033
5 NaN NaT
6 NaT
7 None NaT
8 2020-03-10T00:00:00.000Z 2020-03-10 00:00:00.000000000
9 2026-01-15 00:00:00 2026-01-15 00:00:00.000000000
df.Date_new.max()
Timestamp('2026-01-15 00:00:00')
CodePudding user response:
Maybe you should make your own date function? Apparently, according to this, Python does not have any built-in parser for ISO-8601 so...
from datetime import datetime
def fix_iso_date_str(dstr):
return datetime.fromisoformat(dstr.replace('Z', ' 00:00'))
df['Date'] = fix_iso_date_str(df['Date'] )