I'm having a datetime error but only for some rows. Here's my code:
import requests
import csv
import datetime
def import_data_files():
    """Download the sample orders CSV to ./sample_data/orders.csv.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.Timeout: if the server does not answer within the timeout.
    """
    r = requests.get(
        'https://raw.githubusercontent.com/anyoneai/notebooks/main/customers_and_orders/data/orders.csv',
        timeout=30,
    )
    # Fail loudly instead of saving an HTTP error page as if it were CSV data.
    r.raise_for_status()
    with open('./sample_data/orders.csv', 'wb') as f:
        f.write(r.content)
# Count the orders placed in October 2021 by parsing each Date as a datetime.
import_data_files()
path = './sample_data/orders.csv'
row_cnt = 0
orders_placed = 0
orders_date = []
# newline='' is what the csv module expects for files handed to csv.reader.
with open(path, 'r', newline='') as fl:
    # Use a separate name so the csv module itself is not shadowed.
    reader = csv.reader(fl, delimiter=',', quotechar='"')
    next(reader)  # skip the first line (presumably the header row)
    for CustomerID, OrderID, Date, OrderTotal, ProductName, Price in reader:
        # NOTE: this unguarded call is where the reported error occurs:
        # strptime raises ValueError when Date is an empty string.
        date = datetime.datetime.strptime(Date, '%Y-%m-%d %H:%M:%S.%f')
        if date.year == 2021:
            if date.month == 10:
                orders_placed += 1
        row_cnt += 1
        # Debugging limit: with a limit of 232 the loop finishes without error.
        if row_cnt > 234:
            break
print(orders_placed)
If I run only up to row 232 everything works, but if I go past that I get an error. Parsing the dates as plain strings, without the datetime conversion, I can get the result, but I don't know if that's OK or whether I'm missing something. Here's the string code:
import requests
import csv
def import_data_files():
    """Download the sample orders CSV to ./sample_data/orders.csv.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.Timeout: if the server does not answer within the timeout.
    """
    r = requests.get(
        'https://raw.githubusercontent.com/anyoneai/notebooks/main/customers_and_orders/data/orders.csv',
        timeout=30,
    )
    # Fail loudly instead of saving an HTTP error page as if it were CSV data.
    r.raise_for_status()
    with open('./sample_data/orders.csv', 'wb') as f:
        f.write(r.content)
# Count the orders placed in October 2021 by comparing the date text directly,
# without converting it to a datetime.
import_data_files()
path = './sample_data/orders.csv'
orders_placed = 0
# newline='' is what the csv module expects for files handed to csv.reader.
with open(path, 'r', newline='') as fl:
    # Use a separate name so the csv module itself is not shadowed.
    reader = csv.reader(fl, delimiter=',', quotechar='"')
    next(reader)  # skip the first line (presumably the header row)
    for CustomerID, OrderID, Date, OrderTotal, ProductName, Price in reader:
        # The first two '-'-separated parts are the year and the month.
        # An empty Date splits to [''], which simply does not match, so rows
        # without a date are ignored instead of raising.
        if Date.split("-")[:2] == ['2021', '10']:
            orders_placed += 1
print(orders_placed)
CodePudding user response:
Many of the rows do not contain a date field. In these cases `Date` is empty, and as such your `.strptime()` call will fail. I suggest you skip over these rows, for example:
import requests
import csv
import datetime
def import_data_files():
    """Download the sample orders CSV to orders.csv in the working directory.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.Timeout: if the server does not answer within the timeout.
    """
    r = requests.get(
        'https://raw.githubusercontent.com/anyoneai/notebooks/main/customers_and_orders/data/orders.csv',
        timeout=30,
    )
    # Fail loudly instead of saving an HTTP error page as if it were CSV data.
    r.raise_for_status()
    with open('orders.csv', 'wb') as f:
        f.write(r.content)
# Count the orders placed in October 2021, skipping rows that have no date.
import_data_files()
path = 'orders.csv'
orders_placed = 0
# newline='' is what the csv module expects for files handed to csv.reader.
with open(path, 'r', newline='') as fl:
    # Use a separate name so the csv module itself is not shadowed.
    reader = csv.reader(fl, delimiter=',', quotechar='"')
    # Skip the first line (presumably the header row); the default keeps an
    # empty file from raising StopIteration.
    next(reader, None)
    for CustomerID, OrderID, Date, OrderTotal, ProductName, Price in reader:
        # Some rows have an empty Date field; strptime would raise
        # ValueError on those, so leave them out of the count.
        if not Date:
            continue
        date = datetime.datetime.strptime(Date, '%Y-%m-%d %H:%M:%S.%f')
        if date.year == 2021 and date.month == 10:
            orders_placed += 1
print(orders_placed)
This would display: 437
Normally a row looks like:
1900610,7692429,2020-01-12 13:52:40.677,200.0000,0981,200.0000
But the failing rows without a date look like:
5014,5365,,120.0000,211,60.0000