I am trying to convert a csv file into a dataset. Here is that code.
import csv
import json
import pandas as pd
def csv_to_json(csvFilePath, jsonFilePath):
dataset = {
"dataset_id": "???",
"areas": []
}
areas = []
cnt = 0
with open(csvFilePath, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
for row in csvReader:
area = {
"boundary_id": row['boundary_id'],
"metric": pd.to_numeric(row['risk1']),
"data": {
"Risk1": pd.to_numeric(row["risk1"]),
"Risk2": pd.to_numeric(row["risk2"]),
"Risk3": pd.to_numeric(row["risk3"]),
"Risk4": pd.to_numeric(row["risk4"]),
"Risk5": pd.to_numeric(row["risk5"]),
"Risk6": pd.to_numeric(row["risk6"]),
"Risk7": pd.to_numeric(row["risk7"]),
"populationdensitycount":
pd.to_numeric(row["populationdensitycount"])
}
}
areas.append(area)
cnt = 1
dataset["areas"] = areas
print(cnt)
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonString = json.dumps(dataset, indent=4)
jsonf.write(jsonString)
csvFilePath = r'file.csv'
jsonFilePath = r'file.json'
csv_to_json(csvFilePath, jsonFilePath)
And here is the shortened version of the csv file
"boundary_id","us_state_id","us_state_abbr","zcta","boundary_type","boundary_subtype","boundary_centroid_lat","boundary_centroid_long","zip_code_array","risk1","risk2","risk3","risk4","risk5","risk6","risk6","populationdensitycount"
"11891","22","MA","01001","zcta",,"42.06259","-72.62589","01001"," 4"," 2.1"," 9"," 2.8"," 3.9"," 10.8"," 3.8","17312"
"24929","22","MA","01002","zcta",,"42.37492","-72.46211","01004, 01059, 01002"," 3.7"," 3.3"," 1.8"," 3.1"," 4.0"," 1.9"," 3.7","30014"
"4431","22","MA","01003","zcta",,"42.39192","-72.52479","01003"," 4.0"," 3.5"," 1.9"," 5.0"," 6.0"," 1.9"," 4.0","11357"
I am receiving this error Object type of type int64 is not JSON serializable
and it points to jsonString = json.dumps(dataset, indent=4)
as the issue. I have previously ran this script many times with no issues. I am very confused on what the problem could be. Any suggestions?
CodePudding user response:
I suggest you avoid using pd.to_numeric()
and choose either float
or int
for each of your entries. pd.to_numeric()
returns either a float64
or an int64
which is not compatible with the json functions you are using.
For example:
import csv
import json
def csv_to_json(csvFilePath, jsonFilePath):
dataset = {
"dataset_id": "???",
"areas": []
}
areas = []
cnt = 0
with open(csvFilePath, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
for row in csvReader:
area = {
"boundary_id": row['boundary_id'],
"metric": float(row['risk1']),
"data": {
"Risk1": float(row["risk1"]),
"Risk2": float(row["risk2"]),
"Risk3": float(row["risk3"]),
"Risk4": float(row["risk4"]),
"Risk5": float(row["risk5"]),
"Risk6": float(row["risk6"]),
"Risk7": float(row["risk7"]),
"populationdensitycount": int(row["populationdensitycount"])
}
}
areas.append(area)
cnt = 1
dataset["areas"] = areas
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonString = json.dumps(dataset, indent=4)
jsonf.write(jsonString)
csvFilePath = r'file.csv'
jsonFilePath = r'file.json'
csv_to_json(csvFilePath, jsonFilePath)
Giving you a JSON output file:
{
"dataset_id": "???",
"areas": [
{
"boundary_id": "11891",
"metric": 4.0,
"data": {
"Risk1": 4.0,
"Risk2": 2.1,
"Risk3": 9.0,
"Risk4": 2.8,
"Risk5": 3.9,
"Risk6": 10.8,
"Risk7": 3.8,
"populationdensitycount": 17312
}
},
{
"boundary_id": "24929",
"metric": 3.7,
"data": {
"Risk1": 3.7,
"Risk2": 3.3,
"Risk3": 1.8,
"Risk4": 3.1,
"Risk5": 4.0,
"Risk6": 1.9,
"Risk7": 3.7,
"populationdensitycount": 30014
}
},
{
"boundary_id": "4431",
"metric": 4.0,
"data": {
"Risk1": 4.0,
"Risk2": 3.5,
"Risk3": 1.9,
"Risk4": 5.0,
"Risk5": 6.0,
"Risk6": 1.9,
"Risk7": 4.0,
"populationdensitycount": 11357
}
}
]
}
CodePudding user response:
You should convert the data from int64
to a normal python int
so that the built in libraries are better able to handle it.