Python error: TypeErrpr: Object of type int64 is not JSON serializable-CodePudding

I am trying to convert a csv file into a dataset. Here is that code.

import csv
import json
import pandas as pd


def csv_to_json(csvFilePath, jsonFilePath):
    dataset = {
        "dataset_id": "???",
        "areas": []
    }
    areas = []
    cnt = 0
    with open(csvFilePath, encoding='utf-8') as csvf:
        csvReader = csv.DictReader(csvf)

        for row in csvReader:
       
            area = {
               
                "boundary_id": row['boundary_id'],
                "metric": pd.to_numeric(row['risk1']),
                "data": {
                 "Risk1": pd.to_numeric(row["risk1"]),
                 "Risk2": pd.to_numeric(row["risk2"]),
                 "Risk3": pd.to_numeric(row["risk3"]),
                  "Risk4": pd.to_numeric(row["risk4"]),
                  "Risk5": pd.to_numeric(row["risk5"]),
                  "Risk6": pd.to_numeric(row["risk6"]),
                  "Risk7": pd.to_numeric(row["risk7"]),
                  "populationdensitycount": 
                   pd.to_numeric(row["populationdensitycount"])
                }
            }
            areas.append(area)
            cnt  = 1
    dataset["areas"] = areas
    print(cnt)
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonString = json.dumps(dataset, indent=4)
        jsonf.write(jsonString)


csvFilePath = r'file.csv'
jsonFilePath = r'file.json'
csv_to_json(csvFilePath, jsonFilePath)

And here is the shortened version of the csv file

"boundary_id","us_state_id","us_state_abbr","zcta","boundary_type","boundary_subtype","boundary_centroid_lat","boundary_centroid_long","zip_code_array","risk1","risk2","risk3","risk4","risk5","risk6","risk6","populationdensitycount"
"11891","22","MA","01001","zcta",,"42.06259","-72.62589","01001"," 4"," 2.1"," 9"," 2.8"," 3.9"," 10.8"," 3.8","17312"
"24929","22","MA","01002","zcta",,"42.37492","-72.46211","01004, 01059, 01002"," 3.7"," 3.3"," 1.8"," 3.1"," 4.0"," 1.9"," 3.7","30014"
"4431","22","MA","01003","zcta",,"42.39192","-72.52479","01003"," 4.0"," 3.5"," 1.9"," 5.0"," 6.0"," 1.9"," 4.0","11357"

I am receiving this error Object type of type int64 is not JSON serializable and it points to jsonString = json.dumps(dataset, indent=4) as the issue. I have previously ran this script many times with no issues. I am very confused on what the problem could be. Any suggestions?

CodePudding user response：

I suggest you avoid using pd.to_numeric() and choose either float or int for each of your entries. pd.to_numeric() returns either a float64 or an int64 which is not compatible with the json functions you are using.

For example:

import csv
import json


def csv_to_json(csvFilePath, jsonFilePath):
    dataset = {
        "dataset_id": "???",
        "areas": []
    }
    areas = []
    cnt = 0
    with open(csvFilePath, encoding='utf-8') as csvf:
        csvReader = csv.DictReader(csvf)

        for row in csvReader:
       
            area = {
               
                "boundary_id": row['boundary_id'],
                "metric": float(row['risk1']),
                "data": {
                    "Risk1": float(row["risk1"]),
                    "Risk2": float(row["risk2"]),
                    "Risk3": float(row["risk3"]),
                    "Risk4": float(row["risk4"]),
                    "Risk5": float(row["risk5"]),
                    "Risk6": float(row["risk6"]),
                    "Risk7": float(row["risk7"]),
                    "populationdensitycount": int(row["populationdensitycount"])
                }
            }
            
            areas.append(area)
            cnt  = 1
    dataset["areas"] = areas

    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonString = json.dumps(dataset, indent=4)
        jsonf.write(jsonString)


csvFilePath = r'file.csv'
jsonFilePath = r'file.json'
csv_to_json(csvFilePath, jsonFilePath)

Giving you a JSON output file:

{
    "dataset_id": "???",
    "areas": [
        {
            "boundary_id": "11891",
            "metric": 4.0,
            "data": {
                "Risk1": 4.0,
                "Risk2": 2.1,
                "Risk3": 9.0,
                "Risk4": 2.8,
                "Risk5": 3.9,
                "Risk6": 10.8,
                "Risk7": 3.8,
                "populationdensitycount": 17312
            }
        },
        {
            "boundary_id": "24929",
            "metric": 3.7,
            "data": {
                "Risk1": 3.7,
                "Risk2": 3.3,
                "Risk3": 1.8,
                "Risk4": 3.1,
                "Risk5": 4.0,
                "Risk6": 1.9,
                "Risk7": 3.7,
                "populationdensitycount": 30014
            }
        },
        {
            "boundary_id": "4431",
            "metric": 4.0,
            "data": {
                "Risk1": 4.0,
                "Risk2": 3.5,
                "Risk3": 1.9,
                "Risk4": 5.0,
                "Risk5": 6.0,
                "Risk6": 1.9,
                "Risk7": 4.0,
                "populationdensitycount": 11357
            }
        }
    ]
}

CodePudding user response：

You should convert the data from int64 to a normal python int so that the built in libraries are better able to handle it.