Convert nested CSV data to json-CodePudding

I have a csv that is in the following format:

p1,p2,p3,p4,p5,p6,p7,p8,p9
u1,u2,u3,u4,u5,u6,u7,u8,u9
10,500,10,10,6.192187,0,0,10.3313,8.58659
2,150,5,3,6.7125,8.497266,9.130859,6.46657,7.645142111
,,,4,6.960938,9.472656,,6.64784,8.060439698
,,,5,7.819531,0,0,6.88975,8.972818303

where 'p' represents properties and 'u' represents units. Note that there are missing values when subsequent values are the same for that column. Unfortunately I cannot pad them.

My aim is to convert the csv to JSON using python. The format should be something like the following structure:

val1:{
      val2:{
        val3:{ [val4,val5,val6,val7,val8,val9]
                    }}}

val1:{
      val2:{
        val3:{ [val4,val5,val6,val7,val8,val9],
               [val4,val5,val6,val7,val8,val9],
               [val4,val5,val6,val7,val8,val9],
                 }}}

Note that the first two lines can be ignored. I have the following python code to start with :

import csv
import json
 
 
# Function to convert a CSV to JSON
# Takes the file paths as arguments


def make_json(csvFilePath, jsonFilePath):
 
    # create a dictionary
    data = {}
 
    # Open a csv reader called DictReader
    with open(csvFilePath,  mode='r', encoding='utf-8-sig') as csvf:
        csvReader = csv.DictReader(csvf,delimiter =',')

    for rows in csvReader:
        # Assuming a column named 'p1' to
        # be the primary key
        key = rows['p1']
        data[key] = rows
    
# Open a json writer, and use the json.dumps()
# function to dump data
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
    jsonf.write(json.dumps(data, indent=4))
     
# Driver Code
 
# Decide the two file paths according to your
# computer system
csvFilePath = r'./data/chart-0.csv'
jsonFilePath = r'./js/try.js'

 
# Call the make_json function
make_json(csvFilePath, jsonFilePath)

How should I go about this?

CodePudding user response：

If you want to convert .csv to .json, use pandas and json

First of all, we would convert csv to dictionary and then dict to json

import pandas as pd
import json

file = pd.read_csv('file.csv')
print(json.dumps(file.to_dict(), indent=4))

Output:

{
    "p1": {
        "0": "u1",
        "1": "10",
        "2": "2",
        "3": NaN,
        "4": NaN
    },
    "p2": {
        "0": "u2",
        "1": "500",
        "2": "150",
        "3": NaN,
        "4": NaN
    },
    "p3": {
        "0": "u3",
        "1": "10",
        "2": "5",
        "3": NaN,
        "4": NaN
    },
    "p4": {
        "0": "u4",
        "1": "10",
        "2": "3",
        "3": "4",
        "4": "5"
    },
    "p5": {
        "0": "u5",
        "1": "6.192187",
        "2": "6.7125",
        "3": "6.960938",
        "4": "7.819531"
    },
    "p6": {
        "0": "u6",
        "1": "0",
        "2": "8.497266",
        "3": "9.472656",
        "4": "0"
    },
    "p7": {
        "0": "u7",
        "1": "0",
        "2": "9.130859",
        "3": NaN,
        "4": "0"
    },
    "p8": {
        "0": "u8",
        "1": "10.3313",
        "2": "6.46657",
        "3": "6.64784",
        "4": "6.88975"
    },
    "p9": {
        "0": "u9",
        "1": "8.58659",
        "2": "7.645142111",
        "3": "8.060439698",
        "4": "8.972818303"
    }
}

CodePudding user response：

I think this does what you want if I've understood what's implied by the desired output format shown in your question with regards to handling the missing values.

import csv
import json


# Function to convert a CSV to JSON
# Takes the file paths as arguments
def make_json(csvFilePath, jsonFilePath):

    # create a dictionary
    data = {}

    # Open and read with a csv reader.
    with open(csvFilePath,  mode='r', encoding='utf-8-sig') as csvf:
        csvReader = csv.reader(csvf, delimiter =',')
        next(csvReader); next(csvReader)  # Skip first 2 rows.
        prev_key = None
        for row in csvReader:
            key = row[0:3]
            if not all(key):  # If keys missing, use previous key.
                data[prev_key[0]][prev_key[1]][prev_key[2]].append(row[4:])
            else:
                data.setdefault(key[0], {}).setdefault(key[1], {}).setdefault(key[2], [])
                data[key[0]][key[1]][key[2]].append(row[4:])
                prev_key = key

    # Open a json writer, and use the json.dumps()
    # function to dump data
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(data, indent=4))


# Driver Code

# Decide the two file paths according to your
# computer system
csvFilePath = 'chart-0.csv'
jsonFilePath = 'try.json'


# Call the make_json function
make_json(csvFilePath, jsonFilePath)

Contents of JSON output file after running:

{
    "10": {
        "500": {
            "10": [
                [
                    "6.192187",
                    "0",
                    "0",
                    "10.3313",
                    "8.58659"
                ]
            ]
        }
    },
    "2": {
        "150": {
            "5": [
                [
                    "6.7125",
                    "8.497266",
                    "9.130859",
                    "6.46657",
                    "7.645142111"
                ],
                [
                    "6.960938",
                    "9.472656",
                    "",
                    "6.64784",
                    "8.060439698"
                ],
                [
                    "7.819531",
                    "0",
                    "0",
                    "6.88975",
                    "8.972818303"
                ]
            ]
        }
    }
}