I have a csv that is in the following format:
p1,p2,p3,p4,p5,p6,p7,p8,p9
u1,u2,u3,u4,u5,u6,u7,u8,u9
10,500,10,10,6.192187,0,0,10.3313,8.58659
2,150,5,3,6.7125,8.497266,9.130859,6.46657,7.645142111
,,,4,6.960938,9.472656,,6.64784,8.060439698
,,,5,7.819531,0,0,6.88975,8.972818303
where 'p' represents properties and 'u' represents units. Note that there are missing values when subsequent values are the same for that column. Unfortunately I cannot pad them.
My aim is to convert the csv to JSON using python. The format should be something like the following structure:
val1:{
val2:{
val3:{ [val4,val5,val6,val7,val8,val9]
}}}
val1:{
val2:{
val3:{ [val4,val5,val6,val7,val8,val9],
[val4,val5,val6,val7,val8,val9],
[val4,val5,val6,val7,val8,val9],
}}}
Note that the first two lines can be ignored. I have the following python code to start with :
import csv
import json
# Function to convert a CSV to JSON
# Takes the file paths as arguments
def make_json(csvFilePath, jsonFilePath):
# create a dictionary
data = {}
# Open a csv reader called DictReader
with open(csvFilePath, mode='r', encoding='utf-8-sig') as csvf:
csvReader = csv.DictReader(csvf,delimiter =',')
for rows in csvReader:
# Assuming a column named 'p1' to
# be the primary key
key = rows['p1']
data[key] = rows
# Open a json writer, and use the json.dumps()
# function to dump data
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonf.write(json.dumps(data, indent=4))
# Driver Code
# Decide the two file paths according to your
# computer system
csvFilePath = r'./data/chart-0.csv'
jsonFilePath = r'./js/try.js'
# Call the make_json function
make_json(csvFilePath, jsonFilePath)
How should I go about this?
CodePudding user response:
If you want to convert .csv to .json, use pandas and json
First of all, we would convert csv to dictionary and then dict to json
import pandas as pd
import json
file = pd.read_csv('file.csv')
print(json.dumps(file.to_dict(), indent=4))
Output:
{
"p1": {
"0": "u1",
"1": "10",
"2": "2",
"3": NaN,
"4": NaN
},
"p2": {
"0": "u2",
"1": "500",
"2": "150",
"3": NaN,
"4": NaN
},
"p3": {
"0": "u3",
"1": "10",
"2": "5",
"3": NaN,
"4": NaN
},
"p4": {
"0": "u4",
"1": "10",
"2": "3",
"3": "4",
"4": "5"
},
"p5": {
"0": "u5",
"1": "6.192187",
"2": "6.7125",
"3": "6.960938",
"4": "7.819531"
},
"p6": {
"0": "u6",
"1": "0",
"2": "8.497266",
"3": "9.472656",
"4": "0"
},
"p7": {
"0": "u7",
"1": "0",
"2": "9.130859",
"3": NaN,
"4": "0"
},
"p8": {
"0": "u8",
"1": "10.3313",
"2": "6.46657",
"3": "6.64784",
"4": "6.88975"
},
"p9": {
"0": "u9",
"1": "8.58659",
"2": "7.645142111",
"3": "8.060439698",
"4": "8.972818303"
}
}
CodePudding user response:
I think this does what you want if I've understood what's implied by the desired output format shown in your question with regards to handling the missing values.
import csv
import json
# Function to convert a CSV to JSON
# Takes the file paths as arguments
def make_json(csvFilePath, jsonFilePath):
# create a dictionary
data = {}
# Open and read with a csv reader.
with open(csvFilePath, mode='r', encoding='utf-8-sig') as csvf:
csvReader = csv.reader(csvf, delimiter =',')
next(csvReader); next(csvReader) # Skip first 2 rows.
prev_key = None
for row in csvReader:
key = row[0:3]
if not all(key): # If keys missing, use previous key.
data[prev_key[0]][prev_key[1]][prev_key[2]].append(row[4:])
else:
data.setdefault(key[0], {}).setdefault(key[1], {}).setdefault(key[2], [])
data[key[0]][key[1]][key[2]].append(row[4:])
prev_key = key
# Open a json writer, and use the json.dumps()
# function to dump data
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonf.write(json.dumps(data, indent=4))
# Driver Code
# Decide the two file paths according to your
# computer system
csvFilePath = 'chart-0.csv'
jsonFilePath = 'try.json'
# Call the make_json function
make_json(csvFilePath, jsonFilePath)
Contents of JSON output file after running:
{
"10": {
"500": {
"10": [
[
"6.192187",
"0",
"0",
"10.3313",
"8.58659"
]
]
}
},
"2": {
"150": {
"5": [
[
"6.7125",
"8.497266",
"9.130859",
"6.46657",
"7.645142111"
],
[
"6.960938",
"9.472656",
"",
"6.64784",
"8.060439698"
],
[
"7.819531",
"0",
"0",
"6.88975",
"8.972818303"
]
]
}
}
}