I'm trying to convert this JSON to CSV. I have tried many ways but without success, I have converted other JSONs but no one with this structure. I'd like to extract only the headers and values.
The best approach that I have was with this code but still did not get a tabular CSV file.
import json
import pandas
def read_json(filename: str) -> dict:
try:
with open(filename, "r") as f:
data = json.loads(f.read())
except:
raise Exception(f"Reading {filename} file encountered an error")
return data
def create_dataframe(data: list) -> pandas.DataFrame:
# Declare an empty dataframe to append records
dataframe = pandas.DataFrame()
# Looping through each record
for d in data:
# Normalize the column levels
record = pandas.json_normalize(d)
# Append it to the dataframe
dataframe = dataframe.append(record, ignore_index=False)
return dataframe
def main():
# Read the JSON file as python dictionary
data = read_json(filename="response.json")
# Generate the dataframe for the array items in
# details key
dataframe = create_dataframe(data=data['values'])
# Renaming columns of the dataframe
print("Normalized Columns:", dataframe.columns.to_list())
dataframe.rename(columns={
"results.school": "school",
"results.high_school": "high_school",
"results.graduation": "graduation",
"education.graduation.major": "grad_major",
"education.graduation.minor": "grad_minor"
}, inplace=True)
print("Renamed Columns:", dataframe.columns.to_list())
# Convert dataframe to CSV
dataframe.to_csv("details.csv", index=True)
if __name__ == '__main__':
main()
has someone had a similar issue?
Thank you.
The Json file
{
"headers": [
"Warehouse",
"Days in Date",
"SKU's",
"Stock minimo",
"Stock disponible",
"Nivel de servcio"
],
"metadata": [
{
"jaql": {
"datatype": "text",
"column": "Warehouse",
"merged": true,
"dim": "[BaseVHS_F41021VHS.Warehouse]",
"title": "Warehouse",
"table": "BaseVHS_F41021VHS"
},
"field": {
"index": 0,
"id": "[BaseVHS_F41021VHS.Warehouse]"
},
"handlers": [],
"panel": "rows"
},
{
"jaql": {
"datatype": "datetime",
"level": "days",
"column": "Date_str",
"merged": true,
"dim": "[BaseVHS_F41021VHS.Date_str (Calendar)]",
"title": "Days in Date",
"table": "BaseVHS_F41021VHS"
},
"field": {
"index": 1,
"id": "[BaseVHS_F41021VHS.Date (Calendar)]_days"
},
"hierarchies": [
"calendar",
"calendar - weeks"
],
"handlers": [
{}
],
"format": {
"mask": {
"months": "MM/yyyy",
"weeks": "ww yyyy",
"minutes": "HH:mm",
"quarters": "yyyy Q",
"days": "shortDate",
"isdefault": true,
"years": "yyyy"
}
},
"panel": "rows"
},
{
"jaql": {
"agg": "count",
"datatype": "numeric",
"column": "NJDE",
"dim": "[BaseVHS_F41021VHS.NJDE]",
"title": "SKU's",
"table": "BaseVHS_F41021VHS"
},
"field": {
"index": 2,
"id": "[BaseVHS_F41021VHS.NJDE]_count"
},
"handlers": [
{},
{}
],
"format": {
"color": {
"color": "transparent",
"type": "color"
},
"mask": {
"b": true,
"t": true,
"decimals": "auto",
"separated": true,
"isdefault": true,
"type": "number"
}
},
"disabled": false,
"panel": "measures"
},
{
"jaql": {
"context": {
"[7377E-BFD]": {
"agg": "sum",
"datatype": "numeric",
"column": "ROP",
"dim": "[BaseVHS_F41021VHS.ROP]",
"title": "Total ROP",
"table": "BaseVHS_F41021VHS"
}
},
"formula": "[7377E-BFD]",
"type": "measure",
"title": "Stock minimo"
},
"field": {
"index": 3,
"id": "[7377E-BFD]"
},
"handlers": [
{},
{}
],
"format": {
"color": {
"color": "transparent",
"type": "color"
},
"mask": {
"b": true,
"t": true,
"decimals": "auto",
"separated": true,
"isdefault": true,
"type": "number"
}
},
"panel": "measures"
},
{
"jaql": {
"context": {
"[1BD66-B0A]": {
"agg": "sum",
"datatype": "numeric",
"column": "Qty_SOH",
"dim": "[BaseVHS_F41021VHS.Qty_SOH]",
"title": "Total Qty_SOH",
"table": "BaseVHS_F41021VHS"
},
"[D2436-DE3]": {
"datatype": "numeric",
"column": "Qty_SOH",
"dim": "[BaseVHS_F41021VHS.Qty_SOH]",
"title": "Qty SOH",
"table": "BaseVHS_F41021VHS"
}
},
"formula": "[1BD66-B0A]",
"type": "measure",
"title": "Stock disponible"
},
"field": {
"index": 4,
"id": "[1BD66-B0A]"
},
"handlers": [
{},
{}
],
"format": {
"color": {
"color": "transparent",
"type": "color"
},
"mask": {
"b": true,
"t": true,
"decimals": "auto",
"separated": true,
"isdefault": true,
"type": "number"
}
},
"panel": "measures"
},
{
"jaql": {
"context": {
"[8679B-A2B]": {
"agg": "avg",
"datatype": "numeric",
"column": "Column",
"dim": "[BaseVHS_F41021VHS.Column]",
"title": "Average Column",
"table": "BaseVHS_F41021VHS"
}
},
"formula": "[8679B-A2B]",
"type": "measure",
"title": "Nivel de servcio"
},
"field": {
"index": 5,
"id": "[8679B-A2B]"
},
"handlers": [
{},
{}
],
"format": {
"color": {
"color": "transparent",
"type": "color"
},
"mask": {
"decimals": "auto",
"percent": true
}
},
"panel": "measures"
}
],
"datasource": {
"revisionId": "TBD",
"fullname": "LocalHost/VHS"
},
"processingInfo": {
"cacheType": "none",
"cacheTime": "2022-06-15T15:40:38.275Z"
},
"translationInfo": {
"translationServiceProvider": "TranslationService",
"isTranslationFallback": false,
"isQueryFallback": false
},
"values": [
[
{
"data": " SGBELRAY",
"text": " SGBELRAY"
},
{
"data": "2022-06-15 10:24:32.000000",
"text": "2022-06-15 10:24:32.000000"
},
{
"data": 94.0,
"text": "94"
},
{
"data": 1018.0,
"text": "1018"
},
{
"data": 3.0,
"text": "3"
},
{
"data": 0.18085106382978725,
"text": "0.180851063829787"
}
],
[
{
"data": " SGFERRET",
"text": " SGFERRET"
},
{
"data": "2022-06-15 10:24:32.000000",
"text": "2022-06-15 10:24:32.000000"
},
{
"data": 576.0,
"text": "576"
},
{
"data": 64505.0,
"text": "64505"
},
{
"data": 623498.6,
"text": "623498.6"
},
{
"data": 0.8663194444444444,
"text": "0.866319444444444"
}
]
]
}
CodePudding user response:
If you need to extract only headers and values, you can pass it to a dataframe:
import json
import pandas as pd
data = json.loads(open('your_file.json').read())
print(data['headers']))
>> ['Warehouse',
'Days in Date',
"SKU's",
'Stock minimo',
'Stock disponible',
'Nivel de servcio']
print(data['values'])
>> [[{'data': ' SGBELRAY', 'text': ' SGBELRAY'},
{'data': '2022-06-15 10:24:32.000000', 'text': '2022-06-15 10:24:32.000000'},
{'data': 94.0, 'text': '94'},
{'data': 1018.0, 'text': '1018'},
{'data': 3.0, 'text': '3'},
{'data': 0.18085106382978725, 'text': '0.180851063829787'}],
[{'data': ' SGFERRET', 'text': ' SGFERRET'},
{'data': '2022-06-15 10:24:32.000000', 'text': '2022-06-15 10:24:32.000000'},
{'data': 576.0, 'text': '576'},
{'data': 64505.0, 'text': '64505'},
{'data': 623498.6, 'text': '623498.6'},
{'data': 0.8663194444444444, 'text': '0.866319444444444'}]]
df = pd.DataFrame(data['values'], columns=data['headers']).applymap(lambda x: x['data'])
df.to_csv('your_file.csv', index=False)
And you will get:
Let me know, if this is not exactly what you need.