Format CSV out after parsing multiline JSON file-CodePudding

I have a JSON file like the following one (a smaller version):

{
    "A001": {
        "X": 503744.7,
        "Y": 4726339.0,
        "Z": 458.84,
        "LON": -2.954286956913572,
        "LAT": 42.68952475979137,
        "dates": [
            "2015-01-01",
            "2015-01-02",
            "2015-01-03",
            "2015-01-04",
            "2015-01-05",
            "2015-01-06"
        ],
        "values": [
            "56.9",
            "49.7",
            "48.1",
            "37.1",
            "34.4",
            "35.9"
        ]
    },
    "A002": {
        "X": 607870.5,
        "Y": 4670754.0,
        "Z": 264.83,
        "LON": -1.69378623727067,
        "LAT": 42.18149989583031,
        "dates": [
            "2015-01-01",
            "2015-01-02",
            "2015-01-03",
            "2015-01-04"
        ],
        "values": [
            "287",
            "231",
            "207",
            "191"
        ]
    },
    "A403": {
        "X": 868708.0,
        "Y": 4709148.0,
        "Z": 849.0,
        "LON": 1.483146867002623,
        "LAT": 42.44694604132231,
        "dates": [
            "2015-01-01",
            "2015-01-02",
            "2015-01-03",
            "2015-01-04",
            "2015-01-05",
            "2015-01-06",
            "2015-01-07",
            "2015-01-08",
            "2015-01-09"
        ],
        "values": [
            "2.296",
            "7.033",
            "2.298",
            "2.275",
            "7.207",
            "5.456",
            "4.794",
            "4.24",
            "4.748"
        ]
    }
}

and I'm already able to read, parse and write each input key "A001", "A002", ... "A403" as a single .csv file

from requests import get
from csv import DictWriter
import json

useful_columns = ["Station", "lon", "lat", "z", "dates", "values"]
default_value = ""

with open('station.json') as json_file:
    data = json.load(json_file)
    for json_obj in data:
        print(json_obj)
        filename = json_obj   '.csv'
    #data = json.load(json_file)['A001']
        lon = data[json_obj]["LON"]
        lat = data[json_obj]["LAT"]
        z = data[json_obj]["Z"]

        date = data[json_obj]["dates"]
        values = data[json_obj]["values"]
        #print(date)

        ## Open file for writing
        with open(filename, mode="w", newline='') as csv_file:

            # Create dictwriter object to use for writing
            writer = DictWriter(csv_file, fieldnames=useful_columns)
            # Write CSV headers
            writer.writeheader()
            ## Write each row to CSV file
            for item in data:
                print(item)
                row = {
                    "Station": item,
                    "lon": lon,
                    "lat": lat,
                    "z": z,
                    "dates": date,
                    "values": values,
                }
            writer.writerow(row)

The output I would like to have is a CSV file like the following example for A001:

Station,lon,lat,z,dates,values
A001,-2.954286957,42.68952476,458.84,2015-01-01,56.9
                                     2015-01-02,49.7
                                     2015-01-03,48.1
                                     2015-01-04,37.1
                                     2015-01-05,34.4
                                     2015-01-06,35.9

Now I have

['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04', '2015-01-05', '2015-01-06']

for dates and

['56.9', '49.7', '48.1', '37.1', '34.4', '35.9']

for values.

How can I transform the variables "dates2 and "values" as columns without apex as shown in the previous example?

CodePudding user response：

First write the first row that includes all the data:

for item in data:
    print(item)
    row = {
        "Station": item,
        "lon": lon,
        "lat": lat,
        "z": z,
        "dates": item['dates'][0],
        "values": item['values'][0],}
    writer.writerow(row)

Then write all the dates in the following rows:

    for i in range(1, len(item['dates'])):
        row = {
            "Station": '',
            "lon": '',
            "lat": '',
            "z": '',
            "dates": item['dates'][i],
            "values": item['values'][i], }
        writer.writerow(row)

CodePudding user response：

The way you would like to have is a CSV file to like isn't a valid CSV file format because each row should have the same number of fields — but it's easy to rectify that by supplying a restval parameter value when creating the csv.DictWriter (see documentation) that will be used for all the missing fields so the CSV files created will all be in a format similar to this for the A001.csv created:

Station,lon,lat,z,date,value
A001,-2.954286956913572,42.68952475979137,458.84,2015-01-01,56.9
,,,,2015-01-02,49.7
,,,,2015-01-03,48.1
,,,,2015-01-04,37.1
,,,,2015-01-05,34.4
,,,,2015-01-06,35.9

Code to do it:

from csv import DictWriter
import json

CSV_FIELDNAMES = 'Station', 'lon', 'lat', 'z', 'date', 'value'

with open('station.json') as json_file:
    data = json.load(json_file)

    for station, json_obj in data.items():
        csv_filename = station   '.csv'
        with open(csv_filename, mode="w", newline='') as csv_file:
            date, *dates = json_obj['dates']
            value, *values = json_obj['values']
            # Initial full row of station data.
            row = {
                'Station': station,
                'lon':   json_obj['LON'],
                'lat':   json_obj['LAT'],
                'z':     json_obj['Z'],
                'date':  date,
                'value': value
            }
            writer = DictWriter(csv_file, fieldnames=CSV_FIELDNAMES, restval='')
            writer.writeheader()
            writer.writerow(row)  # Write initial full station data.
            for date, value in zip(dates, values):  # Write remaining partial rows.
                writer.writerow({"date": date, "value": value})

print('fin')