I would like to use python to merge multiple JSON objects with duplicate or more fields with different data into one object by adding additional fields to the merged object. In this example. all of the fields have matching data except for the "audience type" field.
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type": "General Managers",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
},
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type": "Managers",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
},
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type": "Owners",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
},
I would like this to look like:
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type1": "General Managers",
"Person Audience Type2": "Managers",
"Person Audience Type3": "Owners",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
},
My code:
import csv
import json
def csv_to_json(csvFilePath, jsonFilePath):
jsonArray = []
#read csv file
with open(csvFilePath, encoding='utf-8') as csvf:
#load csv file data using csv library's dictionary reader
csvReader = csv.DictReader(csvf)
#convert each csv row into python dict
for row in csvReader:
#add this python dict to json array
jsonArray.append(row)
#convert python jsonArray to JSON String and write to file
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonString = json.dumps(jsonArray, indent=4)
jsonf.write(jsonString)
csvFilePath = r'data.csv'
jsonFilePath = r'data.json'
csv_to_json(csvFilePath, jsonFilePath)
CodePudding user response:
I assume that your data is in the list of dictionaries
data =[
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type": "General Managers",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
},
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type": "Managers",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
},
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type": "Owners",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
}
]
Then, I have applied the following logic and get your desired output
d1 = {}
i = 1
v_list = [d[k] for d in data for k,v in d.items()]
for d in data:
for k,v in d.items():
if v_list.count(d[k])>1:
d1[k] = v
else:
d1[k "" str(i)] = v
i = 1
Output looks like:
{'Person Username': '[email protected]',
'Person Status': 'Active',
'Person E-mail': '[email protected]',
'Person First Name': 'Ann',
'Person Last Name': 'Smith',
'Person Audience Type1': 'General Managers',
'Person Organization Name': 'mycompany',
'Person Organization Number': 'US3058',
'Person Audience Type2': 'Managers',
'Person Audience Type3': 'Owners'}
CodePudding user response:
this algorithm solves exactly what you ask, hope this helps
from json import dump, dumps
def json_formatter(data_list:dict)->dict:
data_with_list = {}
#transforming every key in a list
for data in data_list:
for key,value in data.items():
#means that is a new key
if key not in data_with_list.keys():
data_with_list[key] = [value]
else:
#avoid pushing repeated itens
if value not in data_with_list[key]:
data_with_list[key].append(value)
#transforming list
formated = {}
for key,value in data_with_list.items():
#means its unique
if len(value) == 1:
formated[key] = value[0]
else:
for x in range(1,len(value) 1):
formated[f'{key}{x}'] = value[x -1]
return formated
data = [
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type": "General Managers",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
},
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type": "Managers",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
},
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type": "Owners",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
},
{
"Person Username": "[email protected]",
"Person Status": "Active",
"Person E-mail": "[email protected]",
"Person First Name": "Ann",
"Person Last Name": "Smith",
"Person Audience Type": "Ownerswww",
"Person Organization Name": "mycompany",
"Person Organization Number": "US3058",
}
]
r = json_formatter(data)
dump(r,open('teste.json','w'),indent=4)