Is this the best way to parse JSON output from a Google Ads stream? I am parsing the JSON with pandas and it is taking too much time; the record count is around 700K.
[{
    "results": [
        {
            "customer": {
                "resourceName": "customers/12345678900",
                "id": "12345678900",
                "descriptiveName": "ABC"
            },
            "campaign": {
                "resourceName": "customers/12345678900/campaigns/12345",
                "name": "Search_Google_Generic",
                "id": "12345"
            },
            "adGroup": {
                "resourceName": "customers/12345678900/adGroups/789789",
                "id": "789789",
                "name": "adgroup_details"
            },
            "metrics": {
                "clicks": "500",
                "conversions": 200,
                "costMicros": "90000000",
                "allConversionsValue": 5000.6936,
                "impressions": "50000"
            },
            "segments": {
                "device": "DESKTOP",
                "date": "2022-10-28"
            }
        }
    ],
    "fieldMask": "segments.date,customer.id,customer.descriptiveName,campaign.id,campaign.name,adGroup.id,adGroup.name,segments.device,metrics.costMicros,metrics.impressions,metrics.clicks,metrics.conversions,metrics.allConversionsValue",
    "requestId": "fdhfgdhfgjf"
}]
This is the sample JSON. I am saving the stream to a JSON file, then reading it with pandas and dumping it to a CSV file. I want to convert it to CSV format, like this:
import pandas as pd

with open('Adgroups.json', encoding='utf-8') as inputfile:
    df = pd.read_json(inputfile)

df_new = pd.DataFrame(columns=['Date', 'Account_ID', 'Account', 'Campaign_ID', 'Campaign',
                               'Ad_Group_ID', 'Ad_Group', 'Device',
                               'Cost', 'Impressions', 'Clicks', 'Conversions', 'Conv_Value'])

for i in range(len(df['results'])):
    results = df['results'][i]
    for result in results:
        new_row = pd.Series({'Date': result['segments']['date'],
                             'Account_ID': result['customer']['id'],
                             'Account': result['customer']['descriptiveName'],
                             'Campaign_ID': result['campaign']['id'],
                             'Campaign': result['campaign']['name'],
                             'Ad_Group_ID': result['adGroup']['id'],
                             'Ad_Group': result['adGroup']['name'],
                             'Device': result['segments']['device'],
                             'Cost': result['metrics']['costMicros'],
                             'Impressions': result['metrics']['impressions'],
                             'Clicks': result['metrics']['clicks'],
                             'Conversions': result['metrics']['conversions'],
                             'Conv_Value': result['metrics']['allConversionsValue']})
        df_new = df_new.append(new_row, ignore_index=True)

df_new.to_csv('Adgroups.csv', encoding='utf-8', index=False)
CodePudding user response:
Don't use df.append. It is very slow because it has to copy the entire dataframe on every call, so the total work grows quadratically with the number of rows. It was deprecated in pandas 1.4 and removed in pandas 2.0 for exactly this reason.
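As a rough illustration of the gap, here is a minimal timing sketch on synthetic data (not your workload; it needs pandas < 2.0, since append no longer exists in 2.0):

import time
import pandas as pd

rows = [{"a": i, "b": 2 * i} for i in range(5_000)]

t0 = time.perf_counter()
slow = pd.DataFrame(columns=["a", "b"])
for row in rows:
    # every call copies the whole frame, so total work grows quadratically
    slow = slow.append(row, ignore_index=True)
print(f"append loop:         {time.perf_counter() - t0:.2f}s")

t0 = time.perf_counter()
fast = pd.DataFrame(rows)  # one allocation from the finished list
print(f"single construction: {time.perf_counter() - t0:.2f}s")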
You can build the rows with a list comprehension before constructing the dataframe:
import json

import pandas as pd

with open("Adgroups.json") as fp:
    data = json.load(fp)

columns = [
    "Date",
    "Account_ID",
    "Account",
    "Campaign_ID",
    "Campaign",
    "Ad_Group_ID",
    "Ad_Group",
    "Device",
    "Cost",
    "Impressions",
    "Clicks",
    "Conversions",
    "Conv_Value",
]

records = [
    (
        r["segments"]["date"],
        r["customer"]["id"],
        r["customer"]["descriptiveName"],
        r["campaign"]["id"],
        r["campaign"]["name"],
        r["adGroup"]["id"],
        r["adGroup"]["name"],
        r["segments"]["device"],
        r["metrics"]["costMicros"],
        r["metrics"]["impressions"],
        r["metrics"]["clicks"],
        r["metrics"]["conversions"],
        r["metrics"]["allConversionsValue"],
    )
    for d in data
    for r in d["results"]
]

df = pd.DataFrame(records, columns=columns)
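To finish the dump to CSV, and optionally convert the metrics the API returns as strings into numbers, something along these lines should work (note that costMicros is reported in millionths of the account currency):

df["Cost"] = pd.to_numeric(df["Cost"]) / 1_000_000  # micros -> currency units
df[["Impressions", "Clicks"]] = df[["Impressions", "Clicks"]].apply(pd.to_numeric)
df.to_csv("Adgroups.csv", encoding="utf-8", index=False)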
CodePudding user response:
You can use explode together with json_normalize:
import pandas as pd

with open('Adgroups.json', encoding='utf-8') as inputfile:
    df = pd.read_json(inputfile)

df = df.explode('results')
df = df.join(pd.json_normalize(df.pop('results')))
'''
df (one row, transposed here for readability):

fieldMask                      segments.date,customer.id,customer.descriptiveName,campaign.id,campaign.name,adGroup.id,adGroup.name,segments.device,metrics.costMicros,metrics.impressions,metrics.clicks,metrics.conversions,metrics.allConversionsValue
requestId                      fdhfgdhfgjf
customer.resourceName          customers/12345678900
customer.id                    12345678900
customer.descriptiveName       ABC
campaign.resourceName          customers/12345678900/campaigns/12345
campaign.name                  Search_Google_Generic
campaign.id                    12345
adGroup.resourceName           customers/12345678900/adGroups/789789
adGroup.id                     789789
adGroup.name                   adgroup_details
metrics.clicks                 500
metrics.conversions            200
metrics.costMicros             90000000
metrics.allConversionsValue    5000.6936
metrics.impressions            50000
segments.device                DESKTOP
segments.date                  2022-10-28
'''
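If you only want the columns from the CSV layout in the question, a rename-and-select pass over the normalized frame finishes the job (the target names are taken from the question; the mapping itself is mine):

rename_map = {
    "segments.date": "Date",
    "customer.id": "Account_ID",
    "customer.descriptiveName": "Account",
    "campaign.id": "Campaign_ID",
    "campaign.name": "Campaign",
    "adGroup.id": "Ad_Group_ID",
    "adGroup.name": "Ad_Group",
    "segments.device": "Device",
    "metrics.costMicros": "Cost",
    "metrics.impressions": "Impressions",
    "metrics.clicks": "Clicks",
    "metrics.conversions": "Conversions",
    "metrics.allConversionsValue": "Conv_Value",
}
df = df.rename(columns=rename_map)[list(rename_map.values())]
df.to_csv('Adgroups.csv', encoding='utf-8', index=False)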