I have a list of JSON responses that I built by scraping a store website with this code:
# Send the POST request, then store the decoded JSON body of the reply.
page = requests.request("POST", url, data=payload, headers=headers)
responses.append(page.json())
My issue is that each JSON response holds a certain number of elements, and each element contains the fields ('title', 'id', 'price', 'place', 'region').
I want to iterate through the list of JSON responses, and then through the 48 elements of each one, to extract those pieces of information into a CSV file.
I've tried this code:
# Rows collected for the table: one list of field values per announcement.
data = []
for j in responses : # iterating through the list of JSON responses; j is each response object itself, not its position in the list
for i in range(len(responses[j]['data']['search']['announcements']['data'])) : # iterating through the elements by index; note that responses[j] uses j as a list index
# One row per element: id, title, creation date, description,
# name of the first city, name of that city's region, and price.
data.append([responses[j]["data"]["search"]["announcements"]['data'][i]['id'],
responses[j]["data"]["search"]["announcements"]['data'][i]['title'],
responses[j]["data"]["search"]["announcements"]['data'][i]['createdAt'],
responses[j]["data"]["search"]["announcements"]['data'][i]['description'],
responses[j]["data"]["search"]["announcements"]['data'][i]['cities'][0]['name'],
responses[j]["data"]["search"]["announcements"]['data'][i]['cities'][0]['region']['name'],
responses[j]["data"]["search"]["announcements"]['data'][i]['price']
])
print(j)
# Build a table from the collected rows, naming the seven columns.
Cars_data = pd.DataFrame(data,columns=['id','Car_name','Post_Created','description','city_name','wilaya','price'])
I get this error:
TypeError Traceback (most recent call last) ~\AppData\Local\Temp\ipykernel_1648\38939902.py in 1 data = [] 2 for j in resp : ----> 3 for i in range(len(resp[j]['data']['search']['announcements']['data'])) : 4 data.append([resp[j]["data"]["search"]["announcements"]['data'][i]['id'], 5 resp[j]["data"]["search"]["announcements"]['data'][i]['title'],
TypeError: list indices must be integers or slices, not dict
Before that, I was getting an index-out-of-range error.
Here is a sample of the elements in the JSON (they all have the same format):
> {'id': '34456405', 'title': 'Hyundai i10 2012 GLS', 'slug':
> 'city-car-hyundai-i10-2012-gls-alger-centre-algiers-algeria',
> 'createdAt': '2022-12-07T11:33:06.000Z', 'isFromStore': False,
> 'isCommentEnabled': False, 'hasDelivery': False, 'deliveryType':
> None, 'description': 'سيارة نقية و مغلفة، فيها شوية صبيغة على برا
> كيما في الصور، محرك ما شاء الله \n 10/10 ما يسخن ما ينقص زيت. 00 مصروف
> ', 'status': 'PUBLISHED', 'cities': [{'id': '556',
> 'name': 'Alger centre',
> 'slug': 'alger-centre-556',
> 'region': {'id': '16',
> 'name': 'Algiers',
> 'slug': 'alger-16',
> '__typename': 'Region'},
> '__typename': 'City'}], 'store': None, 'user': {'id': '87200', '__typename': 'User'}, 'defaultMedia': {'mediaUrl':
> 'https://cdn9.ouedkniss.com/400/medias/announcements/images/gJJJl/7LIA1J26v5LTHmkp9Bn4zNr4NTrHwF1lJ95P4UZm.jpg',
> '__typename': 'AnnouncementMedia'}, 'price': 1450000,
> 'pricePreview': 145, 'priceUnit': 'MILLION', 'oldPrice': None,
> 'priceType': 'FIXED', 'exchangeType': None, '__typename':
> 'Announcement', 'smallDescription': [{'valueText': ['255200km'],
> '__typename': 'AnnouncementSpecDisplay'}, {'valueText': ['Petrol'], '__typename': 'AnnouncementSpecDisplay'}, {'valueText':
> ['1.1'], '__typename': 'AnnouncementSpecDisplay'}, {'valueText':
> ['Manuel'], '__typename': 'AnnouncementSpecDisplay'}, {'valueText':
> ['Silver grey'], '__typename': 'AnnouncementSpecDisplay'},
> {'valueText': ['Grey card'], '__typename':
> 'AnnouncementSpecDisplay'}], 'noAdsense': False}, {'id':
> '34453476', 'title': 'Toyota Auris 2009 Auris', 'slug':
> 'average-sedan-toyota-auris-2009-beni-messous-algiers-algeria',
> 'createdAt': '2022-12-07T11:32:54.000Z', 'isFromStore': False,
> 'isCommentEnabled': True, 'hasDelivery': False, 'deliveryType':
> None, 'description': '', 'status': 'PUBLISHED', 'cities':
> [{'id': '567',
> 'name': 'Beni messous',
> 'slug': 'beni-messous-567',
> 'region': {'id': '16',
> 'name': 'Algiers',
> 'slug': 'alger-16',
> '__typename': 'Region'},
> '__typename': 'City'}], 'store': None, 'user': {'id': '3203586', '__typename': 'User'}, 'defaultMedia': {'mediaUrl':
> 'https://cdn9.ouedkniss.com/400/medias/announcements/images/9kzK4/gGlMMM4jDueAFraAUioidvqJYUkYDU9zDRfX0o5N.jpg',
> '__typename': 'AnnouncementMedia'}, 'price': 10000,
> 'pricePreview': 1, 'priceUnit': 'MILLION', 'oldPrice': None,
> 'priceType': 'NEGOTIABLE', 'exchangeType': None, '__typename':
> 'Announcement', 'smallDescription': [{'valueText': ['182000km'],
> '__typename': 'AnnouncementSpecDisplay'}, {'valueText': ['Diesel'], '__typename': 'AnnouncementSpecDisplay'}, {'valueText':
> ['2.0 D4D 126ch'], '__typename': 'AnnouncementSpecDisplay'},
> {'valueText': ['Automatic'], '__typename': 'AnnouncementSpecDisplay'},
> {'valueText': ['Black'], '__typename': 'AnnouncementSpecDisplay'},
> {'valueText': ['Grey card'], '__typename':
> 'AnnouncementSpecDisplay'}], 'noAdsense': False}
CodePudding user response:
You can try iterating element by element instead of using list[index]:
# Loop over the response objects directly rather than indexing the list.
for response in responses:
    # The announcement records of a response live under this nested path.
    for item in response['data']['search']['announcements']['data']:
        # `item` is one announcement; its other fields can be read the same way.
        data.append(item['id'])
I find it easier to work with the objects themselves.
CodePudding user response:
`response` is your list of dicts:
# response = [{}, {}]
# Keys whose value is copied into the row unchanged.
scalar_keys = {"id", "createdAt", "description", "price"}

base_data = []
for line in response:
    # One output row per announcement dict.
    data = []
    # Values are appended in the order the keys appear in the dict,
    # so the column order of a row follows the key order of `line`.
    for key, values in line.items():
        if key in scalar_keys:
            data.append(values)
        elif key == "cities":
            # Only the first city is used: its name, then its region's name.
            first_city = values[0]
            data.append(first_city.get("name"))
            data.append(first_city.get("region").get("name"))
    base_data.append(data)
print(base_data)
[['34456405', '2022-12-07T11:33:06.000Z', 'ارة نقية و مغلفة، فيها شوية صبيغة على برا\n كيما في الصور، محرك ما شاء الله \n 10/10 ما يسخن ما ينقص زيت. 00 مصروف\n ', 'Alger centre', 'Algiers', 1450000], ['34453476', '2022-12-07T11:32:54.000Z', '', 'Beni messous', 'Algiers', 10000]]
This gets the values from each dict and stores them in a list of lists.