I have a script that scrapes data from multiple pages. I am trying to dynamically generate nested JSON objects inside a single array, but the result I get is the nested JSON objects wrapped in two lists and encoded as a string: the output starts with `['"[{` and ends with `}]"']`. Can someone explain where I am making a mistake?
**My code is pasted below; please have a look.**
# Fetches every URL, pulls a few fields out of each item, and writes the result to temp.json.
# NOTE(review): the paste lost its indentation and two fragments (the baseurl value and,
# presumably, a `+` in the 'Detai Page' line), so nesting is inferred from statement order.
def geturl():
urls = [
# list of URLs
]
with open('temp.json', 'w', encoding='utf-8') as file:
for url in urls:
r = requests.get(url)
print(r.status_code)
data = json.loads(r.content)
items = data['items']
baseurl = # URL
# NOTE(review): `data` is rebound here, replacing the parsed response, and this one
# OrderedDict is created before the item loop starts. If the append below sits inside
# that loop, every entry in `main` is the same object holding the last item's values.
data = OrderedDict()
main = []
for item in items:
data['Title'] = item['name']
data["Price"] = item['price']
data['Detai Page'] = baseurl item['slug']
data['Image'] = item['thumb_image']
main.append(data)
# json.dumps() returns a str; json.dump() then serialises that str again, so the
# file receives one quoted, escaped JSON string instead of an array of objects.
result = json.dumps(main)
json.dump(result, file, indent=4, sort_keys=True)
geturl()
Could someone please help me solve this issue?
CodePudding user response:
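This should work fine. The double encoding comes from calling `json.dumps` on the list and then passing the resulting string to `json.dump`; serialise the list only once, and build a new dict for every item: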
import pandas as pd
def geturl(filehandler):
    """Scrape every URL and write the collected items as one JSON array.

    Each response body is parsed as JSON and is expected to be an object with
    an ``items`` list whose entries carry ``name``, ``price``, ``slug`` and
    ``thumb_image`` keys.

    Args:
        filehandler: Writable text file object that receives the JSON array.

    Raises:
        requests.HTTPError: If a URL answers with a 4xx/5xx status.
        KeyError: If a response or an item lacks one of the expected keys.
    """
    urls = []  # your URLs here
    baseurl = ""  # your base URL
    main = []
    for url in urls:
        r = requests.get(url)
        # Fail loudly on an error status instead of trying to parse an error page.
        r.raise_for_status()
        payload = json.loads(r.content)
        for item in payload["items"]:
            # A fresh dict per item: reusing a single dict would leave every
            # list entry pointing at the same (last) record.
            main.append(
                {
                    "Title": item["name"],
                    "Price": item["price"],
                    "Detai Page": baseurl + item["slug"],
                    "Image": item["thumb_image"],
                }
            )
    # Serialise the list exactly once, after all URLs are processed. Feeding
    # the output of json.dumps() into json.dump() would encode the JSON text
    # a second time as a quoted string.
    json.dump(main, filehandler)
    # Also write the same records to data.json through pandas.
    df = pd.DataFrame(main)
    df.to_json("data.json", orient="records")
# Open the output file once and let geturl() write the JSON array into it.
with open('temp.json', mode='w', encoding='utf-8') as out_file:
    geturl(out_file)