I've run the following Python code to load a JSON structure, and I'm trying to print a list of all the "title" elements inside the 'foods' structure
import re
import json
from subprocess import check_output
import requests
from bs4 import BeautifulSoup
url = "https://www.bbc.co.uk/food/recipes/avocado_pasta_with_peas_31700"
# Capture the text assigned to __reactInitialState__ in the page source
# (group 1 of the match: everything up to the last ";" on that line).
data = re.search("__reactInitialState__ = (.*);", requests.get(url).text)[1]
# Parse the captured text as JSON.
data = json.loads(data)
# Prints the whole "stagesWithoutLinks" value, not the individual food titles.
print(data["recipeReducer"]["recipe"]["stagesWithoutLinks"])
I want the output to look like:
['penne','avocado','garlic','coconut oil','salt','lemon','mint','peas','chilli']
I can't seem to access the 'foods' structure to get the title elements for this output. Any ideas?
CodePudding user response:
I don't think you need the re module for this; BeautifulSoup can read the ingredient names directly from the page's HTML.
import requests
from bs4 import BeautifulSoup as BS
# Fetch the recipe page; raise_for_status() turns an HTTP error status into an exception.
response = requests.get('https://www.bbc.co.uk/food/recipes/avocado_pasta_with_peas_31700')
response.raise_for_status()
soup = BS(response.text, 'lxml')
# For every ingredient list item, keep the text of the first <a> inside it.
ingredients = []
for item in soup.select('li.recipe-ingredients__list-item'):
    ingredients.append(item.find('a').getText())
print(ingredients)
Output:
['penne', 'avocado', 'garlic', 'coconut oil', 'salt', 'lemon', 'mint', 'peas', 'chilli']
CodePudding user response:
A minimal change to your code would replace your last line with this:
# Read the first stage's ingredients and take the "title" field (the one the
# question asks for) of each ingredient's first food entry.
titles = [
    ingredient["foods"][0]["title"]
    for ingredient in data["recipeReducer"]["recipe"]["stagesWithoutLinks"][0]["ingredients"]
]
print(titles)
CodePudding user response:
This prints the "title" of each ingredient's first food entry as a list, read from the JSON embedded in the page.
import re
import json
import requests
url = "https://www.bbc.co.uk/food/recipes/avocado_pasta_with_peas_31700"
# The page assigns its state to __reactInitialState__; capture the text
# between that assignment and the last ";" on the same line.
data = re.search("__reactInitialState__ = (.*);", requests.get(url).text)[1]
data = json.loads(data)
# Only the first stage's ingredient list is read here.
data = data["recipeReducer"]["recipe"]["stagesWithoutLinks"][0]['ingredients']
# Take the title of the first food attached to each ingredient.
title = [ingredient['foods'][0]['title'] for ingredient in data]
print(title)
Output:
['penne', 'avocado', 'garlic', 'coconut oil', 'salt', 'lemon', 'mint', 'peas', 'chilli']