I'm using the recipe_scraper
python package to grab recipes from a website and create a single df with a column for each recipe under which recipe ingredient amounts are stored. I have managed to create the df but nested for loop in the create_df()
function writes out the ingredients for each recipe for as many times as there are ingredients in the recipe e.g., if the recipe has 13 ingredients then the loop writes out all 13 ingredients 13 times instead of just once. Can someone please help me spot where I should be moving parts of my nested for loop such that each recipe contains only 1 copy of its ingredients?
Here's the code I've gotten thus far:
import pandas as pd
from recipe_scrapers import scrape_me
def replace_measurement_symbols(ingredients):
"""
Description:
Converts measurement symbols to numbers that will later serve as floats
Arguments:
* ingredients: this is the ingredient list object
"""
ingredients = [i.replace('¼', '0.25') for i in ingredients]
ingredients = [i.replace('½', '0.5') for i in ingredients]
ingredients = [i.replace('¾', '0.75') for i in ingredients]
return ingredients
def create_df(recipes):
"""
Description:
Creates one df with all recipes and their ingredients
Arguments:
* recipes: list of recipe URLs provided by user
"""
df_list = []
for recipe in recipes:
scraper = scrape_me(recipe)
recipe_details = replace_measurement_symbols(scraper.ingredients())
recipe_name = recipe.split("https://www.hellofresh.nl/recipes/", 1)[1]
recipe_name = recipe_name.rsplit('-', 1)[0]
print(recipe_name)
df_temp = pd.DataFrame(columns=['Ingredients', 'Measurement'])
df_temp[str(recipe_name)] = recipe_name
for ingredient in recipe_details:
try:
ing_1 = ingredient.split("2 * ", 1)[1]
ing_1 = ing_1.split(" ", 2)
item = ing_1[2]
measurement = ing_1[1]
quantity = float(ing_1[0]) * 2
df_temp.loc[len(df_temp)] = [item, measurement, quantity]
df_list.append(df_temp)
except ValueError:
pass
df = pd.concat(df_list)
return df
def main():
"""
Description:
Runs above functions to create one df with all recipes provided by user in a list
"""
recipes = [
'https://www.hellofresh.nl/recipes/luxe-burger-met-truffeltapenade-en-portobello-63ad875558b39f3da6083acd',
'https://www.hellofresh.nl/recipes/chicken-parmigiana-623c51bd7ed5c074f51bbb10',
'https://www.hellofresh.nl/recipes/quiche-met-broccoli-en-oude-kaas-628665b01dea7b8f5009b248',
]
df = create_df(recipes)
if __name__ == "__main__":
main()
CodePudding user response:
I've solved it! In the create_df()
function just needed to move df_temp()
within the nested for
loop:
def create_df(recipes):
"""
Description:
Creates one df with all recipes and their ingredients
Arguments:
* recipes: list of recipe URLs provided by user
"""
df_list = []
for recipe in recipes:
scraper = scrape_me(recipe)
recipe_details = replace_measurement_symbols(scraper.ingredients())
recipe_name = recipe.split("https://www.hellofresh.nl/recipes/", 1)[1]
recipe_name = recipe_name.rsplit('-', 1)[0]
print(recipe_name)
for ingredient in recipe_details:
try:
df_temp = pd.DataFrame(columns=['Ingredients', 'Measurement'])
df_temp[str(recipe_name)] = recipe_name
ing_1 = ingredient.split("2 * ", 1)[1]
ing_1 = ing_1.split(" ", 2)
item = ing_1[2]
measurement = ing_1[1]
quantity = float(ing_1[0]) * 2
df_temp.loc[len(df_temp)] = [item, measurement, quantity]
df_list.append(df_temp)
except ValueError:
pass
df = pd.concat(df_list)
return df