I am scraping a [website][1]. I want to retrieve the web pages from these URLs and convert each one into a BeautifulSoup object.
From each page I then want to extract every car's manufacturing year, engine, price, dealer information (if it is available), and the URL (href) that leads to the detailed car information.
When I run the code I get the error "ValueError: not enough values to unpack (expected 4, got 3)". When I remove one value, unpacking into make, model, and price instead of make, model, year, and price, I get a different error: "too many values to unpack (expected 3)".
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Listing URL template; "{}" is replaced with the page number.
url = "https://jammer.ie/used-cars?page={}&per-page=12"

# One dict per scraped car; becomes the rows of the DataFrame below.
all_data = []

for page in range(1, 3):  # <-- increase number of pages here
    soup = BeautifulSoup(requests.get(url.format(page)).text, "html.parser")

    for car in soup.select(".car"):
        # Join every text fragment of the headline with "|" so it can be split.
        info = car.select_one(".top-info").get_text(strip=True, separator="|")
        # NOTE: this is the line that raises the ValueError described in the
        # question. The unpacking needs exactly four "|"-separated parts, but
        # the headline of some cars splits into only three.
        make, model, year, price = info.split("|")

        dealer_name = car.select_one(".dealer-name h6").get_text(
            strip=True, separator=" "
        )
        address = car.select_one(".address").get_text(strip=True)

        # Each feature is keyed by the file name (without extension) of its icon.
        features = {}
        for feature in car.select(".car--features li"):
            k = feature.img["src"].split("/")[-1].split(".")[0]
            v = feature.span.text
            features[f"feature_{k}"] = v

        all_data.append(
            {
                "make": make,
                "model": model,
                "year": year,
                "price": price,
                "dealer_name": dealer_name,
                "address": address,
                # The href is site-relative, so prefix it with the host.
                "url": "https://jammer.ie"
                + car.select_one("a[href*=vehicle]")["href"],
                **features,
            }
        )

df = pd.DataFrame(all_data)

# prints sample data to screen:
print(df.tail().to_markdown(index=False))

# saves all data to CSV
df.to_csv('data.csv', index=False)
CodePudding user response:
You can check whether each car listing includes a model before unpacking:
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Listing URL template; "{}" is replaced with the page number.
url = "https://jammer.ie/used-cars?page={}&per-page=12"


def parse_top_info(info):
    """Split a "|"-separated car headline into its four fields.

    Args:
        info: Headline text whose fragments are joined with "|".

    Returns:
        A ``(make, model, year, price)`` tuple of strings. The first part is
        taken as the make and the last two as year and price. Whatever lies
        between them is joined with spaces into the model, or "N/A" when
        nothing lies between them (the three-part headline that made a fixed
        four-way unpack fail). Headlines with fewer than three parts are
        padded with "N/A" on the right instead of raising.
    """
    parts = info.split("|")
    # A negative repeat count yields an empty list, so this only ever pads.
    parts += ["N/A"] * (3 - len(parts))
    make, *model_parts, year, price = parts
    model = " ".join(model_parts) if model_parts else "N/A"
    return make, model, year, price


def text_or_na(node, separator=""):
    """Return the stripped text of a BeautifulSoup node, or "N/A" for None.

    ``select_one`` returns None when nothing matches, so optional elements
    (such as dealer information) must not be dereferenced blindly.
    """
    if node is None:
        return "N/A"
    return node.get_text(strip=True, separator=separator)


def scrape(pages=range(1, 3)):  # <-- increase number of pages here
    """Download the given listing pages and return one dict per car.

    Args:
        pages: Iterable of page numbers substituted into ``url``.

    Returns:
        A list of dicts with the keys make, model, year, price, dealer_name,
        address and url, plus one ``feature_<icon name>`` key per feature
        listed for the car.

    Raises:
        requests.HTTPError: If a page answers with an error status.
    """
    all_data = []
    for page in pages:
        # A timeout stops a stalled connection from hanging the script.
        # The status check avoids silently parsing an error page.
        response = requests.get(url.format(page), timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        for car in soup.select(".car"):
            info = car.select_one(".top-info").get_text(strip=True, separator="|")
            make, model, year, price = parse_top_info(info)

            # Each feature is keyed by the file name (without extension) of
            # its icon.
            features = {}
            for feature in car.select(".car--features li"):
                k = feature.img["src"].split("/")[-1].split(".")[0]
                features[f"feature_{k}"] = feature.span.text

            all_data.append(
                {
                    "make": make,
                    "model": model,
                    "year": year,
                    "price": price,
                    "dealer_name": text_or_na(
                        car.select_one(".dealer-name h6"), separator=" "
                    ),
                    "address": text_or_na(car.select_one(".address")),
                    # The href is site-relative, so prefix it with the host.
                    "url": "https://jammer.ie"
                    + car.select_one("a[href*=vehicle]")["href"],
                    **features,
                }
            )
    return all_data


if __name__ == "__main__":
    df = pd.DataFrame(scrape())

    # prints sample data to screen:
    print(df.tail().to_markdown(index=False))

    # saves all data to CSV
    df.to_csv("data.csv", index=False)
Prints:
make | model | year | price | dealer_name | address | url | feature_speed | feature_engine | feature_transmission | feature_door-icon1 | feature_petrol5 | feature_hatchback | feature_owner | feature_paint |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SEAT | Leon | 2015 | Price on application | McNamara Motors | Co. Cork | https://jammer.ie/vehicle/166591-seat-leon-2015 | 45000 miles | 1.2 litres | Manual | 5 doors | Petrol | Hatchback | 2 previous owners | Grey |
Toyota | Verso | 2012 | €8,250 | Amcc | Co. Dublin | https://jammer.ie/vehicle/166590-toyota-verso-2012 | 98179 miles | 1.5 litres | Automatic | 4 doors | Petrol | MPV | nan | Purple |
Mazda | Demio | 2012 | €7,950 | Amcc | Co. Dublin | https://jammer.ie/vehicle/166589-mazda-demio-2012 | 82644 miles | 1.3 litres | Automatic | 4 doors | Petrol | Hatchback | nan | Red |
Toyota | Corolla | 2017 | €14,950 | Amcc | Co. Dublin | https://jammer.ie/vehicle/166588-toyota-corolla-2017 | 78916 miles | 1.5 litres | Automatic | 4 doors | nan | Estate | nan | Silver |
Mazda | Demio | 2013 | €8,950 | Amcc | Co. Dublin | https://jammer.ie/vehicle/166587-mazda-demio-2013 | 53439 miles | 1.3 litres | Automatic | 4 doors | Petrol | Hatchback | nan | Grey |