I'm new to web scraping and was trying to get a basic web-scraping script to work. The code runs without errors; the problem is that the CSV file never contains any data: it only shows the name of each column and nothing else. Any help would be appreciated.
import requests
from bs4 import BeautifulSoup
import csv
def scrape_cars(url, timeout=30):
    """Scrape car listings from a search-results page.

    Downloads *url*, parses the HTML with the ``lxml`` parser and builds
    one dict per ``<div class="c-search-card">`` element found in it.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys "title", "price", "location",
        "year", "km", "color", "carrosserie", "puissance fiscale" and
        "boite".  A field whose element is absent from a card is stored
        as an empty string.  The list is empty when the page contains no
        ``c-search-card`` divs.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of silently returning no cars
    # (which would otherwise produce a CSV containing only the header row).
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "lxml")

    def text_of(node):
        # find() returns None when nothing matches; treat that as "no value"
        # rather than crashing on ``None.text``.
        return node.text.strip() if node is not None else ""

    # Output key -> CSS class of the <div> holding that value inside a card.
    detail_classes = {
        "price": "c-search-card__price",
        "location": "c-search-card__location",
        "year": "c-search-card__year",
        "km": "c-search-card__km",
        "color": "c-search-card__color",
        "carrosserie": "c-search-card__body-type",
        "puissance fiscale": "c-search-card__tax-horsepower",
        "boite": "c-search-card__transmission",
    }

    cars = []
    for card in soup.find_all("div", class_="c-search-card"):
        car = {"title": text_of(card.find("h2"))}
        for key, css_class in detail_classes.items():
            car[key] = text_of(card.find("div", class_=css_class))
        cars.append(car)
    return cars
url = "https://www.automobile.tn/fr/occasion"
cars = scrape_cars(url)

# write to CSV file
# newline="" lets the csv module control line endings; the explicit UTF-8
# encoding keeps accented text from depending on the platform's default codec.
with open("cars.csv", "w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(
        file,
        fieldnames=[
            "title",
            "price",
            "location",
            "year",
            "km",
            "color",
            "carrosserie",
            "puissance fiscale",
            "boite",
        ],
    )
    writer.writeheader()
    # One row per scraped car; if `cars` is empty only the header is written.
    writer.writerows(cars)
This is what I get in the CSV file:
CodePudding user response:
Here is one way of getting that information you're after:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
from tqdm import tqdm ## if using jupyter notebook: from tqdm.notebook import tqdm
# Walk the numbered listing pages, collect a (title, price) tuple for each
# matched element, and load the result into a pandas DataFrame.
# NOTE(review): the indentation of this snippet was lost; the loop bodies
# below must be re-indented before it can run.
big_list = []
# Browser-like User-Agent, applied to every request made through the session.
headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
}
s = requests.Session()
s.headers.update(headers)
# range(1, 25) requests pages 1 through 24; tqdm only adds a progress bar.
for x in tqdm(range(1, 25)): ## to get all cars set range to 266
soup = bs(s.get(f'https://www.automobile.tn/fr/occasion/{x}').text, 'html.parser')
# NOTE(review): 'div[]' has an empty attribute selector -- the attribute
# name/value appears to have been stripped from this snippet. Restore the
# intended selector for one listing card before running; verify on the page.
cars = soup.select('div[]')
for c in cars:
title = c.select_one('h2').text.strip()
# NOTE(review): same problem -- the attribute identifying the price <div>
# is missing from this selector; TODO confirm against the page markup.
price = c.select_one('div[]').text.strip()
big_list.append((title, price))
## add other elements as needed
# One DataFrame row per collected tuple.
df = pd.DataFrame(big_list, columns=['title', 'price'])
# df.to_csv('various_cars.csv') ## uncomment to save as csv
print(df)
Result in terminal:
100%
24/24 [00:25<00:00, 1.08it/s]
title price
0 Mazda CX-5 69 700 DT
1 Mercedes-Benz Classe E 53 000 DT
2 Mercedes-Benz Classe E 252 000 DT
3 Seat Arona 71 500 DT
4 Volkswagen Golf 7 47 000 DT
... ... ...
283 BMW Série 1 74 000 DT
284 BMW Série 3 135 000 DT
285 Volkswagen Golf 7 70 000 DT
286 Mercedes-Benz Classe C coupé 159 000 DT
287 Volkswagen Jetta 36 000 DT
288 rows × 2 columns