Home > other >  web scraping not showing any output
web scraping not showing any output

Time:02-06

I'm new to web scraping and was trying to get a basic web-scraping script to work. The code runs without errors, but the CSV file it produces contains only the column names and no data. Any help would be appreciated.

import requests
from bs4 import BeautifulSoup
import csv

def _text_or_empty(node):
    """Return the stripped text of a parsed element, or "" if it is None."""
    return node.text.strip() if node is not None else ""


def scrape_cars(url):
    """Fetch a listing page and return one dict per car card found on it.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list of dicts with the keys "title", "price", "location", "year",
        "km", "color", "carrosserie", "puissance fiscale" and "boite".
        A field whose element is absent from a card is stored as "".
        The list is empty when no ``div.c-search-card`` element is present
        in the downloaded HTML.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # Presumably some sites answer the default python-requests agent with
    # different markup; a browser-like User-Agent avoids that difference.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
        ),
    }
    # Without a timeout requests can wait forever; without the status check
    # an error page would be parsed and silently produce an empty result.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "lxml")

    # Output key -> CSS class of the <div> holding that value inside a card.
    div_fields = {
        "price": "c-search-card__price",
        "location": "c-search-card__location",
        "year": "c-search-card__year",
        "km": "c-search-card__km",
        "color": "c-search-card__color",
        "carrosserie": "c-search-card__body-type",
        "puissance fiscale": "c-search-card__tax-horsepower",
        "boite": "c-search-card__transmission",
    }

    cars = []
    for car_div in soup.find_all("div", class_="c-search-card"):
        car = {"title": _text_or_empty(car_div.find("h2"))}
        for key, css_class in div_fields.items():
            # find() returns None for a missing element; tolerate that
            # instead of failing on `.text` for the whole page.
            car[key] = _text_or_empty(car_div.find("div", class_=css_class))
        cars.append(car)
    return cars

url = "https://www.automobile.tn/fr/occasion"
cars = scrape_cars(url)

# An empty list means nothing on the page matched the card selector; say so
# explicitly, otherwise the only symptom is a CSV with just the header row.
if not cars:
    print("No car listings were found; cars.csv will only contain the header row.")

fieldnames = [
    "title",
    "price",
    "location",
    "year",
    "km",
    "color",
    "carrosserie",
    "puissance fiscale",
    "boite",
]

# write to CSV file
# newline="" is what the csv module expects for files it writes to; the
# explicit encoding keeps accented text independent of the platform default.
with open("cars.csv", "w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(cars)

This is what I get in the CSV file:

CodePudding user response:

Here is one way of getting that information you're after:

import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
from tqdm import tqdm ## if using jupyter notebook: from tqdm.notebook import tqdm

# Collects one (title, price) tuple per car card across the paginated listing
# pages, then loads them into a pandas DataFrame and prints it.
big_list = []
# Browser-like User-Agent sent with every request made through the session.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
}

# One Session is reused for all page requests so the headers are set once.
s = requests.Session()
s.headers.update(headers)
for x in tqdm(range(1, 25)): ## to get all cars set range to 266
    # The page number is appended to the listing URL; range(1, 25) covers pages 1-24.
    soup = bs(s.get(f'https://www.automobile.tn/fr/occasion/{x}').text, 'html.parser')
    # NOTE(review): the attribute part of this selector is empty - presumably the
    # attribute/class name was stripped when the snippet was copied. Fill in the
    # selector that matches one car card before running.
    cars = soup.select('div[]')
    for c in cars:
        title = c.select_one('h2').text.strip()
        # NOTE(review): same empty attribute selector as above; it presumably
        # targeted the price element inside the card - TODO confirm.
        price = c.select_one('div[]').text.strip()
        big_list.append((title, price))
        ## add other elements as needed

# Each tuple becomes one row; the column order matches the tuple order.
df = pd.DataFrame(big_list, columns=['title', 'price'])
# df.to_csv('various_cars.csv') ## uncomment to save as csv
print(df)

Result in terminal:

100%
24/24 [00:25<00:00, 1.08it/s]
title   price
0   Mazda CX-5  69 700 DT
1   Mercedes-Benz Classe E  53 000 DT
2   Mercedes-Benz Classe E  252 000 DT
3   Seat Arona  71 500 DT
4   Volkswagen Golf 7   47 000 DT
... ... ...
283 BMW Série 1 74 000 DT
284 BMW Série 3 135 000 DT
285 Volkswagen Golf 7   70 000 DT
286 Mercedes-Benz Classe C coupé    159 000 DT
287 Volkswagen Jetta    36 000 DT
288 rows × 2 columns
  • Related