When I run the code and open the resulting CSV file, it is actually empty.
'''
import requests
from bs4 import BeautifulSoup
from csv import writer
# Download the page at `url` and parse the returned HTML with BeautifulSoup.
url = 'https://www.fotocasa.es/es/alquiler/todas-las-casas/girona-provincia/todas-las-zonas/l'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
# Collect every <section> element whose class is "re-CardPackAdvance".
# If nothing matches, `lists` is empty and the loop below writes no data rows.
lists = soup.find_all('section', class_='re-CardPackAdvance')
# Open casas.csv for writing (truncating any existing file) and wrap it in a csv writer.
with open('casas.csv', 'w', encoding='utf8', newline='') as f:
thewriter = writer(f)
header = ['Titulo', 'Precio', 'Metros', 'Telefono']
# The header row is written before any listing is processed.
thewriter.writerow(header)
# NOTE(review): indentation was lost in this listing, so it cannot be determined
# from here whether this loop sits inside the `with` block — confirm against the original post.
# NOTE: the loop variable `list` shadows the builtin of the same name.
for list in lists:
# Each field is the text of the first matching descendant with newlines stripped;
# `.text` raises AttributeError when find() returns None (element not present).
titulo = list.find('a', class_='re-CardPackAdvance-info-container').text.replace('\n', '')
precio = list.find('span', class_='re-CardPrice').text.replace('\n', '')
metros = list.find('span', class_='re-CardFeaturesWithIcons-feature-icon--surface').text.replace('\n', '')
telefono = list.find('a', class_='re-CardContact-phone').text.replace('\n', '')
# One CSV row per listing, in the same column order as `header`.
info = [titulo, precio, metros, telefono]
thewriter.writerow(info)
'''
I expected to get all the info scraped from this website, but it seems I did something wrong at some point.
CodePudding user response:
You are not parsing the resulting `soup` appropriately. There is no `section` with the `re-CardPackAdvance` class. I adapted the code accordingly (find all `article` elements whose class starts with `re-CardPack`). Please also note that you need to shift the `for` loop by one indentation level so that it runs inside the `with` block. However, due to the structure of the page, only the first two entries are loaded directly when fetching the page. All other entries are fetched after the page has loaded in the browser (via JavaScript). I think you might consider using the API of the page instead.
import requests
from bs4 import BeautifulSoup
from csv import writer
import re
url = 'https://www.fotocasa.es/es/alquiler/todas-las-casas/girona-provincia/todas-las-zonas/l'


def _text_or_blank(card, tag, css_class):
    """Return the text of the first matching descendant, or '' if there is none.

    Args:
        card: Parsed element to search within.
        tag: Tag name of the descendant to look for.
        css_class: CSS class the descendant must carry.

    Returns:
        The descendant's text with newline characters removed, or an empty
        string when no such descendant exists.
    """
    node = card.find(tag, class_=css_class)
    if node is None:
        return ''
    return node.text.replace('\n', '')


# Fetch the page; fail loudly on an HTTP error instead of silently parsing an
# error page, and bound the wait so the script cannot hang forever.
page = requests.get(url, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

# Listing cards are <article> elements whose class starts with "re-CardPack".
lists = soup.find_all("article", class_=re.compile(r"^re-CardPack"))
print(len(lists))

with open('casas.csv', 'w', encoding='utf8', newline='') as f:
    thewriter = writer(f)
    header = ['Titulo', 'Precio', 'Metros', 'Telefono']
    thewriter.writerow(header)
    # The loop must stay inside the `with` block so rows are written while
    # the file is still open.
    for card in lists:
        # The title is taken from the `title` attribute of the card's first link.
        link = card.find('a')
        titulo = link.get('title') if link is not None else ''
        # A missing field becomes an empty cell rather than aborting the export.
        precio = _text_or_blank(card, 'span', 're-CardPrice')
        metros = _text_or_blank(card, 'span', 're-CardFeaturesWithIcons-feature-icon--surface')
        telefono = _text_or_blank(card, 'a', 're-CardContact-phone')
        info = [titulo, precio, metros, telefono]
        thewriter.writerow(info)