In this webpage: https://www.centris.ca/en/properties~for-sale~brossard?view=Thumbnail
I am trying to do two things:
- get the price of the listings
- get the MLS number of the listings
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
url = 'https://www.centris.ca/en/properties~for-sale~brossard?view=Thumbnail'
def scrap_pages(driver):
    listings = driver.find_elements(By.CLASS_NAME, 'description')
    # Drop a trailing 'description' element whose text is empty (not an actual listing).
    if listings[-1].text.split('\n')[0] == '':
        del listings[-1]
    for listing in listings:
        # Split the whole visible description once and index into it.
        details = listing.text.split('\n')
        print(details)
        price = details[0]
        prop_type = details[1]
        addr = details[2]
        city = details[3]
        sector = details[4]
        bedrooms = details[5]
        bathrooms = details[6]
        listing_item = {
            'price': price,
            'Address': addr,
            'property Type': prop_type,
            'city': city,
            'bedrooms': bedrooms,
            'bathrooms': bathrooms,
            'sector': sector
        }
        centris_list.append(listing_item)
if __name__ == '__main__':
    chrome_options = Options()
    chrome_options.add_experimental_option("detach", True)
    # chrome_options.add_argument("headless")
    driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options)
    centris_list = []
    driver.get(url)
    total_pages = driver.find_element(By.CLASS_NAME, 'pager-current').text.split('/')[1].strip()
    for i in range(1, int(total_pages)):
        scrap_pages(driver)
        driver.find_element(By.CSS_SELECTOR, 'li.next > a').click()
        time.sleep(0.8)
My code above already gets the price, but not in the way I would like. I don't like that I have to grab the whole description and then go through the whole text/split/list-indexing routine. I tried to grab the price directly with each of the methods below, but none of them worked; they all raised an "unable to locate element" error. If I can get the price working, I should be able to adapt the same approach to the rest of the data. (A quick way to check what Selenium actually sees is sketched after these attempts.)
#price= listing.find_element(By.CLASS_NAME, 'price').text
#price= listing.find_element(By.XPATH, './/*[@id="divMainResult"]/div[1]/div/div[2]/a/div[2]/span[1]').text
#price= listing.find_element(By.XPATH, './/*[@id="divMainResult"]/div[1]/div/div[2]/a/div[2]/meta[2]').text
#price = listing.find_element(By.CSS_SELECTOR, '#divMainResult > div:nth-child(1) > div > div.description > a > div.price').text
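As a sanity check, dumping the raw markup of a single listing shows exactly what Selenium has access to, so the relative locators can be compared against the real structure. A minimal sketch (outerHTML is a standard attribute; the structure it reveals is whatever Centris currently serves):

first_listing = driver.find_elements(By.CLASS_NAME, 'description')[0]
# Print this listing's own HTML so the relative selectors can be verified against it.
print(first_listing.get_attribute('outerHTML'))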
For the second part of the question, getting the MLS number, I unfortunately never got it working; every attempt below raised the same "unable to locate element" error. Yet if I look at the HTML source of the page, each listing clearly does carry an MLS number: https://imgur.com/a/ZEoTLoO
#mls= listing.find_element(By.TAG_NAME, 'MlsNumberNoStealth').text
#mls = listing.find_element(By.CSS_SELECTOR, '#MlsNumberNoStealth').text
#mls = listing.find_element(By.ID, 'MlsNumberNoStealth').text
#mls = listing.find_element(By.XPATH, './/*[@id="MlsNumberNoStealth"]/p').text
#mls = listing.find_elements(By.TAG_NAME, 'div')
#mls = listing.find_elements(By.ID, 'MlsNumberNoStealth')
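To narrow down whether the locator or the search scope is the problem, a count comparison helps (a rough sketch, meant to run inside the listing loop): find_elements returns an empty list instead of raising, so a page-wide count can be set against a per-listing count. If the first is non-zero and the second is zero, the MLS element exists but sits outside the node being searched.

# Page-wide: how many elements with this id exist at all?
print(len(driver.find_elements(By.ID, 'MlsNumberNoStealth')))
# Scoped: how many of them live inside this particular listing element?
print(len(listing.find_elements(By.ID, 'MlsNumberNoStealth')))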
CodePudding user response:
I'm not an excellent programmer, but I work extensively with HTML, CSS, and JavaScript. I believe you could run an executable JavaScript snippet that does something like...
// JavaScript: flip the hidden element to visible
h1.style.display = "block";

/* CSS: the element starts out hidden */
h1 {
  display: none;
}

<!-- HTML -->
<body>
  <h1 id="h1">Hidden data</h1>
</body>
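In Selenium terms, the same idea works without touching any CSS at all: execute_script can read the text of a hidden node directly. A minimal sketch, assuming el is a WebElement that has already been located with find_element:

# Read the text of a (possibly hidden) element through JavaScript,
# without changing its display property. 'el' is assumed to be a
# previously located WebElement.
hidden_text = driver.execute_script("return arguments[0].textContent;", el)
print(hidden_text)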
CodePudding user response:
You were close to the right approach. Once you have the list of listing elements from listings = driver.find_elements(By.CLASS_NAME, 'description'), you can iterate over it and grab each price and MLS number as follows:
def scrap_pages(driver):
    listings = driver.find_elements(By.CLASS_NAME, 'description')
    for listing in listings:
        # The price lives in the content attribute of a <meta> tag, which has
        # no visible text, so .text would come back empty.
        price = listing.find_element(By.XPATH, ".//div[@class='price']/meta[@itemprop='price']").get_attribute('content')
        # If this element is ever hidden, .text returns '' as well;
        # get_attribute('textContent') reads it regardless of visibility.
        mls = listing.find_element(By.XPATH, ".//div[@id='MlsNumberNoStealth']/p").text
All the other details can be grabbed in a similar manner.
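For completeness, here is a hedged sketch of how these locators could slot back into the original script. Anything beyond the price and MLS number is left on the text-split approach from the question, since its exact markup isn't shown here; treat any further per-field XPaths as assumptions to verify against the live page.

def scrap_pages(driver, centris_list):
    listings = driver.find_elements(By.CLASS_NAME, 'description')
    for listing in listings:
        # Price: stored in the <meta> tag's content attribute.
        price = listing.find_element(
            By.XPATH, ".//div[@class='price']/meta[@itemprop='price']"
        ).get_attribute('content')
        # MLS: textContent works whether or not the element is rendered.
        mls = listing.find_element(
            By.XPATH, ".//div[@id='MlsNumberNoStealth']/p"
        ).get_attribute('textContent').strip()
        # Remaining fields: still taken from the visible description text,
        # exactly as in the original script.
        details = listing.text.split('\n')
        centris_list.append({
            'price': price,
            'mls': mls,
            'details': details,
        })

Called as scrap_pages(driver, centris_list) inside the existing pagination loop, this keeps the rest of the script unchanged.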