Selenium XPath Click Load More Button Until Gone


I am trying to scrape apartment names and addresses from this website: https://www.rentcollegepads.com/off-campus-housing/mississippi-state/search. I am new to scraping and was wondering if someone could help me. Thanks!

I am able to click the "Load More" button once with this code:

from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import csv

file_name = 'Test'

options = Options()
options.headless = False
driver = webdriver.Firefox(options=options)

url = 'https://www.rentcollegepads.com/off-campus-housing/mississippi-state/search'

driver.get(url)

# Give the page time to finish its initial load
time.sleep(6)

# Click the "Load More" button once it becomes clickable
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[@class='btn btn-success btn-block' and contains(., 'Load More')]"))).click()

# Parse the rendered page, then close the browser
soup = bs(driver.page_source, 'lxml')

driver.quit()

# Grab the first apartment name and address on the page
name = soup.find("h3", class_="ellipsis").get_text(strip=True)

address = soup.find("span", class_="ellipsis").get_text(strip=True)

print(name)
print(address)

My current obstacle is that I need to click the "Load More" button an undefined number of times per search. How would I click it every time it is visible, until it is gone?

Thanks!

CodePudding user response:

You have to use a while loop to keep clicking until the "Load More" button is gone:

import time
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("detach", True)  # keep the browser open after the script finishes
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

url = 'https://www.rentcollegepads.com/off-campus-housing/mississippi-state/search'
driver.get(url)
time.sleep(5)


# Same "Load More" locator as in the question
load_more_xpath = "//button[@class='btn btn-success btn-block' and contains(., 'Load More')]"

while True:
    soup = bs(driver.page_source, 'lxml')

    # Only the first listing is read on each pass
    name = soup.find("h3", class_="ellipsis").get_text(strip=True)
    address = soup.find("span", class_="ellipsis").get_text(strip=True)

    print(name)
    print(address)

    try:
        # Scroll the button into view and click it once it is clickable
        driver.execute_script("arguments[0].scrollIntoView();", driver.find_element(By.XPATH, load_more_xpath))
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, load_more_xpath))).click()
        time.sleep(2)
    except (TimeoutException, NoSuchElementException):
        # Button no longer present or clickable, so every result has been loaded
        break
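
Once the loop exits, the whole result list is in the DOM, so you can parse the page a single time and collect every listing instead of re-reading only the first one on each pass. A minimal sketch, continuing from the loop above; it assumes each card exposes its name in an h3.ellipsis and its address in a span.ellipsis in matching order, which you should verify against the live markup:

# Runs after the while loop has clicked "Load More" until it disappeared.
# NOTE: pairing h3.ellipsis names with span.ellipsis addresses by position is
# an assumption about the page markup, not something confirmed by the site.
soup = bs(driver.page_source, 'lxml')
driver.quit()

names = [h3.get_text(strip=True) for h3 in soup.find_all("h3", class_="ellipsis")]
addresses = [span.get_text(strip=True) for span in soup.find_all("span", class_="ellipsis")]

for name, address in zip(names, addresses):
    print(name, '-', address)

From there the pairs can go into a pandas DataFrame and out to CSV, which is presumably what the pd and csv imports in the question are for.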