I'm new to Selenium and I'm trying to figure out how to scroll a page that loads its content infinitely. I have tried almost everything suggested in other Stack Overflow answers:
1.
# Requires `import time`, `from selenium import webdriver` and
# `from selenium.webdriver.support.ui import WebDriverWait` to be in scope.

# Headless Chrome with a custom user-agent string.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko")
driver = webdriver.Chrome('chromedriver', options=chrome_options)
driver.set_window_size(1320, 550)

exchange_link = "https://icodrops.com/ico-stats/"
driver.get(exchange_link)
wait = WebDriverWait(driver, 10)

SCROLL_PAUSE_TIME = 0.5  # seconds to sleep after each scroll

# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    # Scroll down to bottom
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Wait to load page
    time.sleep(SCROLL_PAUSE_TIME)
    # Calculate new scroll height and compare with last scroll height
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        # The page did not grow after the last scroll, so stop.
        break
    last_height = new_height
from selenium.webdriver.common.keys import Keys
# Wait until the container (addressed by its absolute XPath) is present in the DOM.
container_xpath = 'html[1]/body[1]/div[1]/div[1]/div[1]/main[1]/div[1]/div[4]/div[2]/div[1]/div[1]/div[1]'
container_present = EC.presence_of_element_located((By.XPATH, container_xpath))
Number = wait.until(container_present)
# Take the last direct <div> child of the container and send it a NULL key.
child_divs = Number.find_elements(By.XPATH, 'div')
lastElement = child_divs[-1]
lastElement.send_keys(Keys.NULL)
# Same container lookup as a (strategy, value) locator tuple.
container_locator = (
    By.XPATH,
    'html[1]/body[1]/div[1]/div[1]/div[1]/main[1]/div[1]/div[4]/div[2]/div[1]/div[1]/div[1]',
)
Number = wait.until(EC.presence_of_element_located(container_locator))
# Last direct <div> child of the container.
lastElement = Number.find_elements(By.XPATH, 'div')[-1]
# Reading this property is the whole point: the value itself is discarded.
lastElement.location_once_scrolled_into_view
etc
# Set scrollTop of the document's scrolling element (falling back to <body>)
# to its full scrollHeight.
scroll_to_end_js = (
    "var scrollingElement = (document.scrollingElement || document.body);"
    "scrollingElement.scrollTop = scrollingElement.scrollHeight;"
)
driver.execute_script(scroll_to_end_js)

# Call scrollIntoView() on the element whose id is 'mydiv'.
scroll_into_view_js = "document.getElementById('mydiv').scrollIntoView();"
driver.execute_script(scroll_into_view_js)
I don't know what else I can try; I have already spent a lot of time trying to fix this.
CodePudding user response:
You should scroll to each web element one by one with the help of execute_script.
Code:
# Assumes `webdriver` and `driver_path` are already defined, and that
# WebDriverWait, By and EC (expected_conditions) are imported.
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://icodrops.com/ico-stats/")

j = 1  # 1-based XPath position of the next child <div> to visit
while True:
    # Wait (up to the 30 s configured above) for the j-th child <div> of the
    # container to become visible.
    ele = wait.until(EC.visibility_of_element_located((By.XPATH, f"(//div[@id='market-ico-stat-container']/div)[{j}]")))
    # Scroll that element to the top of the viewport.
    driver.execute_script("arguments[0].scrollIntoView(true);", ele)
    #scrape it here
    # Advance to the next element; without this the loop would keep waiting
    # for and scrolling to element 1 on every iteration.
    j += 1
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
The loop above has no exit condition of its own, so it will keep running. To stop it, introduce a maximum limit like this:
# Fragment meant to be placed inside a loop body (with `break` indented
# under the `if`): leaves the loop once j equals 500.
if j == 500:
break
However, the web application seems to detect the Selenium script.
CodePudding user response:
I was able to scroll this page with the following code changes:
- Add extra options to make the script undetected (it was being blocked as a bot before).
- Send an ARROW_UP key press after the JS scroll; this does the magic, and the content starts to load.
- Wait 5 seconds after each scroll so the new content has time to load.
# Requires `import time`, `from selenium import webdriver`,
# `from selenium.webdriver.common.by import By` and
# `from selenium.webdriver.common.keys import Keys` to be in scope.

# Headless Chrome with a custom user-agent string.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko")
#extra options
# NOTE(review): presumably these hide the browser's automation markers from
# the site; confirm against the Chrome/ChromeDriver documentation.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
driver = webdriver.Chrome('chromedriver', options=chrome_options)
driver.set_window_size(1320, 550)

exchange_link = "https://icodrops.com/ico-stats/"
driver.get(exchange_link)

SCROLL_PAUSE_TIME = 5 #5 seconds
# Pause once after the initial page load.
time.sleep(SCROLL_PAUSE_TIME)

# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")
# At most 10 scroll rounds; stops earlier once the page height stops growing.
for x in range(0, 10):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Send an ARROW_UP key press to <body> right after the JS scroll.
    # `By` (not `BY`) is the locator class name exported by selenium.
    driver.find_element(By.XPATH, "//body").send_keys(Keys.ARROW_UP)
    time.sleep(SCROLL_PAUSE_TIME)
    new_height = driver.execute_script("return document.body.scrollHeight")
    print(x)
    if new_height == last_height:
        break
    last_height = new_height
I've tested this with Selenium 4 (java-client), Chrome 97, Windows.
I've converted my working Java code to Python.
This code could be improved and optimized, but I hope it at least works.