Python selenium just screenshots the first element multiple times throughout the loop-CodePudding

I'm trying to take a screenshot of each comment in a Reddit post using Selenium in Python. All comments have the same id/class, and that's what I have used to select them.

Here's my code;

import requests
from bs4 import BeautifulSoup
import pyttsx3, pyautogui

from PIL import Image
from io import BytesIO

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys

# Launch Chrome through a locally installed chromedriver. A raw string is used
# so the backslashes in the Windows path are never read as escape sequences.
driver = webdriver.Chrome(executable_path=r'C:\Selenium_Drivers\chromedriver.exe')

url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'

driver.get(url)
driver.implicitly_wait(5)

# Page height right after loading; used as the bound for the scroll loop.
total_height = int(driver.execute_script("return document.body.scrollHeight"))

u = 1
# Scroll down in 50px steps, up to twice the initial page height.
for i in range(1, total_height*2, 50):
    driver.execute_script(f"window.scrollTo(0, {i})")

    # NOTE: '#t1_ikllxsq' is an id selector and find_element returns a single
    # element, so this lookup yields the same comment on every iteration.
    comment = driver.find_element(By.CSS_SELECTOR, 'div#t1_ikllxsq._3sf33-9rVAO_v4y0pIW_CH')
    comment.screenshot(rf'E:\WEB SCRAPING PROJECTS\PROJECTS\Reddit Scraping\shot{u}.png')
    u += 1  # advance the counter so each screenshot gets its own file name

My code scrolls down the page and saves screenshots to my desired path, but the problem is that all the screenshots are of the first element (comment) in the Reddit post.

I want my code to save a screenshot of each comment separately. Need help

CodePudding user response：

To get a screenshot of each comment, you need to identify the comment elements, then scroll to each comment and take the screenshot.

This approach works for me.

import time  # required for the time.sleep() pause in the loop below

url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'
driver.get(url)
# Dismiss the cookie banner by clicking the "Reject non-essential" button
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(.,'Reject non-essential')]"))).click()
# Get all the comments (each one is located through its author link)
comments = driver.find_elements(By.CSS_SELECTOR, "[data-testid='comment_author_link']")
print(len(comments))

# Number the output files from 1: shot1.png, shot2.png, ...
for i, comment in enumerate(comments, start=1):
    # Scroll to each comment; reading this property scrolls the element into view
    comment.location_once_scrolled_into_view
    time.sleep(2)  # slow the script down before taking the screenshot
    # Raw f-string: backslashes in the Windows path stay literal
    driver.save_screenshot(rf'E:\WEB SCRAPING PROJECTS\PROJECTS\Reddit Scraping\shot{i}.png')

Note: you already have all the other libraries imported; the only additional import you need is the time library.

CodePudding user response：

Here is an example that includes scrolling to the end of the page:

# Needed libs
from selenium.webdriver import ActionChains, Keys
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium import webdriver

# Initialize the driver and navigate to the post
driver = webdriver.Chrome()
driver.maximize_window()
url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'
wait = WebDriverWait(driver, 5)
driver.get(url)

# Wait until the reject-cookies button can actually be clicked, then click it
reject_cookies_button = wait.until(EC.element_to_be_clickable((By.XPATH, "(//section[@class='_2BNSty-Ld4uppTeWGfEe8r']//button)[2]")))
reject_cookies_button.click()

# Keep scrolling to the bottom until the page height stops changing
while True:
    height_before_scroll = driver.execute_script('return document.body.scrollHeight')
    driver.execute_script('window.scrollTo(100, document.body.scrollHeight);')
    time.sleep(2)  # pause before re-measuring the page height
    if driver.execute_script('return document.body.scrollHeight') == height_before_scroll:
        break

# Collect every element whose class contains 'Comment'
comments = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'Comment')]")))

# Take a screenshot of every comment and save it as shot1.png, shot2.png, ...
for u, comment in enumerate(comments, start=1):
    driver.execute_script("arguments[0].scrollIntoView();", comment)
    comment.screenshot(f'./shot{u}.png')

I hope the comments in the code help you to understand what is happening

My code was written on Linux; on your machine, just initialize the driver with your own chromedriver.