I'm trying to take a screenshot of each comment in a Reddit post using Selenium with Python. All comments have the same id/class, and that's what I have used to select them.
Here's my code:
import requests
from bs4 import BeautifulSoup
import pyttsx3, pyautogui
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
# Launch Chrome through the local chromedriver. The raw string keeps the
# Windows backslashes literal instead of relying on invalid escape sequences.
driver = webdriver.Chrome(executable_path=r'C:\Selenium_Drivers\chromedriver.exe')
url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'
driver.get(url)
driver.implicitly_wait(5)
total_height = int(driver.execute_script("return document.body.scrollHeight"))
u = 1  # running number used in the screenshot file name
# Scroll down the page in 50-pixel steps, saving one screenshot per step.
for i in range(1, total_height*2, 50):
    driver.execute_script(f"window.scrollTo(0, {i})")
    # NOTE: this selector names one specific element id (t1_ikllxsq), so the
    # lookup resolves to the same comment on every iteration.
    comment = driver.find_element(By.CSS_SELECTOR, 'div#t1_ikllxsq._3sf33-9rVAO_v4y0pIW_CH')
    comment.screenshot(rf'E:\WEB SCRAPING PROJECTS\PROJECTS\Reddit Scraping\shot{u}.png')
    u += 1
My code scrolls down the page and saves screenshots to my desired path, but the problem is that all the screenshots are of the first element (comment) in the Reddit post.
I want my code to save a screenshot of each comment separately. I need help.
CodePudding user response:
To get a screenshot of each comment, you need to identify the comment elements, scroll to each comment in turn, and then take the screenshot.
This approach works for me.
# Assumes `driver`, `WebDriverWait`, `EC` and `By` are already set up as in
# the question's script; this snippet additionally requires `import time`.
url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'
driver.get(url)
# Dismiss the cookie banner by clicking its "Reject non-essential" button
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(.,'Reject non-essential')]"))).click()
# Get all the comments (located through each comment's author link)
comments = driver.find_elements(By.CSS_SELECTOR, "[data-testid='comment_author_link']")
print(len(comments))
for number, comment in enumerate(comments, start=1):
    # Scroll to each comment: reading this property scrolls the element into view
    comment.location_once_scrolled_into_view
    time.sleep(2)  # slow the script down before taking the screenshot
    # Saves the browser viewport (not just the element) as shot1.png, shot2.png, ...
    driver.save_screenshot(rf'E:\WEB SCRAPING PROJECTS\PROJECTS\Reddit Scraping\shot{number}.png')
Note: you already have all the other libraries; you only need to add `import time`.
CodePudding user response:
Here is an example that also scrolls to the end of the page:
# Needed libs
from selenium.webdriver import ActionChains, Keys
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium import webdriver
# Initialize the driver and navigate to the post
driver = webdriver.Chrome()
driver.maximize_window()
url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'
wait = WebDriverWait(driver, 5)
driver.get(url)
# Wait until the reject-cookies button can be clicked, then click it
reject_cookies_button = wait.until(EC.element_to_be_clickable((By.XPATH, "(//section[@class='_2BNSty-Ld4uppTeWGfEe8r']//button)[2]")))
reject_cookies_button.click()
# Scroll to the end of the page, repeating until the page height stops changing
while True:
    height_before_scroll = driver.execute_script('return document.body.scrollHeight')
    driver.execute_script('window.scrollTo(100, document.body.scrollHeight);')
    time.sleep(2)  # pause before re-reading the page height
    if driver.execute_script('return document.body.scrollHeight') == height_before_scroll:
        break
# Collect every comment element on the page
comments = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'Comment')]")))
# Take a screenshot of every comment and save it as shot1.png, shot2.png, ...
for u, comment in enumerate(comments, start=1):
    driver.execute_script("arguments[0].scrollIntoView();", comment)
    comment.screenshot(f'./shot{u}.png')
I hope the comments in the code help you understand what is happening.
My code was written on Linux; just initialize the driver with the chromedriver for your own system.