Home > Back-end >  Reddit isn't scraping the top comments (python/selenium)
Reddit isn't scraping the top comments (python/selenium)

Time:08-03

I've put the entire code into this question. Thank you to everyone who has replied. This issue is really annoying, so any help is appreciated!

Context: This code is meant to open the top Reddit post of the day/week and take a screenshot of it. Once that's done, it should go to the comments and take screenshots of the top comments on that post. The former works, but the latter does not.

import time,utils,string
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from utils import config

def scrape(post_url):
bot = utils.create_bot(headless=True)
data = {}

try:
    # Load cookies to prevent cookie overlay & other issues
    bot.get('https://www.reddit.com')
    for cookie in config['reddit_cookies'].split('; '):
        cookie_data = cookie.split('=')
        bot.add_cookie({'name':cookie_data[0],'value':cookie_data[1],'domain':'reddit.com'})
    bot.get(post_url)

    # Fetching the post itself, text & screenshot
    post = WebDriverWait(bot, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.Post')))
    post_text = post.find_element(By.CSS_SELECTOR, 'h1').text
    data['post'] = post_text
    post.screenshot('output/post.png')

    # Let comments load
    bot.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)
    
    # Fetching comments & top level comment determinator
    comments = WebDriverWait(bot, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div[id^=t1_][tabindex]')))
    allowed_style = comments[0].get_attribute("style")
    
    # Filter for top only comments
    NUMBER_OF_COMMENTS = 10
    comments = [comment for comment in comments if comment.get_attribute("style") == allowed_style][:NUMBER_OF_COMMENTS]

    print('           
  • Related