I am attempting to log into LinkedIn to perform webscraping but I receive a "no such element: U-CodePudding

I am attempting to log into LinkedIn to perform webscraping using the following code in Google Collab:

#Import relevant packages
from bs4 import BeautifulSoup as bs
import time
import pandas as pd
import re as re
# Install chromium, its driver, and selenium
!apt-get update
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!pip install selenium
# set options to be headless, ..
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# open it, go to a website, and get results
browser = webdriver.Chrome('chromedriver',options=options)
browser.get('https://www.linkedin.com/login')
time.sleep(3)

#METHOD 1:
#browser.find_element_by_id('[email protected]').send_keys(email)
#browser.find_element_by_id('T35t11!').send_keys(password)
#browser.find_element_by_id('T35t11!').send_keys(Keys.RETURN)

#METHOD 2:
# locate email form by_class_name
username = browser.find_element_by_class_name('login-email')
# send_keys() to simulate key strokes
username.send_keys('[email protected]')
# locate password form by_class_name
password = browser.find_element_by_class_name('login-password')
# send_keys() to simulate key strokes
password.send_keys('T35t11!')
# locate submit button by_class_name
log_in_button = browser.find_element_by_class_name('login-submit')
# locate submit button by_class_id
log_in_button = browser.find_element_by_class_id('login submit-button')
# locate submit button by_xpath
log_in_button = browser.find_element_by_xpath('//*[@type="submit"]')
# .click() to mimic button click
log_in_button.click()



# Go to webpage
page = "https://www.linkedin.com/company/cgi/"
browser.get(page   'posts/')

SCROLL_PAUSE_TIME = 1.5

# Scroll webpage
height = browser.execute_script("return document.documentElement.scrollHeight")
browser.execute_script("window.scrollTo(0, "   str(height)   ");")

company_page = browser.page_source   

linkedin_soup = bs(company_page.encode("utf-8"), "html")
linkedin_soup.prettify()

containers = linkedin_soup.findAll("div",{"class":"occludable-update ember-view"})

post_dates = []
post_texts = []

for container in containers:

    try:
        posted_date = container.find("span",{"class":"visually-hidden"})
        text_box = container.find("div",{"class":"feed-shared-update-v2__description-wrapper"})
        text = text_box.find("span",{"dir":"ltr"})
    
        post_dates.append(posted_date.text.strip())
        post_texts.append(text.text.strip())

    except:
        pass

data = {
    "Date Posted": post_dates,
    "Post Text": post_texts,
       }

df = pd.DataFrame(data)
df

However, I am receiving the following error message:

NoSuchElementException                    Traceback (most recent call last)
<ipython-input-15-985af2eb8d2f> in <module>()
     25 
     26 # locate email form by_class_name
---> 27 username = browser.find_element_by_class_name('login-email')
     28 # send_keys() to simulate key strokes
     29 username.send_keys('[email protected]')

3 frames
/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
    241                 alert_text = value['alert'].get('text')
    242             raise exception_class(message, screen, stacktrace, alert_text)  # type: ignore[call-arg]  # mypy is not smart enough here
--> 243         raise exception_class(message, screen, stacktrace)
    244 
    245     def _value_or_default(self, obj: Mapping[_KT, _VT], key: _KT, default: _VT) -> _VT:

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".login-email"}
  (Session info: headless chrome=95.0.4638.69)
Stacktrace:
#0 0x5577ca240623 <unknown>
#1 0x5577c9f47d43 <unknown>
#2 0x5577c9f7d5f0 <unknown>
#3 0x5577c9fb1337 <unknown>
#4 0x5577c9f9a5fd <unknown>
#5 0x5577c9faf0ac <unknown>
#6 0x5577c9f9a9e3 <unknown>
#7 0x5577c9f71c0c <unknown>
#8 0x5577c9f730d5 <unknown>
#9 0x5577ca264954 <unknown>
#10 0x5577ca273f6d <unknown>
#11 0x5577ca273c8b <unknown>
#12 0x5577ca2745b2 <unknown>
#13 0x5577ca2ace8b <unknown>
#14 0x5577ca274811 <unknown>
#15 0x5577ca259831 <unknown>
#16 0x5577ca27d218 <unknown>
#17 0x5577ca27d3aa <unknown>
#18 0x5577ca2973bf <unknown>
#19 0x7f2cd7d546db <unknown>

I have tried two different methods as shown above which I found from different examples of web-scraping. Would you be able to let me know what the issue might be and what I could do to resolve this please?

Thank you.

CodePudding user response：

Use the URL: https://www.linkedin.com/checkpoint/rm/sign-in-another-account for logging in on order to avoid the existing user selection page.

There is no element with the class of login-email on the login page so use username = browser.find_element_by_id('username') to locate the email field.