The goal is to get all the news articles from this page by clicking the load more button programmatically: https://money.tmx.com/en/quote/AMK/news
Here's what I have tried so far:
from bs4 import BeautifulSoup
import urllib3
from selenium import webdriver
import re
import time
import random
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
# Page whose news list is extended by pressing its "load more" button.
url = "https://money.tmx.com/en/quote/AMK/news"

chrome_options = webdriver.ChromeOptions()
# Uncomment to run Chrome without a visible window.
# chrome_options.add_argument("--headless")

# NOTE(review): the positional driver path and the `chrome_options=` keyword
# differ from the `Service(...)` / `options=` form used elsewhere on this
# page -- confirm which form the installed Selenium version accepts.
chrome_driver = webdriver.Chrome(r"\chromedriver.exe", chrome_options=chrome_options)
chrome_driver.get(url)

# Keep clicking the button until it can no longer be found in a clickable
# state within 5 seconds; that is treated as "nothing more to load".
while True:
    try:
        WebDriverWait(chrome_driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'LOAD MORE')]"))
        ).click()
    except Exception as e:
        # Print the caught exception so the reason the loop stopped is visible.
        print(e)
        break

# Parse whatever the browser has rendered so far, then close the window.
bs = BeautifulSoup(chrome_driver.page_source, features="lxml")
chrome_driver.close()
However, this always loads only 10 items and then breaks, apparently because it can't locate the Load more button after a click. Any suggestions would be highly appreciated. Thanks.
CodePudding user response:
The button text isn't actually all caps. If you view the source of that page, the text is 'Load more', not 'LOAD MORE'. It is displayed in all caps because of a CSS style.
I changed the xpath from //button[contains(., 'LOAD MORE')]
to //button[contains(., 'Load more')]
and the button is found for me.
CodePudding user response:
The Load more button is initially outside the visible part of the page. You have to scroll the page in order to access it.
Also, there are ad banners on the page that you need to close before you can work with it.
The following code works. It closes the banners and clicks "Load more" once. If you want to click that button more than once, the code can easily be adapted to do so.
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Imported here because this snippet handles the timeout raised when an
# optional banner never shows up.
from selenium.common.exceptions import TimeoutException

# Upper bound on scroll steps so the script cannot loop forever if the
# button never becomes visible.
MAX_SCROLL_STEPS = 100

options = Options()
options.add_argument("start-maximized")

# Raw string: "\w" and "\c" are not valid escape sequences, so a plain
# literal only works by accident and triggers a warning on newer Pythons.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)

url = "https://money.tmx.com/en/quote/AMK/news"
driver.get(url)

# Close the ad banners. This is best-effort: a banner that does not appear
# within the wait timeout is skipped instead of aborting the script.
for banner_close_selector in (
    "button.ub-emb-close",
    ".ssrt-close-anchor-button.ssrt-close-anchor-button-desktop",
):
    try:
        wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, banner_close_selector))
        ).click()
    except TimeoutException:
        print(f"Banner {banner_close_selector!r} did not appear; continuing")

# Wait for the button to exist in the DOM instead of assuming it is already
# rendered at the moment the banners are gone.
load_more = wait.until(
    EC.presence_of_element_located((By.XPATH, "//button[contains(.,'Load more')]"))
)

# Scroll down in 600-pixel steps until the button reports itself as displayed.
for _ in range(MAX_SCROLL_STEPS):
    if load_more.is_displayed():
        break
    driver.execute_script("window.scrollBy(0, arguments[0]);", 600)
    time.sleep(0.3)

# Click "Load more" once. If the button never became visible, this click
# raises and surfaces the problem instead of hanging.
load_more.click()
CodePudding user response:
The code example below clicks the Load more button and collects the headlines after each click.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
from bs4 import BeautifulSoup
import pandas as pd
# Imported here because the loop below stops on this specific error.
from selenium.common.exceptions import NoSuchElementException

# How many times the article list is read. The "Load more" button is clicked
# between consecutive reads, i.e. PAGES_TO_LOAD - 1 times.
PAGES_TO_LOAD = 3

s = Service('./chromedriver')
driver = webdriver.Chrome(service=s)
url = 'https://money.tmx.com/en/quote/AMK/news'
data = []

try:
    driver.get(url)
    driver.maximize_window()
    time.sleep(5)  # fixed pause to let the page render before the first read

    for page in range(PAGES_TO_LOAD):
        soup = BeautifulSoup(driver.page_source, 'lxml')
        # NOTE(review): these class names look auto-generated and may change
        # when the site is redeployed -- verify the selector still matches.
        cards = soup.select('ol.sc-ddCvFA.jCEzJs li')
        print(len(cards))

        # Every pass re-reads the whole list from the page, so replace the
        # collected headlines instead of appending duplicates. Cards without
        # an <h3> are skipped rather than aborting the loop.
        data = [card.h3.get_text(strip=True) for card in cards if card.h3 is not None]

        # No further read follows the last pass, so another click is pointless.
        if page == PAGES_TO_LOAD - 1:
            break

        try:
            load_more_button = driver.find_element(
                By.XPATH, "//button[contains(text(),'Load more')]"
            )
        except NoSuchElementException as e:
            # The button is gone: there is nothing more to load.
            print(e)
            break

        # A JavaScript click is used instead of a native .click();
        # presumably because the button can be off-screen or covered by a
        # banner -- TODO confirm.
        driver.execute_script("arguments[0].click();", load_more_button)
        time.sleep(3)  # fixed pause to let the next batch of articles render
finally:
    # Always shut the browser down, even if scraping failed part-way.
    driver.quit()

print(set(data))
# Optional: put the unique headlines into a DataFrame.
# df = pd.DataFrame(set(data))
# print(df)
Output:
10
20
30
{"American Creek's JV Partner Tudor Gold Increases Exploration Efforts with Eight Drills at Their Flagship Treaty Creek Property in BC's Golden Triangle", 'American Creek Resources Grants Incentive Options', "American Creek's JV Partner Tudor Gold Intersects 1.91 g/t Gold Eq Over 85.4 Meters in Step-Out Hole and Significantly Extends Gold-Copper
Mineralization to the Northeast at the Goldstorm Deposit in BC's Golden Triangle", 'American Creek Announces Extension of 2021 Drill Hole GS-21-113-W2 and Reports a Final Composite of 1.12 g/t AuEq over 1,497.5 Meters at the Goldstorm Deposit, Treaty Creek Project JV, Golden Triangle, British Columbia', 'American Creek Reports Consistent Results
from Continued Step-Out Drilling at Treaty Creek Joint Venture Including 2.00 g/t AuEq over 66.0 m with 5.0 m of 8.22 g/t AuEq (GS-22-145-W1) and 4.38 g/t AuEq over 57 m (GS-22-154)', 'American Creek Resources Announces Passing of Founder and Board Chairman of JV Partner Tudor Gold', 'American Creek: JV Partner Tudor Gold Initiates Diamond Drilling on the Goldstorm Deposit at Treaty Creek in the Golden Triangle, Northwestern B.C.', "237.3 M of 1.51 g/t AuEQ within 600 M of 0.91 g/t AuEQ from the Goldstorm Deposit on American Creek's JV Treaty Creek", "American Creek's JV Partner Tudor Gold Announces Filing of NI 43-101 Technical Report for the Initial Mineral Resource Estimate at Treaty
Creek Project, Golden Triangle, British Columbia", 'American Creek Announces Results from Annual General and Special Meeting of Shareholders', 'American Creek Resources Adds OTCQB Listing to Capitalize on Investor Interest in the United States', 'American Creek Resources Announces DTC Eligibility', 'Treaty Creek Drilling Expands Goldstorm System
to Northeast with 1.76 G/t Gold Eq over 196.5 M Within 564 m of 1.09 G/t Gold Eq, and Cuts Near-Surface Gold Mineralization at the Eureka Zone with 0.76 G/t Gold Eq over 217.5 m', "American Creek's JV Partner Tudor Gold Prepares for 2022 Drill Hole Program at Treaty Creek Gold Project in BC's Golden Triangle", "American Creek's JV Partner Tudor Gold Intersects Near-Surface Intercept of 1.09 G/T Gold Eq Over 476.1M and 1.22 G/T Gold Eq Over 355.15M including 1.45 G/T Gold Eq Over 247.5M at Goldstorm Deposit in BC'S Golden Triangle", "American Creek's JV Partner Tudor Gold Intersects 972.0 Meters of 1.265 g/t Gold EQ Including 405.0 Meters of 1.439 g/t Gold EQ and 456.0 Meters of 1.352 g/t Gold EQ in Step-Out Drill Hole GS-21-113 at the Goldstorm Deposit, Treaty Creek, B.C.'s Golden Triangle", 'American Creek Congratulates JV Partner Tudor Gold on the Appointment of Two Accomplished Mining Executives to Their Advisory Board', 'American Creek Announces 70.96 g/t AuEq over 1.0 Meter Within 39.15 g/t AuEq over 2.0 Meters with a 225 Meter Northeast Step-Out Hole at the Goldstorm Deposit, Treaty Creek Project JV, Golden Triangle, British Columbia',
'JV Partner Tudor Gold Confirms 4th Significant Gold-Silver Discovery at Treaty Creek with Drill Hole CBS-21-02 Intercepting 1.30 g/t Gold Eq over 53.9m Within 155.5m of 0.82 g/t Gold Eq at "Calm Before the Storm Zone" (CBS)', "American Creek Announces Expansion of Drilling on the Goldstorm Deposit with Four Drill Rigs at Treaty Creek in BC's Golden Triangle", "American Creek's JV Partner Tudor Gold Intersects 130.5 Meters of 2.389 g/t Gold EQ Within 474.0 Meters of 1.039 g/t Gold EQ in Drill Hole GS-21-110 and 168.0 Meters of 1.391 g/t Gold EQ Within 801.0 Meters of 0.704
g/t Gold EQ in Drill Hole GS-21-103 at the Goldstorm Deposit on Treaty Creek, in B.C.'s Golden Triangle.", 'American Creek Reports 9.55 g/t AuEq Over 10.5 Meters Within 102.0 Meters of 2.64 g/t AuEq Within 517.5 Meters Of 1.10 g/t AuEq From The Goldstorm Deposit, At The Joint Venture Treaty Creek Property, Golden Triangle, British Columbia', "American Creek's JV Partner Tudor Gold Intersects 0.97 G/T Gold EQ Over 1,320M, Including 1.38 G/T Gold Eq Over 556.5 M, Ending In Strong Mineralization With 2.34 G/T Gold Eq Over 57M in Step Out hole At Goldstorm Deposit In BC'S Golden Triangle", 'American Creek Reports That JV Partner Tudor Gold Has Bought Out and Fully Extinguished an Underlying
NSR Interest in the Treaty Creek JV Project', 'American Creek Completes Shares for Services Agreement with AGORACOM', 'American Creek Intersects Strong Gold-Copper Porphyry Mineralization Returning 1.82 g/t AuEQ over 114.0 M Within
592.5 M of 1.16 g/t AuEQ in Northernmost Step-Out Section of the Goldstorm Deposit, at Joint Ventured Treaty Creek', 'American Creek Reports Preliminary Results from a 350m Step-Out Hole, with 59.53 G/T AuEq over 1.5m and In-Fill Hole with 2.02 G/T AuEq over 180m Including 3.18 G/T AuEq over 93m at the Joint Venture Treaty Creek Property, Golden
Triangle, British Columbia', 'American Creek Announces 20.86 G/T AuEQ over 4.5M Within 25.5M of 9.96 G/T AuEQ Within 663M of 0.97 G/T AuEQ in a 500M Step-Out Hole at the Goldstorm Deposit, Treaty Creek Project JV, Golden Triangle, British Columbia', 'American Creek Resources Announces Incentive Options Repricing', "American Creek's JV Partner Tudor Gold Commences 2022 Exploration Drill Hole Program at Their Flagship Treaty Creek Property in BC's Golden Triangle"}