I ran this code to scrape a dynamic website using Selenium. Instead of iterating through the for loop, as my code instructs, and giving me the data from the other elements that share the same class name, it repeats the first element's data only.
Code
import time
from selenium import webdriver
from selenium.webdriver.chrome import service
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
# Scrape the tracks listed on a SoundCloud profile page into a pandas DataFrame.
# NOTE(review): the path below is not a raw string, so "\P" and "\c" are
# unrecognized escape sequences; Python keeps the backslashes but warns about
# them. A raw string (r"...") would avoid the warning.
ser= Service("C:\Program Files (x86)\chromedriver.exe")
options = webdriver.ChromeOptions()
# Exclude Chrome's 'enable-logging' switch (presumably to quiet the console
# output of chromedriver - confirm).
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options,service=ser)
driver.get('https://soundcloud.com/jujubucks')
print(driver.title)
# Collect every element with the class 'soundList__item' (one per listed track,
# judging by the output below).
song_contents = driver.find_elements(By.CLASS_NAME, 'soundList__item')
song_list = []
# NOTE(review): the loop body's indentation has been lost in this listing; the
# lines down to song_list.append(song) belong inside the loop (the output has
# one row per item).
# BUG (the subject of the question): every lookup below starts from `driver`,
# i.e. the whole page, and find_element returns only the first match, so each
# iteration reads the same first track. Searching from `song` with
# element-relative locators would read each item instead.
for song in song_contents:
search = driver.find_element(By.CLASS_NAME, 'soundTitle__usernameText').text
# NOTE(review): '//span[@]' is not a valid XPath; the attribute test appears to
# have been lost from this listing.
search_song = driver.find_element(By.XPATH, '//span[@]').text
search_date = driver.find_element(By.CLASS_NAME, 'sc-visuallyhidden').text
# Absolute XPath hard-wired to li[1], so it can only ever address the first
# list item.
search_plays = driver.find_element(By.XPATH, '//*[@id="content"]/div/div[4]/div[1]/div/div[2]/div/div[2]/ul/li[1]/div/div/div/div[2]/div[4]/div[2]/div/ul/li/span/span[2]').text
# Rebinds the loop variable `song` to the row dict for this iteration.
song ={
'Artist': search,
'Song_title': search_song,
'Date': search_date,
'Streams': search_plays
}
song_list.append(song)
# Build a table from the collected rows and print it once, after the loop.
df = pd.DataFrame(song_list)
print(df)
# Close the browser and end the WebDriver session.
driver.quit()
This is the output that it gives: only one set of data, repeated, instead of moving on to the other sets.
Output
Stream Juju Bucks music | Listen to songs, albums, playlists for free on SoundCloud
Artist Song_title Date Streams
0 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31
1 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31
2 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31
3 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31
4 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31
CodePudding user response:
To find an element within another element, start the XPath with a dot, as shown below:
# Print the title of every track on the profile page, looking each title up
# relative to its own list item rather than from the page root.
driver.get("https://soundcloud.com/jujubucks")
wait = WebDriverWait(driver, 30)

# Close Cookie pop-up
wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()

song_contents = driver.find_elements(By.CLASS_NAME, 'soundList__item')
for option in song_contents:
    # The leading dot makes the XPath relative to `option`, so the title is
    # extracted from that particular song only.
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer
    # Selenium 4 releases no longer provide.
    title = option.find_element(
        By.XPATH, ".//a[contains(@class,'soundTitle__title')]/span"
    ).text
    print(title)
Squad Too Deep Ft. Cool Prince (Outro)
Tropikana ft. P-Dogg Amazing
Party Ka Mngani Ft. X-Poll
Joy Ft. Black Sushi & Gavin Bowden
Amazing ft. X-Poll
Update:
# Print the titles of the first 20 tracks, addressing each list item by its
# position and scrolling it into view before reading it (presumably so that the
# dynamic page loads further items - confirm against the live page).
# NOTE(review): find_element raises NoSuchElementException if the page holds
# fewer than 20 items.
for i in range(1, 21):  # XPath positions are 1-based
    song_contents = driver.find_element(
        By.XPATH, "//li[@class='soundList__item'][{}]".format(i)
    )
    driver.execute_script("arguments[0].scrollIntoView(true);", song_contents)
    # Use a dot in the XPath to find an element within an element.
    title = song_contents.find_element(
        By.XPATH, ".//a[contains(@class,'soundTitle__title')]/span"
    ).text
    print(title)