I'm trying to scrape this page: https://mlapshin.com/index.php/scrum-quizzes/sm-learning-mode/ I want to scrape the questions and answers. However, I'm having trouble clicking the Next button so that I can scrape all of the information. I've tried this:
# Open the quiz page in Chrome and start the quiz.
driver = webdriver.Chrome('C:/Users/Ihnhn/Documents/WebScrap/Selenium/chromedriver.exe')
driver.get("https://mlapshin.com/index.php/scrum-quizzes/sm-learning-mode/")
driver.maximize_window()
start_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='startQuiz']"))
)
start_button.click()
driver.execute_script("window.scrollTo(0,400);")

# Each question sits in its own "wpProQuiz_listItem" container.
all_questions = driver.find_elements_by_class_name("wpProQuiz_listItem")
for i in all_questions:
    nom_question = i.find_element_by_class_name("wpProQuiz_question_text").text
    print(nom_question)
    # Both waits search from `driver` with an XPath that starts with "//",
    # i.e. over the whole document rather than inside the current `i`.
    check_button = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@name='check']"))
    )
    check_button.click()
    next_button = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@name='next']"))
    )
    next_button.click()
So I wanted to start by getting just the text of each question, but it raises a TimeoutException: it scrapes only the first two questions and that's all. On the second question it doesn't click the "check" button, so I can't continue.
(There are 87 questions, so I expected this code to get all 87 of them.)
I am a beginner in web scraping, so I'm a little lost... If anyone could help me, thanks.
CodePudding user response:
You use the absolute XPath "//input[@name='check']", so it always searches for the first Check input on the page. But if you check the HTML in the browser, you will see that every question has its own Check input (and its own Next input). When the second question is displayed, your XPath still waits for the Check input of the first question - but that input is hidden, so it can never become clickable.
You should use a relative XPath (with a leading dot), ".//input[@name='check']", and you should use i instead of driver when you use a relative XPath.
all_questions = driver.find_elements_by_class_name("wpProQuiz_listItem")
for i in all_questions:
    # Every lookup below is scoped to the current question `i`.
    question_text = i.find_element_by_class_name("wpProQuiz_question_text")
    print(question_text.text)

    # Waiting on `i` (not `driver`) with a ".//" XPath keeps the search
    # inside the current question.
    wait = WebDriverWait(i, 20)
    wait.until(EC.element_to_be_clickable((By.XPATH, ".//input[@name='check']"))).click()
    wait.until(EC.element_to_be_clickable((By.XPATH, ".//input[@name='next']"))).click()
Here is the full working code that I used to test it:
from selenium import webdriver
#from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
#from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
#import time
# Firefox is used here; the commented line is the Chrome equivalent.
#driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get("https://mlapshin.com/index.php/scrum-quizzes/sm-learning-mode/")
driver.maximize_window()

# Start the quiz, then scroll down a little.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='startQuiz']"))
).click()
driver.execute_script("window.scrollTo(0,400);")

# Each question sits in its own "wpProQuiz_listItem" container.
all_questions = driver.find_elements_by_class_name("wpProQuiz_listItem")
for item in all_questions:
    # relative to `item`
    nom_question = item.find_element_by_class_name("wpProQuiz_question_text").text
    print(nom_question)

    # relative to `item`: the leading dot keeps the XPath inside this question
    check_button = WebDriverWait(item, 20).until(
        EC.element_to_be_clickable((By.XPATH, ".//input[@name='check']"))
    )
    check_button.click()
    #time.sleep(0.5)

    # relative to `item`: without the leading dot, "//input[@name='next']"
    # would match the first Next input in the whole document instead of
    # the one belonging to this question
    next_button = WebDriverWait(item, 20).until(
        EC.element_to_be_clickable((By.XPATH, ".//input[@name='next']"))
    )
    next_button.click()
    #time.sleep(0.5)
CodePudding user response:
Thank you very much!
Now I want to get all of the information: question / answers / correct answers / explanations.
I wrote this code when I tried to get only the first question, and it worked (for the first question):
# Scrape the first question only: its text, the possible answers, the
# correct answer(s) and the explanations.
all_question = driver.find_element_by_class_name("wpProQuiz_question")
nom_question = all_question.find_element_by_class_name("wpProQuiz_question_text")
reponses = all_question.find_elements_by_class_name("wpProQuiz_questionList")
# NOTE: the three XPaths below start with "//", so they are evaluated
# against the whole document even though they are called on `all_question`.
correct = all_question.find_elements_by_xpath("//ul/li[@class='wpProQuiz_questionListItem wpProQuiz_answerCorrect']")
commentaire = all_question.find_elements_by_xpath("//div[@class='wpProQuiz_incorrect']/p")
comm = all_question.find_element_by_xpath("//div[@class='wpProQuiz_incorrect']")
# Relative to `comm`: list items inside the explanation block.
list_com = comm.find_elements_by_xpath(".//ul/li")

print("Question :\n" + nom_question.text)
print("\nRéponses possibles :")
for rep in reponses:
    print(rep.text)
print("\nRéponse(s) correcte(s):")
for c in correct:
    print(c.text)
print("\nExplications :")
for com in commentaire:
    # Only print the paragraphs that are currently displayed.
    if com.is_displayed():
        print(com.text)
for i in list_com:
    print(i.text)
So, with the full working code that you gave me, it should be:
all_questions = driver.find_elements_by_class_name("wpProQuiz_listItem")
for item in all_questions:
    nom_question = item.find_element_by_class_name("wpProQuiz_question_text").text
    reponses = item.find_elements_by_class_name("wpProQuiz_questionList")
    # NOTE(review): the three XPaths below start with "//", so they are
    # evaluated against the whole document, not relative to `item`;
    # ".//" would scope them to the current question.
    correct = item.find_elements_by_xpath("//ul/li[@class='wpProQuiz_questionListItem wpProQuiz_answerCorrect']")
    commentaire = item.find_elements_by_xpath("//div[@class='wpProQuiz_incorrect']/p")
    comm = item.find_element_by_xpath("//div[@class='wpProQuiz_incorrect']")
    # Relative to `comm`: list items inside the explanation block.
    list_com = comm.find_elements_by_xpath(".//ul/li")

    # NOTE(review): everything is printed BEFORE "check" is clicked further
    # down; presumably the correct answers and explanations only become
    # visible after that click, which would explain empty output - confirm.
    print("Question : " + nom_question + "\n")
    print("Reponses : ")
    for rep in reponses:
        print(rep.text)
    print("\nReponse(s) correct : ")
    for c in correct:
        print(c.text)
    print("\nExplications :")
    for com in commentaire:
        if com.is_displayed():
            print(com.text)  # print each <p>
    for i in list_com:
        print(i.text)

    # relative to `item`
    check_button = WebDriverWait(item, 20).until(
        EC.element_to_be_clickable((By.XPATH, ".//input[@name='check']"))
    )
    check_button.click()
    time.sleep(0.5)
    # relative to `item`
    next_button = WebDriverWait(item, 20).until(
        EC.element_to_be_clickable((By.XPATH, ".//input[@name='next']"))
    )
    next_button.click()
    time.sleep(0.5)
But I only get the questions and answers (there are no errors; the other fields are just empty). If you have any idea, please let me know. Thank you.