I want to scrape the data from the MCQs on this page, but my code raises an error. I also want to move on to the next page,
and I would like to know how to navigate through the following pages
so that I can scrape all of the MCQ data. Is there a feasible solution? Kindly let me know.
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Chrome launch options for the scraping session.
options = webdriver.ChromeOptions()
# Headless mode is left disabled; uncomment to run without a visible window.
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome's --window-size switch takes "width,height" (comma-separated); the
# "1920x1080" spelling is not honoured by the regular, non-headless browser.
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# Shared driver instance; webdriver_manager downloads a matching chromedriver.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
def supplyvan_scraper():
    """Open the quiz page and print the current question and its four options.

    Uses the module-level ``chrome_driver`` as a context manager, loads the
    quiz URL, and prints the visible text of the ``quest``, ``onee``, ``two``,
    ``three`` and ``four`` spans on one line.

    Returns:
        None. The scraped text is written to stdout.
    """
    with chrome_driver as driver:
        driver.implicitly_wait(15)
        url = 'http://www.tulsithakur.com/bankingquiztwo.php'
        driver.get(url)
        # Give the page's scripts a moment to populate the quiz widgets.
        time.sleep(3)

        # Selenium locators must resolve to elements: an XPath ending in
        # "//text()" selects text nodes and is rejected as an invalid
        # selector. Locate the <span> itself and read its .text instead.
        # (find_element_by_xpath was also removed in Selenium 4.)
        title = driver.find_element(By.XPATH, "//span[@id='quest']").text
        option_1 = driver.find_element(By.XPATH, "//span[@id='onee']").text
        option_2 = driver.find_element(By.XPATH, "//span[@id='two']").text
        option_3 = driver.find_element(By.XPATH, "//span[@id='three']").text
        option_4 = driver.find_element(By.XPATH, "//span[@id='four']").text
        print(title, option_1, option_2, option_3, option_4)


supplyvan_scraper()
CodePudding user response:
Initially, this page contains no text in the MCQ question or in its options. The data is fetched only after you click the Next button, but then every field (question, answers) shows "undefined".
You can check this as follows:
# Advance to the next question, then read the question and its four options.
next_button = driver.find_element(By.XPATH, '//*[@id="next"]')
next_button.click()

# XPaths of the question span followed by the four answer spans, in print order.
field_xpaths = (
    "//span[@id='quest']",
    "//span[@id='onee']",
    "//span[@id='two']",
    "//span[@id='three']",
    "//span[@id='four']",
)
title, option_1, option_2, option_3, option_4 = (
    driver.find_element(By.XPATH, xpath).text for xpath in field_xpaths
)
print(title, option_1, option_2, option_3, option_4)
If you want to scrape the data from all pages by clicking the Next button, you can try this:
# Keep clicking "next" and printing each question until Selenium raises
# (for example when the next button can no longer be found or clicked);
# the exception is printed and the loop ends.
field_xpaths = (
    "//span[@id='quest']",
    "//span[@id='onee']",
    "//span[@id='two']",
    "//span[@id='three']",
    "//span[@id='four']",
)
try:
    while True:
        next_button = driver.find_element(By.XPATH, '//*[@id="next"]')
        next_button.click()
        title, option_1, option_2, option_3, option_4 = (
            driver.find_element(By.XPATH, xpath).text for xpath in field_xpaths
        )
        print(title, option_1, option_2, option_3, option_4)
except Exception as e:
    print(e)
If you first click an entry in the left sidebar (Available Quiz Sets), the "undefined" problem goes away.
So the ideal steps would be:
- click a set option (left sidebar)
- scrape the questions while clicking the Next button
Set option button:
# Locate the input of the first quiz-set form (form[1]) in the sidebar list.
set_option_xpath = '//*[@id="features-wrapper"]/div[1]/div/div[1]/section/div/ul/form[1]/div/li/input'
driver.find_element(By.XPATH, set_option_xpath)
The index in form[...] changes for each option. On your page there are 70 options, so you can loop over each option and scrape the data.