Home > Software design >  I am very new to scraping please bear with me and this is my 1st project. I am trying to scrape a si
I am very new to scraping please bear with me and this is my 1st project. I am trying to scrape a si

Time:10-12

"problem lines"
                                        # Re-query every radio-button span on the page.
                                        for_tariff_loop = driver.find_elements_by_xpath("//span[@class='phx-radio__element']")
                                        # The label is searched for *inside* the i-th radio span (element-scoped lookup).
                                        radio_label_list = for_tariff_loop[i].find_element_by_css_selector('span[class="phx-radio__label"]')
                                        # Prints the element object returned above; no .text is read from it.
                                        print(radio_label_list)
                                        time.sleep(1)

Website I'm scraping: https://www.telekom.de/unterwegs/apple/apple-iphone-13-pro/graphit-512gb

label image: I was not able to print the radio button's label for the checked button. I don't know what the mistake is or where I made it. Could anyone help with this? It would be helpful for me to learn. The change-tariff links are given below.

import xlwt
from selenium import webdriver
import re
import time
from datetime import date
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

class telekommobiles:
    """Selenium-driven walker over the telekom.de smartphone promotion listing.

    Holds the listing URL plus a few constant descriptive fields and exposes
    :meth:`telekom`, which drives a Chrome session through every product
    page and prints the tariff headings and radio-button labels it finds.
    """

    # Locators that are re-queried repeatedly while walking a product page.
    _COLOR_XPATH = "//li[@data-qa='list_ColorVariant']"
    _RADIO_XPATH = "//span[@class='phx-radio__element']"

    def __init__(self):
        """Set the listing URL and the constant descriptive fields."""
        self.url = "https://www.telekom.de/mobilfunk/geraete/smartphone?page=1&pageFilter=promotion"
        self.country = 'DE'
        # NOTE(review): the site is German, so 'GBP' looks unintended -- confirm.
        self.currency = 'GBP'
        self.VAT = 'Included'
        self.shipping = 'free shipping within 3-4 weeks'
        self.Pre_PromotionPrice = 'N/A'
        self.color = 'N/A'

    def telekom(self):
        """Open Chrome, visit every listed product and print its tariff labels.

        The browser is always closed when the walk ends, whether it finished
        or failed. Apart from the best-effort radio-button clicks, lookup and
        wait failures raised by Selenium propagate to the caller.
        """
        driver = webdriver.Chrome()
        try:
            driver.maximize_window()
            driver.get(self.url)
            time.sleep(5)
            driver.find_element(By.CSS_SELECTOR, 'button.cl-btn.cl-btn--accept-all').click()
            print("cookies accepted")

            # Collect every href first: navigating away would invalidate the
            # listing elements we are iterating over.
            for product_link in self._collect_product_links(driver):
                driver.get(product_link)
                self._walk_product_page(driver)
        finally:
            # Do not leave an orphaned browser behind when a lookup fails.
            driver.quit()

    @staticmethod
    def _collect_product_links(driver):
        """Return the href of the first anchor in each listing tile."""
        tiles = driver.find_elements(By.CSS_SELECTOR, 'div[class="styles_item__12Aw4"]')
        return [tile.find_element(By.TAG_NAME, 'a').get_attribute('href') for tile in tiles]

    @staticmethod
    def _try_click_nth(driver, xpath, index):
        """Click the index-th match of xpath; return False if that fails."""
        # Imported locally so the fix does not depend on the file's import block.
        from selenium.common.exceptions import WebDriverException

        try:
            driver.find_elements(By.XPATH, xpath)[index].click()
        except (WebDriverException, IndexError):
            # Best-effort: an option that cannot be clicked (or has vanished)
            # is skipped rather than aborting the whole walk.
            return False
        return True

    def _walk_product_page(self, driver):
        """Cycle colour and memory options, printing tariff data for each."""
        color_options = WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, self._COLOR_XPATH)))
        for color_idx in range(len(color_options)):
            # Elements are looked up again by index on every pass instead of
            # reusing the waited-for list -- presumably because a click
            # re-renders the page and stales old references (TODO confirm).
            driver.find_elements(By.XPATH, self._COLOR_XPATH)[color_idx].click()
            time.sleep(3)

            memory_options = WebDriverWait(driver, 30).until(
                EC.presence_of_all_elements_located((By.XPATH, self._RADIO_XPATH)))
            for memory_idx in range(len(memory_options)):
                self._try_click_nth(driver, self._RADIO_XPATH, memory_idx)
                time.sleep(5)

                # Open the "change tariff" layer.
                driver.find_element(
                    By.CSS_SELECTOR,
                    'button[class="phx-link phx-list-of-links__link js-mod tracking-added"]',
                ).click()
                time.sleep(3)

                self._print_tariff_sections(driver)

                # Close the tariff layer before trying the next memory option.
                driver.find_element(
                    By.CSS_SELECTOR,
                    'span[class="icon-after-yellow-close right close popup-close-tr js-popup-close"]',
                ).click()

    def _print_tariff_sections(self, driver):
        """Print each tariff section heading followed by the radio labels."""
        sections = driver.find_elements(By.CSS_SELECTOR, 'section[class="tariff-catalog--layer"]')
        for section in sections:
            time.sleep(5)
            heading = section.find_element(
                By.CSS_SELECTOR, 'h2[class="page-title page-title--lowercase"]').text
            print(heading)

            boxes = section.find_elements(By.CSS_SELECTOR, '.phx-tariff-box__section')
            for _box in boxes:
                # NOTE(review): the radio lookup below is page-wide, not scoped
                # to this box, so the same labels appear to be printed once
                # per box -- confirm whether that is intended.
                self._print_radio_labels(driver)

    def _print_radio_labels(self, driver):
        """Click every radio button in turn and print the text of its label."""
        radios = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, self._RADIO_XPATH)))
        for radio_idx in range(len(radios)):
            if self._try_click_nth(driver, self._RADIO_XPATH, radio_idx):
                time.sleep(3)

            radio = driver.find_elements(By.XPATH, self._RADIO_XPATH)[radio_idx]
            # The label span is a sibling of the radio span, not a descendant,
            # so it has to be reached via the following-sibling axis.
            label_text = radio.find_element(By.XPATH, "./following-sibling::span").text
            print(label_text)
            time.sleep(1)

        
# Script entry: build the scraper and start the walk right away.
telekom_de = telekommobiles()
telekom_de.telekom()

CodePudding user response:

You are trying to find an element within an element. Because radio_label_list is looked up from for_tariff_loop[i], the effective XPath for radio_label_list becomes:

//span[@class='phx-radio__element']//span[@class="phx-radio__label"]

Which does not exist in the DOM.

I tried the last part of the code and was able to print the memory sizes as shown below. Please try it and confirm:

I replaced the CSS selector for radio_label_list with this XPath: ./following-sibling::span

# Wait (up to 10 s) until the radio-button spans are present on the page.
looping_for_tariff = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.XPATH, "//span[@class='phx-radio__element']")))
for i in range(len(looping_for_tariff)):
    try:
        for_tariff_loop = driver.find_elements(By.XPATH, "//span[@class='phx-radio__element']")
        for_tariff_loop[i].click()
        time.sleep(3)
    except Exception:
        # Best-effort click: skip a radio that cannot be clicked, but (unlike
        # a bare ``except:``) still let Ctrl-C / SystemExit through.
        pass

    # Look the elements up again after the click attempt, then read the
    # label from the sibling span that follows the radio span.
    for_tariff_loop = driver.find_elements(By.XPATH, "//span[@class='phx-radio__element']")
    radio_label_list = for_tariff_loop[i].find_element(By.XPATH, "./following-sibling::span").text
    print(radio_label_list)
    time.sleep(1)
128 GB
256 GB
512 GB
1 TB
  • Related