Home > Blockchain >  Iterate Opening a List of URL's with Selenium
Iterate Opening a List of URL's with Selenium

Time:05-04

I am having trouble iterating over a list of URLs and opening each one with Selenium. The issue is in the part labeled #SECOND PART in my code. `linklinkfin` is a list of length 9 at the moment, but this length can change as more URLs are collected over time. When the code runs, it appears to open the very first URL over and over, and it does not appear to run the append action in the nested while loop, since `textreal_listing` is empty when I print it at the end. As the code runs, I see https://www.nj.gov/dobi/division_insurance/bfd/enforcement2014.htm opening/refreshing continually until the program ends. At the end of each while loop, 1 should get added to `browsercount` and then the code should repeat with the new URL, but this doesn't appear to be happening. Any ideas?

my code:

#FIRST PART
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time
# Collects the text of every paragraph scraped from the yearly pages.
textreal_listing=[]
browser = webdriver.Chrome(r'\\homedirpva1a01\USERSNC$\603225\chromedriver\chromedriver.exe')
time.sleep(5)
browser.get("https://www.nj.gov/dobi/division_insurance/bfd/enforcement.htm")
time.sleep(5)

# The yearly links are matched by two XPaths that differ only in the trailing
# font/a vs. font/font/a step.
linkslist=browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[6]/td[1]/table/tbody/tr[2]/td/ul/li/font/a")
linkslist2=browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[6]/td[1]/table/tbody/tr[2]/td/ul/li/font/font/a")
# Concatenate both result lists; the loop below only uses how many links
# there are, not the link elements themselves.
linklinkfin=linkslist+linkslist2

#SECOND PART
textcount=1        # index of the <p> read inside the current table row
textpage=6         # index of the table row (tr) currently being read
browsercount=2014  # year suffix used to build each page URL
for i in linklinkfin:
    browser.get("https://www.nj.gov/dobi/division_insurance/bfd/enforcement{}.htm".format(browsercount))
    time.sleep(2)
    # NOTE(review): both scraping loops below are nested inside this 404
    # branch. When the .htm page loads normally nothing is scraped and
    # browsercount is never incremented, so the next iteration requests the
    # same URL again.
    if "404 Error" in browser.page_source:
        # Retry the same year with the .html extension.
        browser.get("https://www.nj.gov/dobi/division_insurance/bfd/enforcement{}.html".format(browsercount))
        time.sleep(2)
        while len(textreal_listing)<100:
            texttreesing=browser.find_element_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount))
            textreal_listing.append(texttreesing.text)
            textcount+=1
            # No further paragraph in this row: jump three rows ahead and
            # continue from its second paragraph.
            if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
                textpage+=3
                textcount=2
                # Nothing there either: stop reading this page.
                if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
                    break
                browsercount+=1
        else:
            # while/else: runs only when the loop above ended without break.
            while len(textreal_listing)<100:
                texttreesing=browser.find_element_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount))
                textreal_listing.append(texttreesing.text)
                textcount+=1
                if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
                    textpage+=3
                    textcount=2
                    if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
                        break
                browsercount+=1

print(textreal_listing)

CodePudding user response:

This worked:

#FIRST PART
import pandas as pd
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time
# Collects the text of every paragraph scraped from the linked pages.
textreal_listing=[]
browser = webdriver.Chrome(r'\\homedirpva1a01\USERSNC$\603225\chromedriver\chromedriver.exe')
time.sleep(5)
browser.get("https://www.nj.gov/dobi/division_insurance/bfd/enforcement.htm")
time.sleep(5)

# The yearly links are matched by two XPaths that differ only in the trailing
# font/a vs. font/font/a step.
linkslist=browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[6]/td[1]/table/tbody/tr[2]/td/ul/li/font/a")
linkslist2=browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[6]/td[1]/table/tbody/tr[2]/td/ul/li/font/font/a")

# Read the href of every link element up front, before the browser
# navigates to another page.
linktext=[]
for my_href in linkslist:
    linktext.append(my_href.get_attribute("href"))

for my_hrefs in linkslist2:
    linktext.append(my_hrefs.get_attribute("href"))

#SECOND Part
textcount=1        # index of the <p> read inside the current table row
textpage=6         # index of the table row (tr) currently being read
browsercount=2014  # not used by the loop below
# NOTE(review): textcount and textpage are not reset between URLs, so each
# page after the first starts from wherever the previous one stopped -
# confirm this is intended.
for i in linktext:
    # Open the collected href directly.
    browser.get(i)
    time.sleep(5)
    while len(textreal_listing)<100:
        texttreesing=browser.find_element_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount))
        textreal_listing.append(texttreesing.text)
        textcount+=1
        # No further paragraph in this row: jump three rows ahead and
        # continue from its second paragraph.
        if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
            textpage+=3
            textcount=2
            # Nothing there either: this page is done, move to the next URL.
            if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
                break

print(textreal_listing)
  • Related