Home > Software design >  Adding to list from a for loop
Adding to list from a for loop

Time:02-28

I got most of my code working but have a lingering question. This is not my full code below; in the interest of readability I selected only a portion. I'm scraping a list of URLs from a web page (in imgs2) and then scraping info from each URL in that list. I would like to create a second list of URLs based on the results gathered from the first (see img_url2 below). What happens is that, instead of every new URL being appended to the list, each new one just replaces the previous one. Any idea how to have all of them added to the list?

    # Question's original snippet, indentation as pasted.
    # Loads the market listing page, collects the item links into imgs2, then
    # opens every item in a separate Chrome instance to read its title and the
    # artist profile link(s).
    driver.get("https://superrare.com/market?market-options=%7B%22first%22:30,%22orderBy%22:%22RECENT_NFT_EVENT_BY_TOKEN_CONTRACT_ADDRESS_AND_TOKEN_ID__TIMESTAMP_DESC%22,%22fileTypes%22:%5B%22image/jpeg%22,%22image/png%22%5D,%22listPrice%22:false,%22isGenesis%22:false,%22isSeries%22:false,%22neverReceivedOffer%22:false,%22reservePrice%22:false,%22liveAuctions%22:false,%22upcomingAuctions%22:false,%22hasSold%22:false,%22ownedByCreator%22:false,%22openOffers%22:false,%22artistsCollected%22:false,%22artistsYouFollow%22:false,%22artistsThatFollowYou%22:false,%22artistsFollowedByFollowed%22:false,%22lowerPriceRange%22:0,%22upperPriceRange%22:100000,%22numCreatorSales%22:false,%22lowerMintedRange%22:null,%22upperMintedRange%22:null,%22startCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE3VDE0OjExOjMyKzAwOjAwIiwiMHhiOTMyYTcwYTU3NjczZDg5ZjRhY2ZmYmU4MzBlOGVkN2Y3NWZiOWUwIiwxNzYzMF1d%22,%22endCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE2VDIwOjMxOjUxKzAwOjAwIiwiMHg0MjQyMzk5YzE2Yjc4MzgxOTZlZDMzZjE3OWU5OWUzZjk5Yjg4NGYyIiwzXV0=%22,%22lastEndCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE3VDE0OjMwOjI3KzAwOjAwIiwiMHhiOTMyYTcwYTU3NjczZDg5ZjRhY2ZmYmU4MzBlOGVkN2Y3NWZiOWUwIiwyNzgxNl1d%22,%22lastStartCursor%22:false,%22hasPreviousPage%22:true,%22hasNextPage%22:true,%22reverse%22:false%7D")
imgs2 = WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.XPATH, "//a[contains(@class,'Name-sc-7kf6vz-3')]")))
    time.sleep(5)   
    
    for i in range(0,30):
        # img_url is reset on every pass of the outer loop and is not used
        # again in this excerpt.
        img_url = []
        for number, item in enumerate(imgs2, 1):
            imgwors2 = item.get_attribute("href")
            # A separate browser is started for every item page.
            driver3 = webdriver.Chrome()
            driver3.get(imgwors2) 
            # Returns True if at least one element matching xpath shows up in
            # driver3 within 55 seconds, False if the wait times out.
            def check_exists_by_xpath(xpath):
                try:
                    WebDriverWait(driver3,55).until(EC.presence_of_all_elements_located((By.XPATH, xpath)))
                except TimeoutException:
                    return False
                return True
            if check_exists_by_xpath("//h1[@class='collectible-detail__collectible-name']"):
                imgsrc4 = WebDriverWait(driver3,65).until(EC.presence_of_all_elements_located((By.XPATH, "//h1[contains(@class,'collectible-detail__collectible-name')]")))
                # Reuses the outer loop's variable name i; title ends up as the
                # text of the last matching heading.
                for i in imgsrc4:
                    title = i.text  
            else:
                title = "none"
            print(title)
    
            # img_url2 is re-created as an empty list for every item, so the
            # URLs appended for earlier items are discarded -- this is the
            # behaviour the question is asking about.
            img_url2 = [] 
            imgsrc2 = WebDriverWait(driver3,55).until(EC.presence_of_all_elements_located((By.XPATH, "//p[@data-testid='artistName']/ancestor::a[contains(@class,'ChildrenLink')]")))                                                                                                                 
            for i in imgsrc2:
                biourl = i.get_attribute("href")
                img_url2.append(biourl)
            # Printed once per item, so each print shows only that item's URLs.
            print(img_url2)
    driver.close()

CodePudding user response:

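Judging from your description and code, I think the variable img_url2 should be initialized before the for loop(s). As written, it is reset to an empty list for every item, so only the URLs from the most recently visited page survive. Initialize it once before the loops and print it once after they finish: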

# Scrape the SuperRare market listing, then open every listed item in its own
# browser to read the item title and collect the artist profile link(s).
# All artist URLs are accumulated in ``img_url2`` and printed once at the end.


def _exists_by_xpath(browser, xpath, timeout=55):
    """Return True if an element matching *xpath* appears within *timeout* seconds.

    Args:
        browser: The WebDriver instance to poll.
        xpath: XPath expression to look for.
        timeout: Maximum number of seconds to wait.

    Returns:
        True when at least one matching element is present before the wait
        expires, False when the wait raises ``TimeoutException``.
    """
    try:
        WebDriverWait(browser, timeout).until(
            EC.presence_of_all_elements_located((By.XPATH, xpath))
        )
    except TimeoutException:
        return False
    return True


driver.get("https://superrare.com/market?market-options=%7B%22first%22:30,%22orderBy%22:%22RECENT_NFT_EVENT_BY_TOKEN_CONTRACT_ADDRESS_AND_TOKEN_ID__TIMESTAMP_DESC%22,%22fileTypes%22:%5B%22image/jpeg%22,%22image/png%22%5D,%22listPrice%22:false,%22isGenesis%22:false,%22isSeries%22:false,%22neverReceivedOffer%22:false,%22reservePrice%22:false,%22liveAuctions%22:false,%22upcomingAuctions%22:false,%22hasSold%22:false,%22ownedByCreator%22:false,%22openOffers%22:false,%22artistsCollected%22:false,%22artistsYouFollow%22:false,%22artistsThatFollowYou%22:false,%22artistsFollowedByFollowed%22:false,%22lowerPriceRange%22:0,%22upperPriceRange%22:100000,%22numCreatorSales%22:false,%22lowerMintedRange%22:null,%22upperMintedRange%22:null,%22startCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE3VDE0OjExOjMyKzAwOjAwIiwiMHhiOTMyYTcwYTU3NjczZDg5ZjRhY2ZmYmU4MzBlOGVkN2Y3NWZiOWUwIiwxNzYzMF1d%22,%22endCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE2VDIwOjMxOjUxKzAwOjAwIiwiMHg0MjQyMzk5YzE2Yjc4MzgxOTZlZDMzZjE3OWU5OWUzZjk5Yjg4NGYyIiwzXV0=%22,%22lastEndCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE3VDE0OjMwOjI3KzAwOjAwIiwiMHhiOTMyYTcwYTU3NjczZDg5ZjRhY2ZmYmU4MzBlOGVkN2Y3NWZiOWUwIiwyNzgxNl1d%22,%22lastStartCursor%22:false,%22hasPreviousPage%22:true,%22hasNextPage%22:true,%22reverse%22:false%7D")
imgs2 = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(
        (By.XPATH, "//a[contains(@class,'Name-sc-7kf6vz-3')]")
    )
)
time.sleep(5)

img_url2 = []  # <--- moved before the loop so it accumulates across all items

# NOTE(review): this outer loop visits the same ``imgs2`` elements 30 times,
# so every artist URL is appended 30 times. It is kept because the asker's
# full code is not shown; if it has no other purpose, drop it and keep only
# the inner loop.
for _ in range(0, 30):
    for item in imgs2:
        imgwors2 = item.get_attribute("href")
        driver3 = webdriver.Chrome()
        try:
            driver3.get(imgwors2)

            if _exists_by_xpath(
                driver3, "//h1[@class='collectible-detail__collectible-name']"
            ):
                imgsrc4 = WebDriverWait(driver3, 65).until(
                    EC.presence_of_all_elements_located(
                        (
                            By.XPATH,
                            "//h1[contains(@class,'collectible-detail__collectible-name')]",
                        )
                    )
                )
                # The wait only succeeds with a non-empty list; as before, the
                # last matching heading supplies the title.
                title = imgsrc4[-1].text
            else:
                title = "none"
            print(title)

            imgsrc2 = WebDriverWait(driver3, 55).until(
                EC.presence_of_all_elements_located(
                    (
                        By.XPATH,
                        "//p[@data-testid='artistName']/ancestor::a[contains(@class,'ChildrenLink')]",
                    )
                )
            )
            img_url2.extend(link.get_attribute("href") for link in imgsrc2)
        finally:
            # Always shut down the per-item browser; otherwise one Chrome
            # process is leaked for every page visited, even on errors.
            driver3.quit()

driver.close()
print(img_url2)  # <--- moved below the loop: prints the complete list once
  • Related