I got most of my code working but have a lingering question. This is not my full code below but for the interest of readability I selected only a portion. I'm scraping a list of URLs from a web page (in imgs2) and then scraping info from the list of URLs. I would like to create a second list of URLs based on the results gathered in the first. (see img_url2 below). What happens is that instead of appending every new url to the list, it just replaces the previous one with the new one. Any idea how to have all of them be added to the list?
driver.get("https://superrare.com/market?market-options=%7B%22first%22:30,%22orderBy%22:%22RECENT_NFT_EVENT_BY_TOKEN_CONTRACT_ADDRESS_AND_TOKEN_ID__TIMESTAMP_DESC%22,%22fileTypes%22:%5B%22image/jpeg%22,%22image/png%22%5D,%22listPrice%22:false,%22isGenesis%22:false,%22isSeries%22:false,%22neverReceivedOffer%22:false,%22reservePrice%22:false,%22liveAuctions%22:false,%22upcomingAuctions%22:false,%22hasSold%22:false,%22ownedByCreator%22:false,%22openOffers%22:false,%22artistsCollected%22:false,%22artistsYouFollow%22:false,%22artistsThatFollowYou%22:false,%22artistsFollowedByFollowed%22:false,%22lowerPriceRange%22:0,%22upperPriceRange%22:100000,%22numCreatorSales%22:false,%22lowerMintedRange%22:null,%22upperMintedRange%22:null,%22startCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE3VDE0OjExOjMyKzAwOjAwIiwiMHhiOTMyYTcwYTU3NjczZDg5ZjRhY2ZmYmU4MzBlOGVkN2Y3NWZiOWUwIiwxNzYzMF1d%22,%22endCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE2VDIwOjMxOjUxKzAwOjAwIiwiMHg0MjQyMzk5YzE2Yjc4MzgxOTZlZDMzZjE3OWU5OWUzZjk5Yjg4NGYyIiwzXV0=%22,%22lastEndCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE3VDE0OjMwOjI3KzAwOjAwIiwiMHhiOTMyYTcwYTU3NjczZDg5ZjRhY2ZmYmU4MzBlOGVkN2Y3NWZiOWUwIiwyNzgxNl1d%22,%22lastStartCursor%22:false,%22hasPreviousPage%22:true,%22hasNextPage%22:true,%22reverse%22:false%7D")
imgs2 = WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.XPATH, "//a[contains(@class,'Name-sc-7kf6vz-3')]")))
time.sleep(5)
for i in range(0,30):
img_url = []
for number, item in enumerate(imgs2, 1):
imgwors2 = item.get_attribute("href")
driver3 = webdriver.Chrome()
driver3.get(imgwors2)
def check_exists_by_xpath(xpath):
try:
WebDriverWait(driver3,55).until(EC.presence_of_all_elements_located((By.XPATH, xpath)))
except TimeoutException:
return False
return True
if check_exists_by_xpath("//h1[@class='collectible-detail__collectible-name']"):
imgsrc4 = WebDriverWait(driver3,65).until(EC.presence_of_all_elements_located((By.XPATH, "//h1[contains(@class,'collectible-detail__collectible-name')]")))
for i in imgsrc4:
title = i.text
else:
title = "none"
print(title)
img_url2 = []
imgsrc2 = WebDriverWait(driver3,55).until(EC.presence_of_all_elements_located((By.XPATH, "//p[@data-testid='artistName']/ancestor::a[contains(@class,'ChildrenLink')]")))
for i in imgsrc2:
biourl = i.get_attribute("href")
img_url2.append(biourl)
print(img_url2)
driver.close()
CodePudding user response:
I think from your description and code, the variable img_url2
should be initialized before the for
loop(s)
driver.get("https://superrare.com/market?market-options=%7B%22first%22:30,%22orderBy%22:%22RECENT_NFT_EVENT_BY_TOKEN_CONTRACT_ADDRESS_AND_TOKEN_ID__TIMESTAMP_DESC%22,%22fileTypes%22:%5B%22image/jpeg%22,%22image/png%22%5D,%22listPrice%22:false,%22isGenesis%22:false,%22isSeries%22:false,%22neverReceivedOffer%22:false,%22reservePrice%22:false,%22liveAuctions%22:false,%22upcomingAuctions%22:false,%22hasSold%22:false,%22ownedByCreator%22:false,%22openOffers%22:false,%22artistsCollected%22:false,%22artistsYouFollow%22:false,%22artistsThatFollowYou%22:false,%22artistsFollowedByFollowed%22:false,%22lowerPriceRange%22:0,%22upperPriceRange%22:100000,%22numCreatorSales%22:false,%22lowerMintedRange%22:null,%22upperMintedRange%22:null,%22startCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE3VDE0OjExOjMyKzAwOjAwIiwiMHhiOTMyYTcwYTU3NjczZDg5ZjRhY2ZmYmU4MzBlOGVkN2Y3NWZiOWUwIiwxNzYzMF1d%22,%22endCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE2VDIwOjMxOjUxKzAwOjAwIiwiMHg0MjQyMzk5YzE2Yjc4MzgxOTZlZDMzZjE3OWU5OWUzZjk5Yjg4NGYyIiwzXV0=%22,%22lastEndCursor%22:%22WyJyZWNlbnRfbmZ0X2V2ZW50X2J5X3Rva2VuX2NvbnRyYWN0X2FkZHJlc3NfYW5kX3Rva2VuX2lkX190aW1lc3RhbXBfZGVzYyIsWyIyMDIyLTAyLTE3VDE0OjMwOjI3KzAwOjAwIiwiMHhiOTMyYTcwYTU3NjczZDg5ZjRhY2ZmYmU4MzBlOGVkN2Y3NWZiOWUwIiwyNzgxNl1d%22,%22lastStartCursor%22:false,%22hasPreviousPage%22:true,%22hasNextPage%22:true,%22reverse%22:false%7D")
imgs2 = WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.XPATH, "//a[contains(@class,'Name-sc-7kf6vz-3')]")))
time.sleep(5)
img_url2 = [] # <--- moved before the loop
for i in range(0,30):
for number, item in enumerate(imgs2, 1):
imgwors2 = item.get_attribute("href")
driver3 = webdriver.Chrome()
driver3.get(imgwors2)
def check_exists_by_xpath(xpath):
try:
WebDriverWait(driver3,55).until(EC.presence_of_all_elements_located((By.XPATH, xpath)))
except TimeoutException:
return False
return True
if check_exists_by_xpath("//h1[@class='collectible-detail__collectible-name']"):
imgsrc4 = WebDriverWait(driver3,65).until(EC.presence_of_all_elements_located((By.XPATH, "//h1[contains(@class,'collectible-detail__collectible-name')]")))
for i in imgsrc4:
title = i.text
else:
title = "none"
print(title)
imgsrc2 = WebDriverWait(driver3,55).until(EC.presence_of_all_elements_located((By.XPATH, "//p[@data-testid='artistName']/ancestor::a[contains(@class,'ChildrenLink')]")))
for i in imgsrc2:
biourl = i.get_attribute("href")
img_url2.append(biourl)
driver.close()
print(img_url2) # <--- moved below the loop