Home > Enterprise >  I can't download the product image
I can't download the product image

Time:09-29

I need to know how to download the second product image; its link is inside a tag, wrapped in quotes.

Alternatively, it would be enough to just copy the image link and save it in a DataFrame (DF).

Link Element

import pandas as pd
import xlsxwriter
import pyautogui
import urllib.request
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

# Open Chrome (with the 'enable-logging' switch excluded), visit each link
# listed in the spreadsheet, read the `src` attribute of the element with
# class "_1OPdfl", and download it as an .mp4 file.
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options)
# The URL has to be one string; a raw line break inside the quotes is a
# syntax error, so it is wrapped with implicit literal concatenation.
driver.get(
    "https://shopee.com.br/Tapete-Sala-1-00-X-1-50-Peludo-Shaggy-Macio-"
    "Quarto-Felpudo-i.346235717.7779211526"
)
driver.maximize_window()

df = pd.read_excel(r"C:\__Imagens e Planilhas Python\Shopee\Videos\Videos.xlsx")

for index, row in df.iterrows():
    # driver.get() returns None, so its result is not kept.
    driver.get(str(row["links"]))
    sleep(5)  # give the page time to render before querying it
    video = driver.find_element(By.CLASS_NAME, "_1OPdfl")
    sleep(5)
    atributoSrc = video.get_attribute("src")
    print(atributoSrc)
    if not atributoSrc:
        # The element has no `src` attribute, so there is nothing to download.
        print("error")
        continue
    # NOTE(review): the pasted path was broken across two lines; the break is
    # assumed to have been a single space ("Videos Baixados") - confirm.
    destino = (
        r"C:\__Imagens e Planilhas Python\Shopee\Videos Baixados\nome"
        + str(row["salvar"])
        + ".mp4"
    )
    try:
        urllib.request.urlretrieve(atributoSrc, destino)
    except (OSError, ValueError) as exc:
        # OSError covers network failures (URLError/HTTPError) and file-system
        # errors; ValueError covers a malformed URL. Keep going with the next row.
        print("error", exc)

CodePudding user response:

This should download all the product images:

import requests  # needed: requests.get() is called below

# Download the background image of every element matched by the selector.
# `driver` and `By` are expected to come from the surrounding Selenium script.
imgDivs = driver.find_elements(By.CSS_SELECTOR, '._1OPdfl > ._2PWsS4')
for i, imgDiv in enumerate(imgDivs):
    style = imgDiv.get_attribute('style') or ''
    # The image URL is taken from the url("...") part of the inline style.
    _, marker, rest = style.partition('url("')
    imgUrl = rest.partition('")')[0]
    if not marker or not imgUrl:
        # No url("...") in this element's style; skip it instead of crashing.
        continue
    # The `with` block closes the file; no explicit close() is needed.
    with open(f"img_{i}.jpeg", "wb") as f:
        # A timeout keeps one stalled request from hanging the whole run.
        f.write(requests.get(imgUrl, timeout=30).content)
    print(imgUrl)

The enlarged image gets downloaded twice. You can change the find_elements arguments to be pickier about the images.

CodePudding user response:

Hello, thanks for the help.

It pulls the link several times, as shown in the image below:

https://prnt.sc/Nprveo15VcBn

Look what I found: https://prnt.sc/1KdQtA0Lo0B4

import pandas as pd
import requests
import xlsxwriter
import pyautogui
import urllib.request
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

# Open Chrome (with the 'enable-logging' switch excluded), visit each link
# listed in the spreadsheet, and save the background image of every element
# matched by the selector.
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])

driver = webdriver.Chrome(options=options)
# The URL has to be one string; a raw line break inside the quotes is a
# syntax error, so it is wrapped with implicit literal concatenation.
driver.get(
    "https://shopee.com.br/Tapete-Sala-1-00-X-1-50-Peludo-Shaggy-Macio-"
    "Quarto-Felpudo-i.346235717.7779211526"
)
driver.maximize_window()

# NOTE(review): the pasted path was broken across two lines; the break is
# assumed to have been a single space ("Planilhas Python") - confirm.
df = pd.read_excel(r"C:\__Imagens e Planilhas Python\Shopee\Videos\Videos.xlsx")
for index, row in df.iterrows():
    # driver.get() returns None, so its result is not kept.
    driver.get(str(row["links"]))
    sleep(5)  # give the page time to render before querying it

    # NOTE(review): the paste lost the loop indentation; the image loop is
    # assumed to belong inside the per-row loop - confirm.
    imgDivs = driver.find_elements(By.CSS_SELECTOR, '._1OPdfl > ._2PWsS4')
    for i, imgDiv in enumerate(imgDivs):
        style = imgDiv.get_attribute('style') or ''
        # The image URL is taken from the url("...") part of the inline style.
        _, marker, rest = style.partition('url("')
        imgUrl = rest.partition('")')[0]
        if not marker or not imgUrl:
            # No url("...") in this element's style; skip it instead of crashing.
            continue
        # The row index is part of the name so that images from one page do
        # not overwrite the images saved for the previous page.
        with open(f"img_{index}_{i}.jpeg", "wb") as f:
            # A timeout keeps one stalled request from hanging the whole run.
            f.write(requests.get(imgUrl, timeout=30).content)
        print(imgUrl)
  • Related