Scrape images with a request header using BeautifulSoup - CodePudding

I have this code for scraping images:

import requests, base64
from bs4 import BeautifulSoup


# Google Shopping search URL for the query "cat"; the query string is kept exactly as given.
baseurl = "https://www.google.com/search?q=cat&sxsrf=APq-WBuyx07rsOeGlVQpTsxLt262WbhlfA:1650636332756&source=lnms&tbm=shop&sa=X&ved=2ahUKEwjQr5HC66f3AhXxxzgGHejKC9sQ_AUoAXoECAIQAw&biw=1920&bih=937&dpr=1"
# Browser-like User-Agent sent with the request.
headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0"}

# A timeout keeps the script from hanging forever if the server never answers.
r_images = requests.get(url=baseurl, headers=headers, timeout=10)


soup_for_image = BeautifulSoup(r_images.text, 'html.parser')
# find product images: collect the src of every <img>, skipping inline SVG data URIs
productimages = []
product_images = soup_for_image.find_all('img')
for item in product_images:
    # .get() yields None for an <img> without a src attribute, where
    # item['src'] would raise KeyError and abort the whole loop.
    src = item.get('src')
    # The SVG media type is "image/svg+xml"; the "+" is part of the literal.
    if src is not None and "data:image/svg+xml" not in src:
        productimages.append(src)
print(productimages)

It works fine when no header is sent, but if I send a request header, the result is a base64 image. Is there any way I can scrape the images while still sending the request headers?

CodePudding user response：

You can add the CONSENT cookie and it works.
Note that some of the selectors may change in the future.

import requests, base64
from bs4 import BeautifulSoup

# Google Shopping search URL for the query "cat"; the query string is kept exactly as given.
baseurl = "https://www.google.com/search?q=cat&sxsrf=APq-WBuyx07rsOeGlVQpTsxLt262WbhlfA:1650636332756&source=lnms&tbm=shop&sa=X&ved=2ahUKEwjQr5HC66f3AhXxxzgGHejKC9sQ_AUoAXoECAIQAw&biw=1920&bih=937&dpr=1"
# Send a CONSENT cookie with the request; its value uses "+" as a field separator.
headers = {"cookie": "CONSENT=YES+cb.20230531-04-p0.en+FX+908"}
# A timeout keeps the script from hanging forever if the server never answers.
result = requests.get(url=baseurl, headers=headers, timeout=10)
soup = BeautifulSoup(result.text, 'html.parser')
# NOTE: the class names used below are hard-coded selectors for the returned
# markup; presumably they identify product cards, names and prices — verify
# against the current page before relying on them.
allProducts = soup.find_all(class_="u30d4")
number = 0
for product in allProducts:
    name = product.find(class_="rgHvZc")
    # Only elements that contain a name element are counted and printed.
    if name is not None:
        number += 1
        print("Product number %d:" % number)
        print("Name : " + name.text)
        productLink = product.find('a')
        # Drop the first 7 characters of the href (presumably a "/url?q="
        # redirect prefix — confirm against the actual markup).
        print("Link: " + productLink["href"][7:])
        img = product.find('img')
        print("Image: " + img["src"])
        price = product.find(class_="HRLxBb")
        print("Price " + price.text)

I hope I was able to help you.