How to download an image from a given URL? - CodePudding

I have the following URLs, and I want to download these images using code. There are millions of URLs, so I want to do it using Python.

1) https://image.lexica.art/md/dbbb96f1-fce2-4970-ab62-b4b4e6859fe9
2) https://image.lexica.art/md/76318f25-5736-4cda-965d-96fe34823263
3) https://image.lexica.art/md/c11dd279-757e-43ff-8305-43e106f6c345
4) https://image.lexica.art/md/f38d92bb-99bc-4611-938f-c5d6cc70d6ea

I have tried the following code, but it didn't work.

# Download a single image, re-encode it as JPEG and write it into a folder.
# NOTE(review): `requests`, `io`, `os` and PIL's `Image` are used below but
# their imports are not shown in this snippet - presumably imported earlier.
url = 'https://image.lexica.art/md/76318f25-5736-4cda-965d-96fe34823263'
folder_path = 'images_artistics'
# Takes the last path segment of the URL and drops its final four characters.
# NOTE(review): this URL has no file extension, so the slice removes the last
# four characters of the identifier itself rather than a suffix like ".jpg".
file_name = url.split('/')[-1][:-4]

# Fetch the raw response body; the HTTP status code is not inspected here.
image_content = requests.get(url).content
# Wrap the bytes in an in-memory file object so Pillow can open them.
image_file = io.BytesIO(image_content)
image = Image.open(image_file).convert('RGB')
# NOTE(review): assumes the `images_artistics` folder already exists - confirm.
file_path = os.path.join(folder_path, file_name)
# NOTE(review): the file object is never closed in the code shown.
f = open(file_path, 'wb')
image.save(f, "JPEG", quality=85)
print(f"SAVED - {url} - AT: {file_path}")

The error that I am getting:

CodePudding user response：

This is rather complicated. From a Wireshark trace, I can see that it's using HTTP/2, and I'm guessing the server also checks for browser-like HTTP headers (e.g. "User-Agent").

Make sure you install httpx with HTTP/2 support: pip3 install 'httpx[http2]'

then try this,

import httpx
from PIL import Image
from io import BytesIO
import httpx
import asyncio

async def main():
    """Download each image in ``url_list`` over HTTP/2, save it and show it.

    Every request is sent with headers copied from a Chrome browser
    session. Each response body is decoded with Pillow, converted to RGB,
    written to the current directory as ``<last URL segment>.jpg`` and then
    opened in the default image viewer.

    Raises:
        httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
    """
    url_list = [
        'https://image.lexica.art/md/dbbb96f1-fce2-4970-ab62-b4b4e6859fe9',
        #'https://image.lexica.art/md/76318f25-5736-4cda-965d-96fe34823263',
        #'https://image.lexica.art/md/c11dd279-757e-43ff-8305-43e106f6c345',
        #'https://image.lexica.art/md/f38d92bb-99bc-4611-938f-c5d6cc70d6ea',
    ]

    # Browser-like request headers (values taken from a Chrome 102 session).
    headers = {
        'Host': 'image.lexica.art',
        'authority': 'image.lexica.art',
        'method': 'GET',
        'scheme': 'https',
        # Fixed two garbled media types: "application/xhtml+xml" had lost its
        # "+", and "image/webp" had been written as "image,webp".
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9',
        'cache-control': 'max-age=0',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': 'macOS',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'none',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
    }

    # Create the client once and close it deterministically. The previous
    # version built a new AsyncClient per URL and never closed any of them,
    # leaking connections as the URL list grows.
    async with httpx.AsyncClient(http2=True) as client:
        for url in url_list:
            filename = url.split('/')[-1]
            response = await client.get(url, headers=headers)
            # Fail loudly on an error response instead of handing an error
            # page to Pillow.
            response.raise_for_status()
            image = Image.open(BytesIO(response.content)).convert('RGB')
            # Persist the image; the URL has no extension, so add one that
            # matches the JPEG encoding used here.
            image.save(f'{filename}.jpg', 'JPEG')
            image.show()

# Start the event loop only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

good luck !

CodePudding user response：

I would use requests for this.

import requests

# Image URLs to download; each file is named after the last URL segment.
url_list = [
    'https://image.lexica.art/md/dbbb96f1-fce2-4970-ab62-b4b4e6859fe9',
    'https://image.lexica.art/md/76318f25-5736-4cda-965d-96fe34823263',
    'https://image.lexica.art/md/c11dd279-757e-43ff-8305-43e106f6c345',
    'https://image.lexica.art/md/f38d92bb-99bc-4611-938f-c5d6cc70d6ea',
]

# A single Session reuses the underlying connection across requests, which
# matters when the URL list is long.
with requests.Session() as session:
    for url in url_list:
        filename = url.split('/')[-1]
        # Without a timeout a stalled server would block the loop forever.
        response = session.get(url, timeout=30)
        # Do not write an error page to disk as if it were an image.
        response.raise_for_status()
        # The context manager closes the file even if the write fails; the
        # previous one-liner left the handle open.
        with open(filename, 'wb') as image_file:
            image_file.write(response.content)