I have the following URLs and want to download the images they point to programmatically. There are millions of URLs, so I want to do it with Python.
1) https://image.lexica.art/md/dbbb96f1-fce2-4970-ab62-b4b4e6859fe9
2) https://image.lexica.art/md/76318f25-5736-4cda-965d-96fe34823263
3) https://image.lexica.art/md/c11dd279-757e-43ff-8305-43e106f6c345
4) https://image.lexica.art/md/f38d92bb-99bc-4611-938f-c5d6cc70d6ea
I have tried the following code, but it didn't work.
# Download one image, re-encode it as JPEG and store it under folder_path.
url = 'https://image.lexica.art/md/76318f25-5736-4cda-965d-96fe34823263'
folder_path = 'images_artistics'
# Use the last path segment as the file name, dropping an extension only when
# one is actually present. Blindly slicing off four characters truncates the
# UUID when the URL has no extension, as is the case here.
file_name = os.path.splitext(url.split('/')[-1])[0]
response = requests.get(url)
# Surface an HTTP error directly instead of handing an error page to PIL.
response.raise_for_status()
image = Image.open(io.BytesIO(response.content)).convert('RGB')
# open() does not create missing directories, so make sure the target exists.
os.makedirs(folder_path, exist_ok=True)
file_path = os.path.join(folder_path, file_name)
# The context manager closes the file even if saving fails.
with open(file_path, 'wb') as f:
    image.save(f, "JPEG", quality=85)
print(f"SAVED - {url} - AT: {file_path}")
CodePudding user response:
This is rather complicated. From a Wireshark trace, I see that the site uses HTTP/2, and I'm guessing the server also checks for browser-like HTTP headers (e.g. "User-Agent").
Make sure you install httpx with HTTP/2 support: pip3 install 'httpx[http2]'
Then try this:
import httpx
from PIL import Image
from io import BytesIO
import httpx
import asyncio
async def main():
    """Fetch every URL in ``url_list`` over HTTP/2 and display the image.

    Each request carries the browser-like headers defined below. The response
    body is decoded with PIL, converted to RGB and opened in the default image
    viewer via ``Image.show()``.

    Raises:
        httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
    """
    url_list = [
        'https://image.lexica.art/md/dbbb96f1-fce2-4970-ab62-b4b4e6859fe9',
        #'https://image.lexica.art/md/76318f25-5736-4cda-965d-96fe34823263',
        #'https://image.lexica.art/md/c11dd279-757e-43ff-8305-43e106f6c345',
        #'https://image.lexica.art/md/f38d92bb-99bc-4611-938f-c5d6cc70d6ea',
    ]
    headers = {
        'Host': 'image.lexica.art',
        'authority': 'image.lexica.art',
        'method': 'GET',
        'scheme': 'https',
        # Media types repaired: "application/xhtml+xml" and "image/webp" had
        # been garbled into "application/xhtml xml" and "image,webp".
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9',
        'cache-control': 'max-age=0',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': 'macOS',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'none',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
    }
    # Create the client once and reuse it for every URL; "async with" closes
    # it (and its connections) when the loop finishes or an error is raised.
    async with httpx.AsyncClient(http2=True) as client:
        for url in url_list:
            response = await client.get(url, headers=headers)
            # Fail on an HTTP error rather than passing an error body to PIL.
            response.raise_for_status()
            image = Image.open(BytesIO(response.content)).convert('RGB')
            image.show()


if __name__ == "__main__":
    asyncio.run(main())
Good luck!
CodePudding user response:
I would use requests for this.
import requests
# Download every URL in url_list into the current directory, naming each file
# after the last path segment of its URL.
url_list = [
    'https://image.lexica.art/md/dbbb96f1-fce2-4970-ab62-b4b4e6859fe9',
    'https://image.lexica.art/md/76318f25-5736-4cda-965d-96fe34823263',
    'https://image.lexica.art/md/c11dd279-757e-43ff-8305-43e106f6c345',
    'https://image.lexica.art/md/f38d92bb-99bc-4611-938f-c5d6cc70d6ea',
]
# One Session for all requests, so connections to the host can be reused.
with requests.Session() as session:
    for url in url_list:
        filename = url.split('/')[-1]
        try:
            # Without a timeout a stalled connection would block indefinitely.
            response = session.get(url, timeout=30)
            # Do not save an HTTP error body as if it were an image.
            response.raise_for_status()
        except requests.RequestException as exc:
            # Report the failure and carry on with the remaining URLs.
            print(f"FAILED - {url} - {exc}")
            continue
        # The context manager guarantees the file is flushed and closed.
        with open(filename, 'wb') as f:
            f.write(response.content)