I got this function from this question (Ctrl+F for "There is an answer with requests and tqdm"):
import requests
from tqdm import tqdm
def download(url: str, fname: str):
    """Download *url* into the file *fname* while showing a progress bar.

    Args:
        url: Address to fetch with an HTTP GET request.
        fname: Path of the file to write; also used as the bar's label.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Using the response as a context manager releases the streamed
    # connection even when writing fails part-way through.
    with requests.get(url, stream=True) as resp:
        # Check the status before opening the file, so an error page is never
        # saved as the download and an existing file is not truncated.
        resp.raise_for_status()
        # A missing Content-Length header is treated as 0.
        total = int(resp.headers.get('content-length', 0))
        with open(fname, 'wb') as file, tqdm(
            desc=fname,
            total=total,
            unit='B',  # upper-case: the counter is in bytes, not bits
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in resp.iter_content(chunk_size=1024):
                size = file.write(data)
                bar.update(size)
Basically, it downloads a file from a URL and writes it to disk. I wanted it to return a variable that represents the downloaded file instead, so I made this:
def download(url: str, fname: str) -> bytes:
    """Download *url* and return its content as bytes.

    The data is staged in the file *fname* while a progress bar is shown,
    read back into memory, and the file is then deleted.

    Args:
        url: Address to fetch with an HTTP GET request.
        fname: Path of the scratch file; also used as the bar's label.

    Returns:
        The complete downloaded content.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    import contextlib
    import os

    import requests
    from tqdm import tqdm

    # Using the response as a context manager releases the streamed
    # connection even when the transfer fails part-way through.
    with requests.get(url, stream=True) as resp:
        # Check the status before touching the file system, so an error page
        # is never returned as if it were the requested content.
        resp.raise_for_status()
        # A missing Content-Length header is treated as 0.
        total = int(resp.headers.get('content-length', 0))
        try:
            with open(fname, 'wb') as file, tqdm(
                desc=fname,
                total=total,
                unit='B',  # upper-case: the counter is in bytes, not bits
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in resp.iter_content(chunk_size=1024):
                    size = file.write(data)
                    bar.update(size)
            with open(fname, "rb") as f:
                return f.read()
        finally:
            # Remove the scratch file on failure as well as on success. If it
            # could not be created, there is nothing to remove.
            with contextlib.suppress(FileNotFoundError):
                os.remove(fname)
Now it saves the file, reads it back into a variable, deletes the file, and returns the variable. Is there a way to download the content directly into a variable, without going through a file on disk?
CodePudding user response:
Well, you could just return a tqdm iterator, and do whatever you like with the chunks:
import requests
import tqdm
import io
def download(url: str):
    """Yield the body of *url* chunk by chunk while showing a progress bar.

    This is a generator: the request is only sent once iteration starts.

    Args:
        url: Address to fetch with an HTTP GET request; also used as the
            bar's label.

    Yields:
        Successive ``bytes`` chunks of the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised when the first chunk is requested).
    """
    # The context manager closes the streamed connection when the generator
    # finishes, raises, or is abandoned early and closed by its consumer.
    with requests.get(url, stream=True) as resp:
        # Do not stream an error page as if it were the requested content.
        resp.raise_for_status()
        # A missing Content-Length header is treated as 0.
        total = int(resp.headers.get('content-length', 0))
        with tqdm.tqdm(
            desc=url,
            total=total,
            unit='B',  # upper-case: the counter is in bytes, not bits
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for chunk in resp.iter_content(chunk_size=65536):
                bar.update(len(chunk))
                yield chunk
# Collect the streamed chunks in an in-memory buffer.
bio = io.BytesIO()
for chunk in download('http://...'):
    # Any per-chunk processing could go here; this example simply appends
    # each chunk to the buffer.
    bio.write(chunk)
# Take everything written to the buffer as a single bytes object.
content = bio.getvalue()
Of course you can then refactor this to
import requests
import tqdm
import io
def download_as_bytes_with_progress(url: str) -> bytes:
    """Download *url* into memory while showing a progress bar.

    Args:
        url: Address to fetch with an HTTP GET request; also used as the
            bar's label.

    Returns:
        The complete response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    bio = io.BytesIO()
    # Using the response as a context manager releases the streamed
    # connection even when the transfer fails part-way through.
    with requests.get(url, stream=True) as resp:
        # Do not return an error page as if it were the requested content.
        resp.raise_for_status()
        # A missing Content-Length header is treated as 0.
        total = int(resp.headers.get('content-length', 0))
        with tqdm.tqdm(
            desc=url,
            total=total,
            unit='B',  # upper-case: the counter is in bytes, not bits
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for chunk in resp.iter_content(chunk_size=65536):
                bar.update(len(chunk))
                bio.write(chunk)
    return bio.getvalue()