需求
使用 Python 下载大文件时,文件大小可能超过可用内存,因此需要分块下载,并显示进度条。
实现
import os
import requests
from tqdm import tqdm
def get_url_file_content_length_without_download(url: str, headers=None) -> int:
    """Return the size in bytes of the file at ``url`` without downloading it.

    Sends a HEAD request (following redirects) and reads the
    ``Content-Length`` response header.

    :param url: URL of the remote file.
    :param headers: optional dict of extra HTTP headers to send with the request.
    :return: the content length in bytes, or -1 when it cannot be determined
        (request failure, error status, missing or non-numeric header).
    """
    try:
        resp = requests.head(url, allow_redirects=True, headers=headers)
        # An error response may carry the length of an error page, which is
        # not the size of the requested file.
        resp.raise_for_status()
        return int(resp.headers["content-length"])
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Best-effort probe: callers treat -1 as "size unknown".
        return -1
def download_file(url, local_path=None, headers=None, chunk_size=8192, force=False):
    """Download ``url`` to disk in chunks, showing a tqdm progress bar.

    The response is streamed, so the file never has to fit in memory. Data is
    written to ``<local_path>.part`` first and moved to ``local_path`` only
    after the transfer completes, so an interrupted download is not mistaken
    for a finished file by a later ``force=False`` call.

    :param url: URL to download.
    :param local_path: destination path; defaults to the last
        ``/``-separated segment of ``url``.
    :param headers: optional dict of HTTP headers sent with the request.
    :param chunk_size: number of bytes requested per chunk.
    :param force: when true, download even if ``local_path`` already exists.
    :return: the path of the local file.
    :raises requests.HTTPError: if the server answers with an error status.
    """
    if local_path is None:
        local_path = url.split("/")[-1]
    # Decide this before any network traffic: nothing to do if the file is
    # already present and the caller did not ask to overwrite it.
    if not force and os.path.exists(local_path):
        return local_path

    part_path = f"{local_path}.part"
    try:
        # stream=True defers fetching the body until iter_content() is consumed.
        with requests.get(url, stream=True, headers=headers) as r:
            r.raise_for_status()
            # Take the size from this very response so it reflects the same
            # headers and redirects as the download itself.
            try:
                total = int(r.headers["content-length"])
            except (KeyError, TypeError, ValueError):
                total = None  # unknown size: tqdm shows a plain byte counter
            progress = tqdm(
                total=total,
                desc=f"download {url}",
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
            )
            # The context managers close the file and the bar even on error.
            with progress as t:
                with open(part_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=chunk_size):
                        f.write(chunk)
                        # Chunks may be shorter than chunk_size (always true
                        # for the last one), so count the bytes received.
                        t.update(len(chunk))
        os.replace(part_path, local_path)
    except BaseException:
        # Do not leave a truncated temporary file behind.
        if os.path.exists(part_path):
            os.remove(part_path)
        raise
    return local_path