python分块下载大文件

需求

使用 Python 下载大文件时,文件大小可能超过可用内存,无法一次性读入。这时可以使用流式请求分块下载、逐块写入磁盘,并显示下载进度条。

实现

import os
import requests
from tqdm import tqdm


def get_url_file_content_length_without_download(url: str, headers=None) -> int:
    """
    Return the size in bytes of the file at ``url`` without downloading it.

    Sends a HEAD request (following redirects) and reads the
    ``Content-Length`` response header.

    :param url: URL of the remote file
    :param headers: optional dict of extra request headers to send
    :return: the content length in bytes, or -1 when the request fails,
        the server answers with an error status, or the header is missing
        or not an integer
    """
    try:
        resp = requests.head(url, allow_redirects=True, headers=headers)
        # An error response may carry the length of an error page, which is
        # not the size of the requested file.
        resp.raise_for_status()
        # int(None) raises TypeError when the header is absent; a non-numeric
        # value raises ValueError. Both mean "size unknown".
        return int(resp.headers.get("content-length"))
    except (requests.RequestException, TypeError, ValueError):
        return -1


def download_file(url, local_path=None, headers=None, chunk_size=8192, force=False):
    """
    Download a large file in chunks while showing a progress bar.

    The response is streamed, so only ``chunk_size`` bytes are held in
    memory at a time. Data is written to ``<local_path>.part`` first and
    moved to ``local_path`` only after the download completes, so a failed
    download never leaves a truncated file at ``local_path``.

    :param url: URL to download
    :param local_path: local destination path; defaults to the last
        ``/``-separated segment of ``url``
    :param headers: optional dict of request headers
    :param chunk_size: number of bytes to read per chunk
    :param force: when True, overwrite an existing local file; when False,
        an existing file is returned as-is without any network request
    :return: the local path of the downloaded (or already existing) file
    :raises requests.HTTPError: if the server answers with an error status
    """
    if local_path is None:
        local_path = url.split("/")[-1]
    # Check for an existing file before touching the network.
    if not force and os.path.exists(local_path):
        return local_path

    tmp_path = f"{local_path}.part"
    try:
        # stream=True defers the body so it can be consumed chunk by chunk.
        with requests.get(url, stream=True, headers=headers) as r:
            r.raise_for_status()
            # Take the size from this very response; tqdm expects None
            # (not a negative number) when the total is unknown.
            raw_length = r.headers.get("content-length")
            total = int(raw_length) if raw_length and raw_length.isdigit() else None
            with open(tmp_path, "wb") as f, tqdm(
                total=total, desc=f"download {url}", unit="B", unit_scale=True
            ) as t:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    if not chunk:
                        continue
                    f.write(chunk)
                    # The last chunk is usually shorter than chunk_size.
                    t.update(len(chunk))
        os.replace(tmp_path, local_path)
    except BaseException:
        # Remove the partial file so a later call does not mistake it for a
        # finished download, then let the error propagate.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    return local_path