不积跬步无以至千里,不积小流无以成江海

 Administrator     2022-10-26      小工具      2068

python根据链接批量下载文件的脚本

运行前需要先安装 xlrd、requests、tqdm 这三个依赖包。根据网上的情况,脚本中加了异常处理:遇到异常时会跳过当前文件,继续处理后面的文件。

def get_video_url(excel_path):
    """Collect the download links stored in the first worksheet of a workbook.

    Every row of the sheet at index 0 is visited (row 0 included) and the
    cell at column index 4 is taken from each one.

    Args:
        excel_path: Path of the Excel workbook to open with xlrd.

    Returns:
        A list with one entry per sheet row, in row order.
    """
    import xlrd

    # NOTE: depending on the installed xlrd version this call may raise
    # (e.g. xlrd 2.x no longer reads .xlsx files -- verify against the
    # installed version). Switching xlrd versions, or obtaining the links
    # some other way, works around it.
    workbook = xlrd.open_workbook(excel_path)
    sheet = workbook.sheet_by_index(0)
    # The link sits at column index 4 of every row.
    return [sheet.row_values(row_idx)[4] for row_idx in range(sheet.nrows)]
def get_video_name(excel_path):
    """Collect the per-row labels stored in the first worksheet of a workbook.

    Every row of the sheet at index 0 is visited (row 0 included) and the
    cell at column index 0 is taken from each one.

    Args:
        excel_path: Path of the Excel workbook to open with xlrd.

    Returns:
        A list with one entry per sheet row, in row order.
    """
    import xlrd

    # NOTE: depending on the installed xlrd version this call may raise
    # (e.g. xlrd 2.x no longer reads .xlsx files -- verify against the
    # installed version). Switching xlrd versions, or reading the sheet
    # some other way, works around it.
    workbook = xlrd.open_workbook(excel_path)
    sheet = workbook.sheet_by_index(0)
    # The label sits at column index 0 of every row.
    return [sheet.row_values(row_idx)[0] for row_idx in range(sheet.nrows)]

def download_video(url_list, url_name, save_folder, timeout=30):
    """Download every URL in *url_list* into *save_folder*, skipping failures.

    Each file is saved as ``<position>_<name>_<last URL path segment>``,
    where ``position`` is the 1-based index of the URL and ``name`` is the
    entry of *url_name* at the same index. A failure on one entry (malformed
    or non-string URL, network error, HTTP error status, missing name,
    unwritable path) is reported on stdout and the loop moves on to the next
    entry, so one bad row no longer aborts the whole batch.

    Args:
        url_list: Iterable of download URLs.
        url_name: Sequence of labels indexed in parallel with *url_list*;
            entries are converted with ``str()`` so numeric cells work too.
        save_folder: Directory that receives the files. It is created when
            missing; an empty string means the current directory.
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot hang the run indefinitely.

    Raises:
        OSError: If *save_folder* cannot be created.
    """
    import os

    import requests
    from tqdm import tqdm

    # Without the folder every single write would fail, so create it up front.
    if save_folder:
        os.makedirs(save_folder, exist_ok=True)

    for index, url in enumerate(tqdm(url_list)):
        print(index)
        try:
            # Built inside the try: a non-string URL or a missing name must
            # only skip this entry, not crash the batch.
            file_name = "{}_{}_{}".format(
                index + 1, url_name[index], url.split('/')[-1]
            )
            file = os.path.join(save_folder, file_name)  # local save path

            # The request is the main time cost and the most likely point
            # of failure, so it has to be covered by the handler as well.
            r = requests.get(url, timeout=timeout)
            # Do not store a 404/500 error page as if it were the data file.
            r.raise_for_status()

            with open(file, "wb") as code:
                code.write(r.content)
        except Exception as exc:
            # Best-effort batch: report and continue. ``Exception`` (rather
            # than a bare except) lets Ctrl-C still stop the run.
            print("skipped {!r}: {!r}".format(url, exc))
    print('download over')

if __name__ == '__main__':
    # One workbook supplies both the download links and the file labels.
    workbook_path = './2017 NHANES.xlsx'
    # Folder that receives the downloaded files.
    target_dir = './data3/'
    download_video(
        get_video_url(workbook_path),
        get_video_name(workbook_path),
        target_dir,
    )