import os
import sys
from threading import Thread, Lock
from urllib.request import urlopen

import requests
from tqdm import tqdm

lock = Lock()


def download(url, filename=None, timeout=30):
    """Download *url* to *filename*, resuming a partial download if one exists.

    The remote size is read from the ``Content-Length`` header of a HEAD
    request. If *filename* already exists and is smaller than that size, only
    the missing tail is requested with an HTTP ``Range`` header and appended
    to the file. A tqdm progress bar is shown while data is written.

    Args:
        url: Address of the file to fetch.
        filename: Destination path. Defaults to the last ``/``-separated
            segment of *url*.
        timeout: Seconds passed to ``requests`` as the connect/read timeout
            for both the HEAD and the GET request.

    Returns:
        The remote file size in bytes, as reported by ``Content-Length``.

    Raises:
        KeyError: The HEAD response carries no ``Content-Length`` header.
        requests.HTTPError: The server answered with an error status.
    """
    # requests.head() does not follow redirects by default; follow them so the
    # Content-Length describes the real file and not the redirect response.
    head = requests.head(url, allow_redirects=True, timeout=timeout)
    head.raise_for_status()
    file_size = int(head.headers['Content-Length'])

    default_name = url.split('/')[-1]
    if not filename:
        filename = default_name

    if os.path.exists(filename):
        first_byte = os.path.getsize(filename)
        if first_byte >= file_size:
            # Nothing left to fetch. Requesting an empty range would yield a
            # 416 response whose error body must not be appended to the file.
            return file_size
        print('断点续传中。。。')
    else:
        first_byte = 0

    # Open-ended range: the end position of a Range header is inclusive, so
    # "first-file_size" would point one byte past the end of the file.
    headers = {'Range': 'bytes=%d-' % first_byte} if first_byte else {}

    with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        if first_byte and response.status_code != 206:
            # The server ignored the Range header and is sending the whole
            # file; start over instead of appending it to the partial data.
            first_byte = 0
        mode = 'ab' if first_byte else 'wb'
        with open(filename, mode) as f:
            with tqdm(total=file_size, initial=first_byte, unit='B',
                      unit_scale=True, desc=default_name,
                      mininterval=0.5) as pbar:
                for chunk in response.iter_content(chunk_size=1024):
                    f.write(chunk)
                    # The final chunk is usually shorter than chunk_size.
                    pbar.update(len(chunk))
    return file_size


if __name__ == '__main__':
    download('http://huo.hongjiaozuida.com/20200606/5381_8ab40c11/少年间谍第一季-08.mp4')
#!/usr/bin/env python3
#_*_ coding: utf-8 _*_
'''
***********************************************
author: newrain                               *
blog:   https://blog.csdn.net/NewRain_wang    *
        https://newrain001.gitee.io           *
github: https://github.com/newrain001         *
gitee : https://gitee.com/newrain001          *
email : newrain_wang@163.com                  *
***********************************************
'''
import requests
from lxml import etree
import time
import os


def getUrl():
    """Download every image linked from the doutula photo list page.

    Fetches ``https://www.doutula.com/photo/list``, collects the
    ``data-original`` attribute of the matching ``<img>`` elements, and saves
    each image into the local ``image`` directory under the last path segment
    of its URL. Sleeps one second between images.

    Returns:
        None. Progress is reported with ``print``.
    """
    header = {
        # The header value must not repeat the "User-Agent: " field name.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
        # Referer is sent as an absolute URL.
        'Referer': 'https://www.doutula.com/',
    }
    html = requests.get('https://www.doutula.com/photo/list', headers=header, timeout=30)
    if html.status_code != 200:
        print('列表页请求失败', html.status_code)
        return

    tree = etree.HTML(html.text)
    image_urls = tree.xpath('//ul/li/div/div/a/img/@data-original')

    # Create the output directory once, before the loop.
    os.makedirs('image', exist_ok=True)

    for image_url in image_urls:
        name = image_url.split("/")[-1]
        if not name:
            # A URL ending in "/" has no usable file name.
            continue
        # Send the same headers as for the list page.
        result = requests.get(image_url, headers=header, timeout=30)
        if result.status_code != 200:
            # Do not save an error page as if it were an image.
            print(name, '下载失败', result.status_code)
            continue
        with open(f'image/{name}', 'wb') as f:
            f.write(result.content)
        print(name,'已完成')
        time.sleep(1)


getUrl()