master
Your Name 3 years ago
parent 0cb71ac1e4
commit 538719120f
  1. 27
      download/文件下载.py
  2. 76
      spider/doutu.py

@ -0,0 +1,27 @@
import os
from urllib.request import urlopen
from tqdm import tqdm
import requests
import sys
from threading import Thread,Lock
lock = Lock()
def download(url, filename=None):
    """Download ``url`` to a local file, resuming a partial download if present.

    The remote size is taken from the ``Content-Length`` header of a HEAD
    request. If ``filename`` already exists, its current size is used as the
    resume offset and only the remaining bytes are requested with an HTTP
    ``Range`` header. A tqdm progress bar tracks the bytes written.

    Args:
        url: Address of the file to fetch.
        filename: Destination path. Defaults to the last ``/``-separated
            segment of ``url``.

    Returns:
        The remote file size in bytes, as reported by ``Content-Length``.

    Raises:
        KeyError: The HEAD response carries no ``Content-Length`` header.
        requests.HTTPError: The GET request answered with an error status
            (previously the error body was silently appended to the file).
    """
    # Follow redirects so the size describes the same resource the GET
    # (which follows redirects by default) will actually deliver.
    head = requests.head(url, allow_redirects=True)
    file_size = int(head.headers['Content-Length'])
    if not filename:
        filename = url.split('/')[-1]
    if os.path.exists(filename):
        first_byte = os.path.getsize(filename)
        print('断点续传中。。。')
    else:
        first_byte = 0

    # The local file already holds at least the advertised number of bytes:
    # requesting an empty/invalid range would only append garbage to it.
    if first_byte > 0 and first_byte >= file_size:
        return file_size

    # Open-ended range ("N-") asks for everything from the resume offset on.
    headers = {'Range': 'bytes=%d-' % first_byte} if first_byte else {}
    with requests.get(url, headers=headers, stream=True) as result:
        result.raise_for_status()
        if first_byte and result.status_code != 206:
            # The server ignored the Range header and is sending the whole
            # file; appending would corrupt it, so start over from byte 0.
            first_byte = 0
        mode = 'ab' if first_byte else 'wb'
        with tqdm(total=file_size, initial=first_byte, unit='B',
                  unit_scale=True, desc=url.split('/')[-1],
                  mininterval=0.5) as pbar, open(filename, mode) as f:
            for chunk in result.iter_content(chunk_size=1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
                    # Advance by the real chunk size; the last chunk is
                    # usually shorter than 1024 bytes.
                    pbar.update(len(chunk))
    return file_size
if __name__ == '__main__':
    # Demo run: fetch one sample video into the current working directory.
    sample_url = 'http://huo.hongjiaozuida.com/20200606/5381_8ab40c11/少年间谍第一季-08.mp4'
    download(sample_url)

@ -1,39 +1,39 @@
#!/usr/bin/env python3
#_*_ coding: utf-8 _*_
'''
***********************************************
author: newrain *
blog: https://blog.csdn.net/NewRain_wang *
https://newrain001.gitee.io *
github: https://github.com/newrain001 *
gitee : https://gitee.com/newrain001 *
email : newrain_wang@163.com *
***********************************************
'''
import requests
from lxml import etree
import time
import os
def getUrl():
    """Save every image listed on the doutula photo-list page into ``image/``.

    Requests https://www.doutula.com/photo/list, reads the ``data-original``
    attribute of each matching ``<img>`` element, downloads each URL and
    writes it to ``image/<last URL path segment>``. Sleeps one second after
    each image. Returns ``None``; does nothing when the listing page does not
    answer with HTTP 200.
    """
    header = {
        # The value must be the agent string only, without repeating the
        # "User-Agent:" field name.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
        # Referer is expected to be an absolute URL.
        'Referer': 'https://www.doutula.com/',
    }
    html = requests.get('https://www.doutula.com/photo/list', headers=header)
    if html.status_code != 200:
        return
    e = etree.HTML(html.text)
    url = e.xpath('//ul/li/div/div/a/img/@data-original')
    if not url:
        return
    # Create the output directory once instead of re-checking per image.
    os.makedirs('image', exist_ok=True)
    for i in url:
        name = i.split("/")[-1]
        result = requests.get(i)
        if result.status_code == 200:
            with open(f'image/{name}', 'wb') as f:
                f.write(result.content)
            print(name, '已完成')
        else:
            # Do not store an error page under an image file name.
            print(name, '下载失败', result.status_code)
        time.sleep(1)
#!/usr/bin/env python3
#_*_ coding: utf-8 _*_
'''
***********************************************
author: newrain *
blog: https://blog.csdn.net/NewRain_wang *
https://newrain001.gitee.io *
github: https://github.com/newrain001 *
gitee : https://gitee.com/newrain001 *
email : newrain_wang@163.com *
***********************************************
'''
import requests
from lxml import etree
import time
import os
def getUrl():
    """Save every image listed on the doutula photo-list page into ``image/``.

    Requests https://www.doutula.com/photo/list, reads the ``data-original``
    attribute of each matching ``<img>`` element, downloads each URL and
    writes it to ``image/<last URL path segment>``. Sleeps one second after
    each image. Returns ``None``; does nothing when the listing page does not
    answer with HTTP 200.
    """
    header = {
        # The value must be the agent string only, without repeating the
        # "User-Agent:" field name.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
        # Referer is expected to be an absolute URL.
        'Referer': 'https://www.doutula.com/',
    }
    html = requests.get('https://www.doutula.com/photo/list', headers=header)
    if html.status_code != 200:
        return
    e = etree.HTML(html.text)
    url = e.xpath('//ul/li/div/div/a/img/@data-original')
    if not url:
        return
    # Create the output directory once instead of re-checking per image.
    os.makedirs('image', exist_ok=True)
    for i in url:
        name = i.split("/")[-1]
        result = requests.get(i)
        if result.status_code == 200:
            with open(f'image/{name}', 'wb') as f:
                f.write(result.content)
            print(name, '已完成')
        else:
            # Do not store an error page under an image file name.
            print(name, '下载失败', result.status_code)
        time.sleep(1)
# Script entry point: start scraping right away. There is no __main__ guard,
# so this call also runs whenever the module is imported.
getUrl()
Loading…
Cancel
Save