#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Download the images listed on the doutula.com photo list page.

***********************************************
author: newrain                               *
blog:   https://blog.csdn.net/NewRain_wang    *
        https://newrain001.gitee.io           *
github: https://github.com/newrain001         *
gitee : https://gitee.com/newrain001          *
email : newrain_wang@163.com                  *
***********************************************
"""
# Provenance: added as doutu.py in commit 4c1011d ("add doutu.py.",
# newrain001, 2021-11-11).
import posixpath
import sys
import time
from pathlib import Path
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree

# Page that lists the images and the XPath selecting each image's real URL
# (the value of the <img data-original="..."> attribute).
LIST_URL = 'https://www.doutula.com/photo/list'
IMAGE_XPATH = '//ul/li/div/div/a/img/@data-original'

# Headers sent with every request, the listing page and the images alike.
# The header *values* must not repeat the header name, and Referer has to be
# a full URL including the scheme.
REQUEST_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/93.0.4577.63 Safari/537.36'
    ),
    'Referer': 'https://www.doutula.com/',
}


def _image_name(url):
    """Return the last path component of *url*, without query or fragment."""
    return posixpath.basename(urlsplit(url).path)


def _save_image(url, destination, timeout):
    """Fetch *url* and write the response body to *destination*.

    Returns True when the file was written. A network error or a non-200
    response is reported on stderr and yields False, so a single bad image
    does not abort the whole run and no error page is stored as an image.
    """
    try:
        response = requests.get(url, headers=REQUEST_HEADERS, timeout=timeout)
    except requests.RequestException as err:
        print(f'{url}: download failed: {err}', file=sys.stderr)
        return False
    if response.status_code != 200:
        print(f'{url}: HTTP {response.status_code}', file=sys.stderr)
        return False
    destination.write_bytes(response.content)
    return True


def getUrl(list_url=LIST_URL, out_dir='image', delay=1, timeout=10):
    """Download every image referenced by the listing page into *out_dir*.

    Args:
        list_url: URL of the page whose ``data-original`` image attributes
            are collected.
        out_dir: Directory the images are written to; created if missing.
        delay: Seconds to sleep after each download attempt.
        timeout: Per-request timeout in seconds, passed to ``requests.get``.

    Returns:
        None. Each saved file name is printed to stdout; a non-200 listing
        response and failed image downloads are reported on stderr.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched (connection error, timeout, ...).
    """
    page = requests.get(list_url, headers=REQUEST_HEADERS, timeout=timeout)
    if page.status_code != 200:
        print(f'{list_url}: HTTP {page.status_code}', file=sys.stderr)
        return

    tree = etree.HTML(page.text)
    if tree is None:
        # lxml returns None for an empty document: nothing to download.
        return

    target = Path(out_dir)
    target.mkdir(parents=True, exist_ok=True)

    for src in tree.xpath(IMAGE_XPATH):
        # Resolve protocol-relative ("//host/x.jpg") and relative sources
        # against the listing URL; absolute URLs pass through unchanged.
        image_url = urljoin(list_url, src)
        name = _image_name(image_url)
        if not name:
            # URL path ends in "/" (or is empty): no usable file name.
            continue
        if _save_image(image_url, target / name, timeout):
            print(name, '已完成')
        # Pause after every attempt to avoid hammering the server.
        time.sleep(delay)


if __name__ == '__main__':
    getUrl()