From 9dd9260a21cdfab7c66c13c9f369657ae29fed70 Mon Sep 17 00:00:00 2001 From: newrain001 Date: Wed, 4 Aug 2021 18:05:09 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 36 ------ api/.keep | 0 dingding.py => api/dingding.py | 0 sendmail.py => api/sendmail.py | 0 wechat.py => api/wechat.py | 0 wechat3.py => api/wechat3.py | 128 ++++++++++---------- database/mysql.py | 53 ++++++++ library/data.json | 20 +++ library/library.py | 125 +++++++++++++++++++ op/Transp.py | 40 ++++++ checkfile.py => op/checkfile.py | 0 op/find.py | 50 ++++++++ spider/image.py | 56 +++++++++ spider/img.py | 54 +++++++++ spider/tktest.py | 26 ++++ spider/小说.py | 16 +++ spider/小说片段.py | 30 +++++ 文件下载.py => spider/文件下载.py | 60 ++++----- 文件下载2.py => spider/文件下载2.py | 102 ++++++++-------- spider/校花网.py | 19 +++ 20 files changed, 634 insertions(+), 181 deletions(-) delete mode 100644 README.en.md delete mode 100644 api/.keep rename dingding.py => api/dingding.py (100%) rename sendmail.py => api/sendmail.py (100%) rename wechat.py => api/wechat.py (100%) rename wechat3.py => api/wechat3.py (95%) create mode 100644 database/mysql.py create mode 100644 library/data.json create mode 100644 library/library.py create mode 100644 op/Transp.py rename checkfile.py => op/checkfile.py (100%) create mode 100644 op/find.py create mode 100644 spider/image.py create mode 100644 spider/img.py create mode 100644 spider/tktest.py create mode 100644 spider/小说.py create mode 100644 spider/小说片段.py rename 文件下载.py => spider/文件下载.py (95%) rename 文件下载2.py => spider/文件下载2.py (97%) create mode 100644 spider/校花网.py diff --git a/README.en.md b/README.en.md deleted file mode 100644 index 53f01ec..0000000 --- a/README.en.md +++ /dev/null @@ -1,36 +0,0 @@ -# python-project - -#### Description -{**When you're done, you can delete the content in this README and update the file with details for others getting started with your repository**} - -#### Software Architecture -Software architecture description - -#### Installation - -1. xxxx -2. xxxx -3. xxxx - -#### Instructions - -1. xxxx -2. xxxx -3. xxxx - -#### Contribution - -1. Fork the repository -2. Create Feat_xxx branch -3. Commit your code -4. Create Pull Request - - -#### Gitee Feature - -1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md -2. Gitee blog [blog.gitee.com](https://blog.gitee.com) -3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) -4. The most valuable open source project [GVP](https://gitee.com/gvp) -5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) -6. 
The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/api/.keep b/api/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/dingding.py b/api/dingding.py similarity index 100% rename from dingding.py rename to api/dingding.py diff --git a/sendmail.py b/api/sendmail.py similarity index 100% rename from sendmail.py rename to api/sendmail.py diff --git a/wechat.py b/api/wechat.py similarity index 100% rename from wechat.py rename to api/wechat.py diff --git a/wechat3.py b/api/wechat3.py similarity index 95% rename from wechat3.py rename to api/wechat3.py index dced3e0..f0ceca8 100644 --- a/wechat3.py +++ b/api/wechat3.py @@ -1,65 +1,65 @@ -#!/usr/bin/python3 -#-*-coding: utf-8-*- -# by QianFeng.newrain -# -''' - Welcome QianFeng cloud computing -''' - -import json -import sys -import time -import requests - -# 此为企业的ID号 -CorpID = '' - -# 应用的ID -Agentid = 1000004 - -# 认证信息,企业ID+认证信息可获取tokent,获取之后向此tokent发送内容 -Secret = '' - -localtime = time.strftime("[%H:%M:%S]", time.localtime()) -class Tencent(object): - def __init__(self,user,title): - import subprocess - a=subprocess.getoutput("free -h |awk 'NR==2{print $4}'") - b=subprocess.getoutput("df -Th |awk 'NR==2{print $5}'") - c= subprocess.getoutput("uptime |awk -F ':' '{print $NF}'") - msg =a+b+c - # 格式化输出内容:标题+内容 - self.MSG = f'{title}\n{msg}\n{localtime}' - self.User = user - self.url = 'https://qyapi.weixin.qq.com' - self.send_msg = json.dumps({ - "touser": self.User, - "msgtype": 'text', - "agentid": Agentid, - "text": {'content': self.MSG}, - "safe": 0 - }) - # 获取tokent - def get_token(self): - token_url = '%s/cgi-bin/gettoken?corpid=%s&corpsecret=%s' % (self.url, CorpID, Secret) - r = requests.get(token_url) - r = r.json() - token = r['access_token'] - return token - - # 发送信息 - def send_message(self): - send_url = '%s/cgi-bin/message/send?access_token=%s' % (self.url,self.get_token()) - respone = requests.post(url=send_url, data=self.send_msg) - respone = respone.json() - x = respone['errcode'] - if x == 0: - print ('Succesfully') - else: - print ('Failed') - -if __name__ == '__main__': - # 创建对象 - send_obj = Tencent('xxxxx','我就是我,不一样的烟火') - # 调用发送函数 +#!/usr/bin/python3 +#-*-coding: utf-8-*- +# by QianFeng.newrain +# +''' + Welcome QianFeng cloud computing +''' + +import json +import sys +import time +import requests + +# 此为企业的ID号 +CorpID = '' + +# 应用的ID +Agentid = 1000004 + +# 认证信息,企业ID+认证信息可获取tokent,获取之后向此tokent发送内容 +Secret = '' + +localtime = time.strftime("[%H:%M:%S]", time.localtime()) +class Tencent(object): + def __init__(self,user,title): + import subprocess + a=subprocess.getoutput("free -h |awk 'NR==2{print $4}'") + b=subprocess.getoutput("df -Th |awk 'NR==2{print $5}'") + c= subprocess.getoutput("uptime |awk -F ':' '{print $NF}'") + msg =a+b+c + # 格式化输出内容:标题+内容 + self.MSG = f'{title}\n{msg}\n{localtime}' + self.User = user + self.url = 'https://qyapi.weixin.qq.com' + self.send_msg = json.dumps({ + "touser": self.User, + "msgtype": 'text', + "agentid": Agentid, + "text": {'content': self.MSG}, + "safe": 0 + }) + # 获取tokent + def get_token(self): + token_url = '%s/cgi-bin/gettoken?corpid=%s&corpsecret=%s' % (self.url, CorpID, Secret) + r = requests.get(token_url) + r = r.json() + token = r['access_token'] + return token + + # 发送信息 + def send_message(self): + send_url = '%s/cgi-bin/message/send?access_token=%s' % (self.url,self.get_token()) + respone = requests.post(url=send_url, data=self.send_msg) + respone = respone.json() + x = respone['errcode'] + if x == 0: + print 
('Succesfully') + else: + print ('Failed') + +if __name__ == '__main__': + # 创建对象 + send_obj = Tencent('xxxxx','我就是我,不一样的烟火') + # 调用发送函数 send_obj.send_message() \ No newline at end of file diff --git a/database/mysql.py b/database/mysql.py new file mode 100644 index 0000000..0108f1f --- /dev/null +++ b/database/mysql.py @@ -0,0 +1,53 @@ +import pymysql + +class MySQLDB(): + def __init__(self,*args, + host='localhost', + user='root', + password=None, + port=3306, + db='mysql', + charset='utf8', + ): + self.conn = pymysql.connect(user=user,host=host,port=port,password=password,db=db,charset=charset,cursorclass=pymysql.cursors.DictCursor) + self.cursor = self.conn.cursor() + def DML(self,sql,condition,mode): + try: + if mode == 'insert': + if len(condition) > 1: + result = self.cursor.executemany(sql,condition) + else: + result = self.cursor.execute(sql, condition) + print(f'插入完成,受影响{result}行') + if mode == 'update': + result = self.cursor.execute(sql) + print(f'更新完成,受影响{result}行') + if mode == 'delete': + result = self.cursor.execute(sql) + print(f'删除完成,受影响{result}行') + self.conn.commit() + except pymysql.MySQLError as e: + self.conn.rollback() + print(e) + finally: + self.conn.close() + def DQL(self,sql,size=None): + try: + result = self.cursor.execute(sql) + if size: + result2 = self.cursor.fetchmany(size) + else: + result2 = self.cursor.fetchall() + for i in result2: + for v in i.values(): + print(v,end='\t') + print() + print(f'共查找{result}条记录') + except pymysql.MySQLError as e: + print(e) + finally: + self.conn.close() + def __str__(self): + return '详细操作手册查看 https://www.baidu.com' +a = MySQLDB(host='39.103.141.138',user='eval',password='123456',db='db1') +a.DQL('select id,name from user') \ No newline at end of file diff --git a/library/data.json b/library/data.json new file mode 100644 index 0000000..6323d53 --- /dev/null +++ b/library/data.json @@ -0,0 +1,20 @@ +{ + "user": { + "haha": "6ca13d52ca70c883e0f0bb101e425a89e8624de51db2d2392593af6a84118090" + }, + "book": { + "\u6bdb\u6cfd\u4e1c\u8bd7\u96c6": "2021-03-19 18:56:05.986340", + "\u5c0f\u4e8c\u90ce\u653e\u725b\u8bb0": "2021-03-19 18:56:05.986340", + "\u53d8\u5f62\u91d1\u521a3": "2021-03-22 16:53:38", + "\u53d8\u5f62\u91d1\u521a2": "2021-03-22 16:53:38", + "\u53d8\u5f62\u91d1\u521a1": "2021-03-22 17:30:17", + "\u767d\u96ea\u516c\u4e3b": "2021-03-22 17:34:36", + "\u683c\u6797\u7ae5\u8bdd": "2021-03-22 17:34:36", + "\u4eba\u6c11\u65e5\u62a5": "2021-03-22 17:34:36", + "\u8d70\u8fd1\u79d1\u5b66": "2021-03-22 17:34:36", + "\u5c0f\u732b\u9493\u9c7c": "2021-03-22 17:50:53" + }, + "admin": { + "admin001": "654321" + } +} \ No newline at end of file diff --git a/library/library.py b/library/library.py new file mode 100644 index 0000000..02e5c2c --- /dev/null +++ b/library/library.py @@ -0,0 +1,125 @@ +import json +import time +import hashlib + + +class Library(): + def __init__(self): + self.loginStats = 0 + self.loginUser = None + self.password = None + self.now = time.strftime('%F %X') + + def hashPass(self,password): + t = hashlib.sha256(password.encode('utf-8')) + return t.hexdigest() + + def write_json(self, name, dict): + with open(name, 'w') as f: + json.dump(dict, f, indent='\t', ensure_ascii=False) + + def read_json(self, name): + with open(name, 'r') as f: + data = json.load(f) + return data + + def regUser(self, u): + self.write_json('./data.json', u) + self.write_json(f'./{self.loginUser}', {'regTime': self.now,'book':{}}) + + def login(self): + data = self.read_json('data.json') + if self.loginUser in data['user']: + if 
data['user'][self.loginUser] == self.hashPass(self.password): + return True + + def borrowBook(self, bookName): + data = self.read_json('data.json') + my_data = self.read_json(self.loginUser) + if bookName in data['book']: + del data['book'][bookName] + my_data['book'][bookName] = self.now + self.write_json('data.json', data) + self.write_json(self.loginUser, my_data) + return True + + def backBook(self, bookName): + my_data = self.read_json(self.loginUser) + data = self.read_json('data.json') + if bookName in my_data['book']: + del my_data['book'][bookName] + data['book'][bookName] = self.now + self.write_json('data.json', data) + self.write_json(self.loginUser, my_data) + return True + + def pushBook(self, books): + if books: + s = {}.fromkeys(books, self.now) + data = self.read_json('data.json') + data['book'].update(s) + self.write_json('data.json', data) + + def main(self): + try: + while True: + num = input('''(1) 注册\n(2) 登录\n(3) 借书\n(4) 还书\n(5) 上架\n(6) 注销\n(0) 退出\n请输入编号:''') + if num == '2': + if self.loginStats==1: print('用户已登录');continue + self.loginUser, self.password = input('请输入账号:'), input('请输入密码:') + s = self.login() + if not s: + print('密码错误') + else: + print('登录成功') + self.loginStats = 1 + elif num == '1': + if self.loginStats==1: print('用户已登录');continue + data = self.read_json('data.json') + self.loginUser, self.password, password = input('请输入账号:'), input('请输入密码:'), input('请在此输入密码:') + if self.loginUser in data['user']: print('用户已存在') + else: data['user'][self.loginUser] = self.hashPass(self.password);self.regUser(data);print('创建成功');self.loginStats = 1 + elif num == '3': + if self.loginStats==0: print('用户未登录');continue + print('书籍列表:') + for k,v in self.read_json('data.json')['book'].items(): + print(f'名称:{k}',f'时间:{v}',sep='\t\t') + bn = input('请输入书籍名称:') + if self.borrowBook(bn): print('借书成功') + else: print('输入错误,请检查') + elif num == '4': + if self.loginStats==0: print('用户未登录');continue + if not self.read_json(self.loginUser)['book']: + print('没有借阅任何书籍') + continue + print('我的书架:') + for k,v in self.read_json(self.loginUser)['book'].items(): + print(f'名称:{k}',f'时间:{v}',sep='\t\t') + bn = input('请输入书籍名称:') + if self.backBook(bn): print('还书成功') + else: print('输入错误,请检查') + elif num == '5': + if self.loginUser!='admin001': print('管理员用户请自行初始化');continue + data = self.read_json('data.json') + blist = input('请输入书籍名称(多本书,请使用“,”隔开 ):').split(',') + data['book'].update({}.fromkeys(blist,self.now)) + self.write_json('data.json',data) + print('上架成功') + elif num == '6': + if self.loginStats==0: print('用户未登录');continue + print(f'{self.loginUser}已注销');self.loginUser = None + elif num == '0':break + else: print('输入错误,请重新输入') + except StopIteration as e: + print(e) + finally: + print('欢迎下次光临') + + +if __name__ == '__main__': + t = Library() + t.main() + # 管理员用户上架图书 + # t = Library() + # t.loginUser = 'admin001' + # t.main() \ No newline at end of file diff --git a/op/Transp.py b/op/Transp.py new file mode 100644 index 0000000..e94a033 --- /dev/null +++ b/op/Transp.py @@ -0,0 +1,40 @@ +import paramiko +class sshd: + def __init__(self,hostname, + passwd, + username='root', + port=22): + self.hostname = hostname + self.passwd = passwd + self.username=username + self.port=port + self.obj=paramiko.Transport((self.hostname,self.port)) + self.obj.connect(username=self.username,password=self.passwd) + self.ssh = paramiko.SSHClient() + self.ssh._transport = self.obj + self.sftp=paramiko.SFTPClient.from_transport(self.obj) + def op_ssh(self,cmd): + stdin,stdout,stderr = self.ssh.exec_command(cmd) 
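+ # 注:exec_command 立即返回 stdin/stdout/stderr 三个通道对象,read() 会阻塞到远端命令执行结束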
+ stdout = str(stdout.read().decode()) + stderr = str(stderr.read().decode()) + if stdout: + return stdout + else: + return stderr + def op_ftp_push(self,froms,tos): + self.sftp.put(froms,tos) + return True + def op_ftp_pull(self,froms,tos): + self.sftp.get(froms,tos) + return True + def close(self): + self.sftp.close() + self.obj.close() + def __str__(self): + return 'QianFeng cloud computing testing' +if __name__ == '__main__': + abc = sshd(hostname='127.0.0.1',passwd='123') + s = abc.op_ssh('df -Th') + b = abc.op_ftp_pull('/etc/passwd','/mnt/abc.txt') + print(s,b) + abc.close() diff --git a/checkfile.py b/op/checkfile.py similarity index 100% rename from checkfile.py rename to op/checkfile.py diff --git a/op/find.py b/op/find.py new file mode 100644 index 0000000..704ae4a --- /dev/null +++ b/op/find.py @@ -0,0 +1,50 @@ +# 帮助用户查找文件 +# 由于windows自带的文件查找很是垃圾,所以我们自己写了一个查找文件的程序 +# 1、用户输入文件的关键字 +# 2、用户可以输入一个大概的位置,如果用户不输入,默认为/ +# 3、返回查找了多少个文件和找到了相关的文件有多少 + +import os + +allfile = [] +kwfile = [] + + +def check_exists(dir): + if os.path.exists(dir): + return True + else: + print('目录不存在,使用默认目录') + global p + if os.name == 'nt': + p = 'c:\\' + elif os.name == 'posix': + p = '/' + +# 功能相关 + +def check_abs(path): + os.chdir(path) + return os.path.abspath(path) + +kw = input('请输入需要查询文件中的关键字[default "network"]:') +if kw == '': + kw = 'network' +p = input('请输入文件的大概位置[default C:|/]:') +check_exists(p) + +def main(path): + path = check_abs(path) # 执行函数修改成绝对路径 用户输入./test,cd ./test && pwd + dirlist = os.listdir(path) # 列表 = ls -A ./ + for i in dirlist: # 循环这个列表,获得目录下面的所有文件 + allfile.append(os.path.join(path,i)) # 将文件追加到空列表 allfile 中 + if os.path.isdir(os.path.join(path,i)): # 使用isdir来判断是否是目录 + main(os.path.join(path,i)) + if kw in i: # 使用in来判断是否包含关键字 + kwfile.append(os.path.join(path,i)) + +main(p) +for i in kwfile: + print(i) +print(f'在{len(allfile)}个文件中进行了查找') +print(f'共查找到{len(kwfile)}个相关文件') \ No newline at end of file diff --git a/spider/image.py b/spider/image.py new file mode 100644 index 0000000..fc6d7cb --- /dev/null +++ b/spider/image.py @@ -0,0 +1,56 @@ +from urllib import request,parse # urllib 网络需要使用的模块 python3 urllib urllib2 urllib3 requests # 配置文件 +import chardet,re,requests # chardet 检测网页的字符集(有时候不准) +import logging # 日志模块 +import os,sys +class spider(): # spider 爬虫框架 + def __init__(self,word): + self.word = word # 要爬取的图片的关键字 + self.url = f'https://image.baidu.com/search/index?tn=baiduimage&' # 定义基础url + logging.basicConfig(filename=f'message.log', level=logging.INFO, format='%(asctime)s %(message)s', + datefmt='%Y-%m-%d %H:%M:%S') # 日志模块,定义日志的内容模板 + def urld(self): + word = self.word + word = { + "word": word + } + self.word = parse.urlencode(word) # 使用 parse 将关键字进行编码 + self.url = self.url + self.word # 基础url 和关键字进行拼接 + return self.url + def data(self,path): # 定义爬取的功能函数 + if not os.path.exists(path): # 判断路径是否存在 如果不,打印日志 退出程序 + self.logd('路径无法找到,请检查') + sys.exit(3) + rsps = request.urlopen(self.url) # 打开rul(访问url) + if rsps.getcode() == 200: # 判断返回的状态码是什么 + html = rsps.read() # 获取html代码 + code = chardet.detect(html) # 检测字符集 + html = html.decode(code.get('encoding', 'utf-8')) # 解码过程 + data = re.findall(r'http[s]://.*?\.jpg', html) # 使用正则匹配网页内的图片信息 + data = list(set(data)) # 去重 + n = 1 + path = path+os.sep + print(path) + for i in data: + d = requests.get(i).content # 读取图片内容,将内容写到文件中,以二进制的方式 + f = open(f'{path}{n}.jpg', 'wb') + self.logd(f'url{n}:ok--{i}') + print('正在爬取。。。') + print(f'url{n}:ok--{i}') + f.write(d) + f.close() + n += 1 + else: + self.logd('访问错误,请检查网络是否连接') + #sys.exit(4) + + 
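# 简易日志封装:level 传 'error' 时按 error 级别记录,其余情况一律按 critical 级别记录 +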
def logd(self,log, level='error'): + if level == 'error': + logging.error(log) + else: + logging.critical(log) + +if __name__ == '__main__': + path='/Users/mac/Desktop/a/' + a = spider('美女') + a.urld() + a.data(path=path) \ No newline at end of file diff --git a/spider/img.py b/spider/img.py new file mode 100644 index 0000000..f1cb395 --- /dev/null +++ b/spider/img.py @@ -0,0 +1,54 @@ +from urllib import request,parse # urllib 网络需要使用的模块 python3 urllib urllib2 urllib3 requests +from config import * # 配置文件 +import chardet,config,re,requests # chardet 检测网页的字符集(有时候不准) +import logging # 日志模块 +import os,sys +class spider(): # spider 爬虫框架 + def __init__(self,word): + self.word = word # 要爬取的图片的关键字 + self.url = f'https://image.baidu.com/search/index?tn=baiduimage&' # 定义基础url + logging.basicConfig(filename=f'{keyword}.log', level=logging.INFO, format='%(asctime)s %(message)s', + datefmt='%Y-%m-%d %H:%M:%S') # 日志模块,定义日志的内容模板 + def urld(self): + word = self.word + word = { + "word": word + } + self.word = parse.urlencode(word) # 使用 parse 将关键字进行编码 + self.url = self.url + self.word # 基础url 和关键字进行拼接 + return self.url + def data(self,path): # 定义爬取的功能函数 + if not os.path.exists(path): # 判断路径是否存在 如果不,打印日志 退出程序 + self.logd('路径无法找到,请检查') + sys.exit(3) + rsps = request.urlopen(self.url) # 打开rul(访问url) + if rsps.getcode() == 200: # 判断返回的状态码是什么 + html = rsps.read() # 获取html代码 + code = chardet.detect(html) # 检测字符集 + html = html.decode(code.get('encoding', 'utf-8')) # 解码过程 + data = re.findall(r'http[s]://.*?\.jpg', html) # 使用正则匹配网页内的图片信息 + data = list(set(data)) # 去重 + n = 1 + for i in data: + d = requests.get(i).content # 读取图片内容,将内容写到文件中,以二进制的方式 + f = open(f'{path}{keyword}{n}.jpg', 'wb') + self.logd(f'url{n}:ok--{i}') + print('正在爬取。。。') + print(f'url{n}:ok--{i}') + f.write(d) + f.close() + n += 1 + else: + self.logd('访问错误,请检查网络是否连接') + sys.exit(4) + + def logd(self,log, level='error'): + if level == 'error': + logging.error(log) + else: + logging.critical(log) + +if __name__ == '__main__': + a = spider(keyword) + a.urld() + a.data(path=path) \ No newline at end of file diff --git a/spider/tktest.py b/spider/tktest.py new file mode 100644 index 0000000..e59a023 --- /dev/null +++ b/spider/tktest.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +#-*-coding: utf-8-*- + +from tkinter import * +from a import * +def test(): + var= entry1.get() + path = entry2.get() + b = spider(var) + b.urld() + b.data(path=path) +# 构造窗口 +root = Tk() +root.title('python爬虫') +Label(root, text='关键字|路径\n').pack() +entry1 = Entry(root,width=20) +entry2 = Entry(root,width=20) +but = Button(root,text='确认',command=test) +# 添加元素 +entry1.pack() +entry2.pack() +but.pack() +# 循环运行 +root.mainloop() + + diff --git a/spider/小说.py b/spider/小说.py new file mode 100644 index 0000000..edbed0a --- /dev/null +++ b/spider/小说.py @@ -0,0 +1,16 @@ +import requests +from lxml import etree +import time + +next_url = "http://book.zongheng.com/chapter/1128608/66171932.html" +for i in range(1,100): + html = requests.get(url=next_url) + e = etree.HTML(html.content) + title = e.xpath('//div[@class="title_txtbox"]/text()')[0] + text = '\n'.join(e.xpath('//p/text()')) + next_url = e.xpath('//div/a[text()="下一章"]/@href')[0] + with open(f'国公凶猛.txt','a') as f: + f.write(title+'\n') + f.writelines(text) + print(title,'下载成功') + time.sleep(0.5) \ No newline at end of file diff --git a/spider/小说片段.py b/spider/小说片段.py new file mode 100644 index 0000000..5be2e0a --- /dev/null +++ b/spider/小说片段.py @@ -0,0 +1,30 @@ +import requests,re +import time + +def func(url): + data = 
requests.get(url) + data.encoding = 'UTF-8' + data = data.text + strd = re.search(r'<div class="content">(.*)</div>',data) # 正文容器标签在转换中丢失,此处按章节页常见结构推测 + strd = strd.group().replace('<p>','----').replace('</p>
','\n') + return strd +def func2(url): + data = requests.get(url) + data.encoding = 'UTF-8' + data = data.text + s = re.search(r'(http://book.zongheng.com/chapter.*?\d{8}.html).*?下一章',data) + return s.group(1) +url = '' +while True: + time.sleep(5) + if url == '': + url = url = 'http://book.zongheng.com/chapter/557195/27125898.html' + data = func(url) + f = open('a.txt','a+') + f.write(data) + url = func2(url) + print(url) +f.close() + + + diff --git a/文件下载.py b/spider/文件下载.py similarity index 95% rename from 文件下载.py rename to spider/文件下载.py index cf4c7d6..8f3bf81 100644 --- a/文件下载.py +++ b/spider/文件下载.py @@ -1,31 +1,31 @@ -import requests -import tqdm as tqdm -import os - -url = 'http://mirrors.163.com/centos/8.3.2011/isos/x86_64/CentOS-8.3.2011-x86_64-boot.iso' -def download(url): - filename = url.split('/')[-1] - total_size = int(requests.head(url).headers['Content-Length']) - if os.path.exists(filename): - file_size = os.path.getsize(filename) - if file_size < total_size: - print('断点续传中。。。') - elif file_size == total_size: - print('文件已存在') - exit(0) - else: - file_size = 0 - - header = {'Range': 'bytes=%s-%s' % (file_size, total_size)} - t = tqdm.tqdm(total=total_size, desc=filename, initial=file_size, unit='B', unit_scale=True) - result = requests.get(url, headers=header, stream=True) - - with open(filename, 'ab') as f: - for i in result.iter_content(chunk_size=1024): - f.write(i) - t.update(1024) - t.close() - -if __name__ == '__main__': - url = 'http://mirrors.163.com/centos/8.3.2011/isos/x86_64/CentOS-8.3.2011-x86_64-boot.iso' +import requests +import tqdm as tqdm +import os + +url = 'http://mirrors.163.com/centos/8.3.2011/isos/x86_64/CentOS-8.3.2011-x86_64-boot.iso' +def download(url): + filename = url.split('/')[-1] + total_size = int(requests.head(url).headers['Content-Length']) + if os.path.exists(filename): + file_size = os.path.getsize(filename) + if file_size < total_size: + print('断点续传中。。。') + elif file_size == total_size: + print('文件已存在') + exit(0) + else: + file_size = 0 + + header = {'Range': 'bytes=%s-%s' % (file_size, total_size)} + t = tqdm.tqdm(total=total_size, desc=filename, initial=file_size, unit='B', unit_scale=True) + result = requests.get(url, headers=header, stream=True) + + with open(filename, 'ab') as f: + for i in result.iter_content(chunk_size=1024): + f.write(i) + t.update(1024) + t.close() + +if __name__ == '__main__': + url = 'http://mirrors.163.com/centos/8.3.2011/isos/x86_64/CentOS-8.3.2011-x86_64-boot.iso' download(url) \ No newline at end of file diff --git a/文件下载2.py b/spider/文件下载2.py similarity index 97% rename from 文件下载2.py rename to spider/文件下载2.py index 37e0c19..03ea57e 100644 --- a/文件下载2.py +++ b/spider/文件下载2.py @@ -1,51 +1,51 @@ -from alive_progress import alive_bar -import math -import requests -import os - - -class Download(): - def __init__(self, urlPath=None): - self.urlPath = urlPath - self.filename = urlPath.split('/')[-1] - self.header = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36' - } - - def download(self): - self.header['Range'] = 'bytes=%s-%s' % (self.fileSize, self.totalSize) - self.result = requests.get(url=self.urlPath, headers=self.header, stream=True) - - - def progress(self): - with alive_bar(total=math.ceil((self.totalSize - self.fileSize) / 1024), title=self.filename, title_length=10, force_tty=True) as bar: - with open(self.filename, 'wb') as f: - for i in self.result.iter_content(chunk_size=1024): - f.write(i) - bar() - - def 
checkPath(self): + self.totalSize = int(requests.head(url=self.urlPath, headers=self.header).headers['Content-Length']) + if os.path.exists(self.filename): + self.fileSize = os.path.getsize(self.filename) + if self.fileSize < self.totalSize: + print(f'文件{self.filename}断点续传中') + else: + print('文件已存在') + return False + else: + self.fileSize = 0 + return True + + def run(self): + if not self.checkPath(): # 文件已完整存在时直接跳过,避免重复下载 + return + self.download() + self.progress() + +if __name__ == '__main__': + with open('./url.txt','r') as f: + urls = f.read().splitlines() + for url in urls: + if not url: + continue + s = Download(urlPath=url) + s.run() diff --git a/spider/校花网.py b/spider/校花网.py new file mode 100644 index 0000000..b59b18f --- /dev/null +++ b/spider/校花网.py @@ -0,0 +1,19 @@ +import requests,re +''' +思路:获取网页的前端页面,通过正则匹配出页面中的静态图片地址,再逐一下载保存。 +''' +first_url = "http://www.xiaohuar.com/2014.html" # 定义url +response = requests.get(first_url) # 获取网页对象 +response.encoding = 'GBK' # 定义编码方式 +html = response.text # 获取html代码 +img_urls = re.findall(r'src="(/d/file/\w+\.jpg)"', html) # 正则匹配图片地址 +img_num = len(img_urls) +for i in range(img_num): # 拼接url + img_urls[i] = "http://www.xiaohuar.com%s" % img_urls[i] + +for img_url in img_urls: # 下载图片并保存 + img_file_name = img_url.split('/')[-1] + img_data = requests.get(img_url).content + with open(img_file_name, "wb") as f: + f.write(img_data) + print(img_url)
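附:一个最小调用示例草稿,演示不经由 url.txt、直接用上文 Download 类下载单个文件;URL 取自上文 文件下载.py,"与 spider/文件下载2.py 同目录执行"为此处假设:
from 文件下载2 import Download  # 假设本示例与 文件下载2.py 位于同一目录

url = 'http://mirrors.163.com/centos/8.3.2011/isos/x86_64/CentOS-8.3.2011-x86_64-boot.iso'
d = Download(urlPath=url)
d.run()  # 文件不完整时按 Range 请求头断点续传;已完整则直接跳过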