40 lines
2.0 KiB

import requests, json
from lxml import etree
# from apscheduler.schedulers.background import BackgroundScheduler
# from django_apscheduler.jobstores import DjangoJobStore, register_job, register_events
# scheduler = BackgroundScheduler()
# scheduler.add_jobstore(DjangoJobStore(), "default")
def get_image(resolving='300x200', keyword='library', timeout=10):
    """Return the redirect target announced by the image endpoint.

    Sends a HEAD request to ``f'{resolving}/?{keyword}'`` and reads the
    ``Location`` header of the response.

    Args:
        resolving: First path segment of the request URL (default
            ``'300x200'``).
        keyword: Text placed after ``/?`` in the request URL.
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises a timeout error.

    Returns:
        The value of the ``Location`` response header, or ``''`` when the
        header is absent or empty.
    """
    # NOTE(review): the URL built here has no scheme or host in this file;
    # requests needs an absolute URL -- confirm the intended base URL.
    response = requests.head(
        f'{resolving}/?{keyword}',
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'},
        timeout=timeout,
    )
    # Use .get() rather than indexing so a response without a Location
    # header yields '' instead of raising KeyError.
    return response.headers.get('Location') or ''
# @register_job(scheduler, 'interval', minutes=60*24, replace_existing=True)
def csdn_get(timeout=10):
    """Scrape up to three blog entries and save them to ``App/csdn.json``.

    Downloads the blog listing page, extracts link, title and summary text
    with XPath, attaches an image URL from ``get_image()`` to each entry and
    writes the result as JSON keyed by the entry link.

    Args:
        timeout: Seconds to wait for the listing page before ``requests``
            gives up and raises a timeout error.

    Returns:
        None. Prints ``'csdn.json 已生成'`` after the file is written, or
        ``'csdn.json 未获取'`` when no entries were extracted (in which case
        the file is left untouched).
    """
    # NOTE(review): the URL literal is empty in this file; requests needs an
    # absolute URL -- confirm the intended listing page address.
    blog = requests.get(
        '',
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'},
        timeout=timeout,
    )
    e = etree.HTML(blog.content)
    urls = e.xpath('//article[@class="blog-list-box"][count(preceding::article) < 5]/a/@href')[:3]
    titles = e.xpath('//h4[count(preceding::h4) < 5]/text()')[:3]
    texts = e.xpath('//article[@class="blog-list-box"][count(preceding::article) < 5]/a/div[@class="blog-list-content"]/text()')[:3]

    # zip() stops at the shortest list, so a page where the three queries
    # return different numbers of matches no longer raises IndexError.
    data = {
        url: {'title': title, 'text': text, 'image': get_image()}
        for url, title, text in zip(urls, titles, texts)
    }

    # Guard clause: report the failure and stop, so the "not obtained"
    # message is never printed after a successful write.
    if not data:
        print('csdn.json 未获取')
        return

    #cache.set('csdn', data, 60*60*24)
    with open('App/csdn.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    print('csdn.json 已生成')
# register_events(scheduler)
# scheduler.start()
if __name__ == '__main__':