You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
40 lines
2.0 KiB
40 lines
2.0 KiB
import requests, json
|
|
from lxml import etree
|
|
# from apscheduler.schedulers.background import BackgroundScheduler
|
|
# from django_apscheduler.jobstores import DjangoJobStore, register_job, register_events
|
|
#
|
|
# scheduler = BackgroundScheduler()
|
|
# scheduler.add_jobstore(DjangoJobStore(), "default")
|
|
|
|
def get_image(resolving='300x200', keyword='library', timeout=10):
    """Return an Unsplash image URL for *keyword*, or a fixed fallback URL.

    Sends a HEAD request to ``https://source.unsplash.com/{resolving}/?{keyword}``
    and returns the redirect target announced in the ``Location`` response
    header.

    Args:
        resolving: Image size, interpolated into the URL path (e.g. ``'300x200'``).
        keyword: Search keyword, interpolated into the URL query string.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value of the ``Location`` header, or the hard-coded fallback image
        URL when the header is missing/empty or the request fails.
    """
    fallback = 'https://images.unsplash.com/photo-1567168539593-59673ababaae?crop=entropy&cs=tinysrgb&fit=crop&fm=jpg&h=500&ixid=MnwxfDB8MXxyYW5kb218MHx8Ym9vayxsaWJyYXJ5fHx8fHx8MTY1MTU1NDcyNA&ixlib=rb-1.2.1&q=80&utm_campaign=api-credit&utm_medium=referral&utm_source=unsplash_source&w=500'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    }
    try:
        # requests.head() does not follow redirects by default, so the
        # redirect target stays readable in the Location header.
        response = requests.head(
            f'https://source.unsplash.com/{resolving}/?{keyword}',
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException:
        # Best-effort lookup: any network failure yields the fallback image.
        return fallback
    # .get() instead of indexing: a response without a redirect must fall
    # back to the default image rather than raise KeyError.
    return response.headers.get('Location') or fallback
|
|
|
|
# @register_job(scheduler, 'interval', minutes=60*24, replace_existing=True)
|
|
def csdn_get(timeout=10):
    """Scrape article entries from a CSDN blog page into ``App/csdn.json``.

    Downloads ``https://blog.csdn.net/NewRain_wang``, extracts up to three
    article links, titles and summary texts via XPath, attaches an image URL
    from ``get_image()`` to each entry, and writes the result as JSON keyed
    by article URL. A status message is printed either way; nothing is
    written when no entries were extracted.

    Args:
        timeout: Seconds to wait for the blog page before ``requests`` raises.

    Raises:
        requests.RequestException: If the blog page cannot be fetched.
        OSError: If ``App/csdn.json`` cannot be opened for writing.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    }
    blog = requests.get(
        'https://blog.csdn.net/NewRain_wang',
        headers=headers,
        timeout=timeout,
    )
    tree = etree.HTML(blog.content)

    links = tree.xpath('//article[@class="blog-list-box"][count(preceding::article) < 5]/a/@href')[:3]
    titles = tree.xpath('//h4[count(preceding::h4) < 5]/text()')[:3]
    summaries = tree.xpath('//article[@class="blog-list-box"][count(preceding::article) < 5]/a/div[@class="blog-list-content"]/text()')[:3]

    # zip() stops at the shortest list, so an article missing a title or a
    # summary can no longer trigger an IndexError.
    data = {
        link: {'title': title, 'text': summary, 'image': get_image()}
        for link, title, summary in zip(links, titles, summaries)
    }

    if data:
        # cache.set('csdn', data, 60*60*24)
        with open('App/csdn.json', 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
        print('csdn.json 已生成')
    else:
        print('csdn.json 未获取')
|
|
|
|
# register_events(scheduler)
|
|
# scheduler.start()
|
|
# Script entry point: refresh App/csdn.json once when this file is executed
# directly rather than imported.
if __name__ == "__main__":
    csdn_get()