# Published on 2022-10-18 (3 years ago).
#
# Preface
# -------
# While reading my RSS feeds today I came across an article about polling an
# endpoint for each day's new CVEs, saving them to a database and sending an
# e-mail notification when the CVE list is updated. I liked the idea, so I
# went ahead and wrote a crawler for it.
#
# Body
# ----
# The endpoint is the CVE changes page (screenshot: AOd8T1.png). Every entry
# is a hyperlink whose target shows the details, so it can simply be crawled.
# Having recently learned asyncio, the detail pages are fetched asynchronously
# for speed.
#
# Approach:
#   1. Scrape the URLs listed by the endpoint.
#   2. Fetch all detail URLs asynchronously.
#   3. Write the results to an .xls file.
#
# Final result / repository:
# https://github.com/422926799/python/tree/master/CVE%E5%AE%9A%E6%97%B6%E6%9B%B4%E6%96%B0
#
# author: 九世
# time: 2019/4/14
import asyncio
import os
import re
import time

import requests
import xlwt
from aiohttp import ClientError, ClientSession, ClientTimeout
from bs4 import BeautifulSoup

new_cve = []  # hrefs scraped from the index page by Demon.zx()
bh_cve = []   # CVE identifiers; entry i belongs to jg_cve[i]
jg_cve = []   # response URLs of the detail pages; entry i belongs to bh_cve[i]

# First run of digits followed by '-' and the rest of the URL, e.g. the
# "2019-1234" part of a detail URL; the "CVE-" prefix is added by the caller.
_CVE_SUFFIX = re.compile(r'[0-9]{1,}-.*')


class Demon:
    """Scrape a CVE index page, resolve its links and export them to .xls.

    Results are accumulated in the module-level lists ``new_cve``,
    ``bh_cve`` and ``jg_cve``.
    """

    def __init__(self, headers, url):
        """Store the HTTP request headers and the index page URL."""
        self.headers = headers
        self.url = url

    def zx(self):
        """Download the index page and append every link target to ``new_cve``.

        Network and HTTP errors are reported on stdout and leave ``new_cve``
        unchanged.
        """
        try:
            # Use the instance headers; the original read a global named
            # ``headers`` that only exists when run as a script.
            rgt = requests.get(url=self.url, headers=self.headers, timeout=3)
            rgt.raise_for_status()
        except requests.RequestException as r:
            print('[-] Error {}'.format(r))
            return

        zg = BeautifulSoup(rgt.text, 'html.parser')
        for c in zg.find_all('a'):
            href = c.get('href')
            # Anchors without an href yield None, which cannot be fetched.
            if href:
                new_cve.append(href)

    async def zhuaqu(self, url, session=None):
        """Fetch one detail URL and record its CVE id and response URL.

        Args:
            url: Link to request.
            session: Optional shared ``ClientSession``. When omitted, a
                temporary session is created for this single request.

        A URL is recorded only when a CVE id can be extracted from it, so
        ``bh_cve`` and ``jg_cve`` always stay index-aligned. Request failures
        are reported on stdout and otherwise ignored.
        """
        if session is None:
            async with ClientSession() as own_session:
                await self.zhuaqu(url, session=own_session)
            return

        try:
            async with session.get(
                url,
                headers=self.headers,
                timeout=ClientTimeout(total=2),
            ) as respone:
                final_url = respone.url
        except (ClientError, asyncio.TimeoutError, ValueError) as err:
            print('[-] Error {}'.format(err))
            return

        found = _CVE_SUFFIX.search(str(final_url))
        if found is None:
            # Not a CVE detail link; skip it instead of recording a URL
            # without a matching identifier.
            return
        bh_cve.append('CVE-{}'.format(found.group(0)))
        jg_cve.append(final_url)

    def writes(self):
        """Write the collected CVE ids and URLs to ``<unix-timestamp>.xls``.

        The file is created in the current working directory and the outcome
        is printed on stdout.
        """
        workbook = xlwt.Workbook(encoding='utf-8')
        sheet = workbook.add_sheet("今天更新的CVE")
        sheet.write(0, 0, "CVE编号")
        sheet.write(0, 1, "URL")
        # Row 0 holds the header, so data rows start at 1.
        for row, (cve_id, cve_url) in enumerate(zip(bh_cve, jg_cve), start=1):
            sheet.write(row, 0, cve_id)
            sheet.write(row, 1, '{}'.format(cve_url))

        filename = '{}.xls'.format(time.time())
        workbook.save(filename)
        if os.path.exists(filename):
            print('[+] 保存成功')
        else:
            print('[-] 保存失败')

    async def main(self):
        """Fetch every URL in ``new_cve`` concurrently through one session."""
        if not new_cve:
            # Nothing was scraped; asyncio.wait() on an empty collection
            # raised ValueError in the original.
            return

        async with ClientSession() as session:
            results = await asyncio.gather(
                *(self.zhuaqu(n, session=session) for n in new_cve),
                return_exceptions=True,
            )

        # zhuaqu() handles the expected network errors itself; anything else
        # is surfaced here rather than silently dropped.
        for outcome in results:
            if isinstance(outcome, Exception):
                print('[-] Error {}'.format(outcome))


if __name__ == '__main__':
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'}
    urls = 'https://cassandra.cerias.purdue.edu/CVE_changes/today.html'
    obj = Demon(headers=headers, url=urls)
    obj.zx()
    asyncio.run(obj.main())
    obj.writes()
创建帐户或登录后发表意见