"""Reg007 lookup script, taken from a forum post.

Translated text of the original post
------------------------------------
Posted 14 October 2022 (3 years ago).

I had long had a vague idea of scraping the Reg007 site, but I was in school
and had no time, so I never did.

Main text: Reg007 is a site that can tell you which websites your phone
number has been registered on. Without logging in, a query shows three of the
registered sites. So let's capture the traffic once and see what is going on.

From the captured request we can see that the request parameter is
``q: <your phone number>``, and the phone number also has to be sent in the
cookie:

    _ga=GA1.2.1107102936.1534170998; _gid=GA1.2.1501069481.1534324705; reg007_c_s_t=2; reg007_think_language=zh-CN; PHPSESSID=gtnua8rhph3d80u3umktadimk6; reg007_f_c_j=958dcf85640ae27962c2c9f9cd00574f; reg007_q=<your phone number>

Then we log in and try the query again: with the login cookie we can see
every result. (Author's aside: the screenshot was not redacted properly
again, and was left as it was.)

Approach:
1. Log in first.
2. Then take the cookies from the login and run the query with them.

Code: (the author called it terrible and could not work out how to obtain
the request cookies -- a ``requests.Session`` does exactly that, see below.)
"""

import re
import time  # retained from the original import list; reg() no longer sleeps

import requests
from bs4 import BeautifulSoup

HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
BASE_URL = 'https://www.reg007.com/'
LOGIN_URL = 'https://www.reg007.com/account/signin'
ACCOUNT_URL = 'https://www.reg007.com/account'
SEARCH_URL = 'https://www.reg007.com/search'

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 15

# Fragments stripped from every result line, applied strictly in this order.
# The order matters: later tokens (e.g. 'GleAnlytcObject') are what is left of
# longer words after the earlier single-letter removals.
_NOISE_TOKENS = (
    '(', 'adsbygoogle', '=', 'window', '.', '||', 'functioni',
    's', 'o', 'g', 'r', 'a', ',', 'm', '{', 'i', '[', "'", ')',
    'GleAnlytcObject', ']', ';', 'q',
)

# First value="..." attribute on the page. NOTE(review): the original code
# assumed this attribute carries the CSRF token -- confirm against the site.
_CSRF_RE = re.compile(r'value="([^"]*)"')

# Result lines are everything up to the last '|' on a line of page text.
_RESULT_RE = re.compile(r'.*\|')


def _strip_noise(text):
    """Return *text* with every fragment in ``_NOISE_TOKENS`` removed, in order."""
    for token in _NOISE_TOKENS:
        text = text.replace(token, '')
    return text


def reg(account='你的账号', password='你的密码', phone='查询的手机号'):
    """Log in to Reg007 and print the search results for *phone*.

    One ``requests.Session`` is used for every request, so the cookies the
    server sets while handing out the CSRF token and during login are sent
    back automatically on the search request. No cookie values are
    hard-coded.

    Args:
        account: Login name for the Reg007 account.
        password: Password for the Reg007 account.
        phone: Phone number to look up.

    Raises:
        RuntimeError: If no ``value="..."`` attribute is found on the home
            page, so no CSRF token can be sent.
        SystemExit: If the login does not end on the account page (after
            printing the failure message).
        requests.RequestException: On network errors or timeouts.
    """
    with requests.Session() as session:
        session.headers.update(HEADERS)
        # The original request carried this language cookie; keep it so the
        # page is served in the same language the result parsing was
        # written against.
        session.cookies.set('reg007_think_language', 'zh-CN')

        home = session.get(BASE_URL, timeout=REQUEST_TIMEOUT)
        token_match = _CSRF_RE.search(home.text)
        if token_match is None:
            raise RuntimeError('CSRF token not found on ' + BASE_URL)

        # A dict lets requests URL-encode the values, so credentials that
        # contain '&' or '=' are transmitted intact.
        form = {
            'account': account,
            'password': password,
            'remember': 'on',
            '__csrf__': token_match.group(1),
        }
        login = session.post(LOGIN_URL, data=form, timeout=REQUEST_TIMEOUT)

        # A successful login is recognised by the redirect to the account page.
        if login.url != ACCOUNT_URL:
            print('[-]Landing failure !')
            raise SystemExit

        print('[+]Landing success !')

        # The captured browser request sent the queried number both as the
        # ``q`` parameter and as the ``reg007_q`` cookie.
        session.cookies.set('reg007_q', phone)
        found = session.get(SEARCH_URL, params={'q': phone},
                            timeout=REQUEST_TIMEOUT)

        page_text = BeautifulSoup(found.text, 'html.parser').get_text()
        for line in _RESULT_RE.findall(page_text):
            print(_strip_noise(line))


if __name__ == '__main__':
    reg()

# Author's closing remark from the post: "the result is still that rough".
# The remark ended with a colon, presumably introducing a screenshot that is
# not part of this text.
创建帐户或登录后发表意见