import scrapy
import re
from chaojiying_Python.chaojiying import get_code
class Login1Spider(scrapy.Spider):
    """Log in to ganji.com through its captcha-protected passport form.

    Flow:
      1. ``parse`` reads the ``__hash__`` token from the login page and
         requests the captcha image.
      2. ``do_fromdata`` saves the image to ``code.jpg``, asks the operator
         to type the captcha, and posts the login form.
      3. ``after_login`` prints the body of the login response.
    """

    name = 'login1'
    allowed_domains = ['ganji.com']
    start_urls = ['https://passport.ganji.com/login.php']

    def parse(self, response):
        """Extract the ``__hash__`` token and request the captcha image.

        The token is forwarded to ``do_fromdata`` through the request's
        ``meta``. If the page does not contain the token, an error is logged
        and no request is yielded.
        """
        img_url = 'https://passport.ganji.com/ajax.php?dir=captcha&module=login_captcha'
        # Stop at the first closing quote. A greedy ``.+`` runs on to the
        # LAST quote on the line and swallows whatever follows the token,
        # which makes the posted ``__hash__`` value invalid.
        match = re.search(r'"__hash__":"([^"]+)"', response.text)
        if match is None:
            self.logger.error('No __hash__ token found in %s', response.url)
            return
        yield scrapy.Request(
            img_url,
            callback=self.do_fromdata,
            meta={'hash_code': match.group(1)},
        )

    def do_fromdata(self, response):
        """Save the captcha image, prompt for its text, and post the login form.

        ``response`` is the captcha image response; its ``meta`` carries the
        ``hash_code`` extracted by ``parse``.
        """
        with open('code.jpg', 'wb') as f:
            f.write(response.body)

        # Manual captcha entry; this call blocks until the operator answers.
        # The imported helper can be used instead: code = get_code('code.jpg')
        code = input("请输入验证码:")
        hash_code = response.meta['hash_code']

        # NOTE(review): the credentials are hard-coded here; consider moving
        # them to settings or spider arguments.
        data = {
            'username': '17030240219',
            'password': '123456qaz',
            'setcookie': '14',
            'checkCode': code,
            # NOTE(review): the spaces around '/' look like a copy-paste
            # artifact -- confirm against the value the browser really sends.
            'next': '/ user / register_success.php?username=17030240219&next=%2F',
            'source': 'passport',
            '__hash__': hash_code,
        }
        login_url = 'https://passport.ganji.com/login.php'
        yield scrapy.FormRequest(
            login_url,
            method='POST',
            formdata=data,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Print the body of the login response for inspection."""
        print(response.text)
# Question: why does logging in report "无效数组长度" (invalid array length), and how can this be fixed?