Recently I have been learning to write crawlers in Node.js. I wanted to practice crawling data that requires a login, so I used codeshelper as a practice target. I have already worked out how to obtain the random query token and keep the cookie synchronized for each login request, and I then call the login API directly with superagent. The relevant code is as follows:
const superagent = require("superagent")
const async = require("async")
const getRandom = require("./getRandom")
let random = ""
let cookie = ""
// Logs in to segmentfault.com in two sequential steps:
//   1. GET the home page, store its PHPSESSID cookie in `cookie` and the
//      value returned by getRandom.getToken() in `random`.
//   2. POST the credentials given on the command line
//      (node index <username> <password>) to the login API, sending
//      `random` as the `_` query parameter and `cookie` as the Cookie header.
// Every failure is forwarded to the final callback, so the series always
// terminates and the error is reported instead of being lost.
async.series([
  // Step 1: fetch the session cookie and the login token.
  (cb) => {
    superagent
      .get("https://segmentfault.com")
      .end((err, res) => {
        // On failure `res` may be undefined, so stop here rather than
        // dereferencing it.
        if (err) return cb(err)
        const setCookie = res.headers["set-cookie"]
        const matched = setCookie && setCookie.join(",").match(/(PHPSESSID=.+?);/)
        if (!matched) {
          return cb(new Error("PHPSESSID cookie not found in home page response"))
        }
        // The capture group already includes the "PHPSESSID=" prefix.
        cookie = matched[1]
        // presumably extracts the login token from the page HTML; verify
        // against ./getRandom
        random = getRandom.getToken(res.text)
        cb(null)
      })
  },
  // Step 2: submit the login form.
  (cb) => {
    const username = process.argv[2]
    const password = process.argv[3]
    if (!username || !password) {
      return cb(new Error("usage: node index <username> <password>"))
    }
    console.log(cookie)
    console.log(random)
    // No "content-length" entry: a fixed value is only correct for one
    // particular username/password length, so the HTTP client is left to
    // compute it from the actual body.
    const header = {
      "accept": "*/*",
      "accept-encoding": "gzip, deflate, br",
      "accept-language": "zh-CN,zh;q=0.9",
      "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
      // `cookie` is already "PHPSESSID=<id>"; prefixing it again would send
      // "PHPSESSID=PHPSESSID=<id>".
      "cookie": cookie,
      "origin": "https://segmentfault.com",
      "referer": "https://segmentfault.com/",
      "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
      "x-requested-with": "XMLHttpRequest"
    }
    // NOTE(review): Referer and X-Requested-With are set twice, here and in
    // `header`, with different Referer values. Confirm which one the login
    // API expects and drop the other.
    superagent
      .post(`https://segmentfault.com/api/user/login`)
      .query({"_": random})
      .set("Referer", "https://segmentfault.com/user/login")
      .set("X-Requested-With", "XMLHttpRequest")
      .set(header)
      .type("form")
      .send({
        username,
        password,
        remember: 1
      })
      .end((err, res) => {
        if (err) {
          console.log(err.status)
          // Signal the failure so the series finishes instead of hanging.
          return cb(err)
        }
        console.log(`yay got ${res.status}`)
        // Expose the session cookie as this step's result.
        cb(null, cookie)
      })
  }
], (err) => {
  // Final callback: runs once after both steps, or at the first error.
  if (err) {
    console.error("login flow failed:", err.message)
    process.exitCode = 1
  }
})
Running node index phoneNumber password logs in successfully. However, when I go on to crawl other endpoints after logging in, they return "not found". Any help would be appreciated.