Boss node, please come in! Why did the node crawler of movie paradise collapse without entering titleHref.forEach?

the problem with the old code is res.on ("end",.) Is asynchronous, and maybe titleHref is still empty. Now I have updated the code to solve this problem, but how to print the resulting ans array, print is Synchronize, always print out empty data.

New Code:

const cheerio = require("cheerio");
const http = require("http");
const iconv = require("iconv-lite");

let baseUrl = "http://www.ygdy8.net/html/gndy/dyzz/list_23_";
let Host = "http://www.ygdy8.net/";

const totalPage = 2; //
let ans = [];
//
function getTitleHref(url,page) {
  let startUrl = url+page+".html";
  http.get(startUrl,function(res) {
    const { statusCode } = res;
    let chunks = [];
    res.on("data",function(chunk){
      chunks.push(chunk);
    });
    res.on("end",function(){
      let title = [];
      
      let html = iconv.decode(Buffer.concat(chunks),"gb2312");
      let $ = cheerio.load(html, {decodeEntities: false});
      // console.log($);
      $(".co_content8 .ulink").each(function(i,d) {
        let $d = $(d);
        let titleHref = [];
        titleHref.push({
          href: $d.attr("href")
        });
        getLink(titleHref)
      });
      // console.log(ans);
    });  
  });
}


// /*
//
function getLink(titleHref) {
  console.log("getLink");
  console.log(titleHref);
  if(titleHref) {
    titleHref.forEach(function(v,k) {
      console.log("~~~~~~~~~~~~~~~~~~~~");
      let infoUrl = Host + v.href;
      // console.log(infoUrl);
    
        http.get(infoUrl,function(res) {
          const { statusCode } = res;
          const contentType = res.headers["content-type"];
        
          let error;
          if (statusCode !== 200) {
            error = new Error("\n" +
                             `: ${statusCode}`);
          } 
          if (error) {
            console.error(error.message);
            // 
            res.resume();
            return;
          }
          console.log("getlink http");
          let chunks = [];
          res.on("data",function(chunk) {  
            chunks.push(chunk);
          });
          res.on("end", function(){
            try {
              let html = iconv.decode(Buffer.concat(chunks),"gb2312");
              let $ = cheerio.load(html, {decodeEntities: false});
              let bt = "";
              bt = $("-sharpZoom td").children("a").attr("href");
              // console.log(bt);
              // console.log(typeof bt)
              ans.push(bt);
            }catch (e) {
              console.error("bt",e.message);
            }
          })
        }).on("error", (e) => {
          console.error(`: ${e.message}`);
        });
    });
  }
};
// */
for(let i = 1; i <= totalPage; iPP) {
  getTitleHref(baseUrl,i);
};



-- split line-

const cheerio = require("cheerio");
const http = require("http");
const iconv = require("iconv-lite");

let baseUrl = "http://www.ygdy8.net/html/gndy/dyzz/list_23_";
let Host = "http://www.ygdy8.net/";
let titleHref = [];
const totalPage = 1; //
let res = [];
//
function getTitleHref(url,page) {
  let startUrl = url+page+".html";
  http.get(startUrl,function(res) {
    let chunks = [];

    res.on("data",function(chunk){
      chunks.push(chunk);
    });
    res.on("end",function(){
      let title = [];
      let html = iconv.decode(Buffer.concat(chunks),"gb2312");
      let $ = cheerio.load(html, {decodeEntities: false});
      // console.log($);
      $(".co_content8 .ulink").each(function(i,d) {
        let $d = $(d);
        titleHref.push({
          href: $d.attr("href")
        });
      });
      console.log(titleHref);
    });
    if(page <= totalPage) {
      getTitleHref(url,PPpage);
    }else {
      console.log(page);
      getLink(titleHref);
    }
   
  });
}

//
function getLink(titleHref) {
  console.log("getLink");

  titleHref.forEach(function(v,k) {
    console.log("~~~~~~~~~~~~~~~~~~~~");
    let infoUrl = Host + v.href;
    console.log(infoUrl);
    // try {
      http.get(infoUrl,function(res) {
        console.log("getlink http");
        
        let chunks = [];
        res.on("data",function(chunk) {
          chunks.push(chunk);
        });
        res.on("end", function(){
          let html = iconv.decode(Buffer.concat(chunks),"gb2312");
          let $ = cheerio.load(html, {decodeEntities: false});
          
          
          let reg = /.*/;
          let info = "";
          let bt = "";
          let textInfo = $(".co_content8 -sharpZoom p").eq(0).text();
          info = textInfo.match(reg)[0];
          bt = $("-sharpZoom td").children("a").attr("href");
          res.push({
            Info:info,
            Bt:bt
          });
          console.log(res);
        })
        //
        //res.on("error",function(){
        //  console.log("error");
        //})
      })
  // }catch(e) {
  //   console.log(e);
  // }
  });
};

getTitleHref(baseUrl,1)

if there is too much asynchronism in the above code, I don"t know what the problem is. Ask node to instruct the titleHref.forEach in the, getLink () function, and the node thread will collapse. Console.log ("enter getlink http") is also not printed

Mar.22,2021

getLink (titleHref); res.on ('end',.) has not been executed yet. How to get in an empty array.

Menu