Home > front end > How can I use Node.js to fetch the data from this site?
How can I use Node.js to fetch the data from this site?

Time:11-28

https://bck.hermes.cn/products?locale=cn_zh&category=WOMENBAGSBAGSCLUTCHES&sort=relevance

I tried Puppeteer, but I could not crawl it.

CodePudding user response:

 
//"use strict";
const https = require('https'); // Node.js built-in HTTPS client, used to fetch the page
const fs = require('fs');
const path = require('path');
const cheerio = require('cheerio');
// const url = 'http://www.xbiquge.la/21/21549/10832214.html'; // site that should be crawled

// Request options. The headers imitate a browser visit; adjust them to match
// the request headers shown in the browser's network panel for the target page.
const options = {
  hostname: 'www.9zgg.com',
  // NOTE(review): path reconstructed from a garbled paste; the site may expect
  // empty filter values written as "--" (e.g. "type--area--year") - confirm.
  path: '/list-select-id-2-type-area-year-star-state-order-addtime.html',
  port: 443,
  method: 'GET',
  headers: {
    // 'connection': 'keep-alive',
    'sec-fetch-mode': 'navigate',
    'user-agent':
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36',
    'upgrade-insecure-requests': '1',
    accept:
      'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
  },
};

// Fetch the page, accumulate the body, parse it and write the result to disk.
https
  .get(options, (res) => {
    res.setEncoding('utf-8');
    let html = '';

    // The body arrives in chunks; concatenate them.
    res.on('data', (chunk) => {
      html += chunk;
    });

    // Once the whole page has been received, extract the data and save it.
    res.on('end', () => {
      const courseData = filterChapters(html);
      // Serialize the whole array once so the file is a single valid JSON
      // document; fs.writeFile needs a string or buffer, not an array.
      const content = JSON.stringify(courseData, null, 2);
      fs.writeFile('./222.json', content, (err) => {
        if (err) throw new Error(`failed to write file ${err}`);
        console.log('successfully written to the file');
      });
    });
  })
  .on('error', (err) => {
    // Include the underlying error so the failure can be diagnosed.
    console.log('access to resources error!', err.message);
  });


/**
 * Extract the list of shows from a listing page.
 *
 * Every element with class `col-xs-2` is treated as one entry; the title,
 * link, image and update status are read from its direct `<p>` children.
 *
 * @param {string} html - Raw HTML of the listing page.
 * @returns {Array<{chapterTitle: (string|undefined), tvUrl: (string|undefined),
 *   imgUrl: (string|undefined), updateStatus: string, type: string,
 *   url: (string|undefined)}>} One record per matched entry.
 */
function filterChapters(html) {
  const $ = cheerio.load(html); // gives a jQuery-like API over the HTML
  const courseData = [];

  // cheerio's .each passes (index, element), in that order.
  $('.col-xs-2').each((index, element) => {
    const paragraphs = $(element).children('p');

    const chapterTitle = paragraphs.find('img').attr('alt'); // title
    const href = paragraphs.find('a').attr('href');
    // Part of the link after "show/"; undefined when the entry has no link
    // (or the link does not contain "show/") instead of throwing.
    const tvUrl = href ? href.split('show/')[1] : undefined;
    const imgUrl = paragraphs.find('img').attr('data-original'); // picture link
    const updateStatus = paragraphs.find('.continu').text(); // latest episode text
    const type = 'TV series'; // video type
    // NOTE(review): this host differs from the one the page is requested
    // from - confirm it is the intended target for the detail links.
    const url = tvUrl ? `http://www.m4yy.com/show/${tvUrl}` : undefined;

    courseData.push({
      chapterTitle,
      tvUrl,
      imgUrl,
      updateStatus,
      type,
      url,
    });
  });

  return courseData;
}

I wrote a demo like this before; the returned data looks roughly like this:
{
"chapterTitle": "only walking flower",
"tvUrl": "35222.html",
"imgUrl": "https://ae01.alicdn.com/kf/H93a96eef530d40e88b6c053b6568ec67W.png",
"updateStatus": "\t\n\tupdate to 109 sets",
"type": "TV series",
"url": "http://www.m4yy.com/show/35222.html"
},
{
"chapterTitle": "one can't be little",
"tvUrl": "36947.html",
"imgUrl": "https://p.ssl.qhimg.com/d/dy_98699842cc2aa1d066b321e543acffcc.jpg",
"updateStatus": "\t\n\tupdate to 23 set",
"type": "TV series",
"url": "http://www.m4yy.com/show/36947.html"
},
That is the kind of JSON it produces.

CodePudding user response:

Use cheerio to crawl it.

CodePudding user response:

Install the Google Chrome plugin efhelper. Can't you just download the data directly? The plugin beautifies the response automatically and has a download function.
  • Related