I've created a script using the `request` and `cheerio` libraries to fetch post titles and their corresponding links from a website. The script appears to work fine. As you can see in the script below, I call the function with `getposts((item,link) => console.log({item,link}));`.
Now, the question is:
How can I pass startUrl (the website link) as a parameter when calling the function, while keeping the rest of the logic as it is?
var request = require('request');
var cheerio = require('cheerio');
// Address of the page that getposts downloads and scans for question links.
const startUrl = 'https://stackoverflow.com/questions/tagged/web-scraping';
/**
 * Downloads the page at the module-level `startUrl` and reports every
 * question title/link pair found on it.
 *
 * Failures are reported on stderr instead of being dropped silently; the
 * callback is only ever invoked for a successful (HTTP 200) response.
 *
 * @param {function(string, string): *} callback - Called once per element
 *   matching `.summary .question-hyperlink`, with the element's text and its
 *   `href` attribute. Its return value is handed back to cheerio's `.each`,
 *   which stops iterating when it receives `false`.
 * @returns {void}
 */
function getposts(callback) {
  request(startUrl, (error, response, html) => {
    if (error) {
      console.error(`getposts: request to ${startUrl} failed:`, error);
      return;
    }
    if (response.statusCode !== 200) {
      console.error(
        `getposts: unexpected status ${response.statusCode} for ${startUrl}`
      );
      return;
    }

    const $ = cheerio.load(html);
    $('.summary .question-hyperlink').each((index, element) => {
      const anchor = $(element);
      const items = anchor.text();
      const links = anchor.attr('href');
      // Forward the callback's result so a caller can still end the loop early.
      return callback(items, links);
    });
  });
}
// Print every scraped post as an { item, link } object.
getposts((item, link) => {
  console.log({ item, link });
});
CodePudding user response:
Add a `url` parameter to the function and pass `startUrl` as the first argument when calling it:
var request = require('request');
var cheerio = require('cheerio');
// Page address handed to getposts as the URL to download.
const startUrl = 'https://stackoverflow.com/questions/tagged/web-scraping';
/**
 * Downloads the page at `url` and reports every question title/link pair
 * found on it.
 *
 * Failures are reported on stderr instead of being dropped silently; the
 * callback is only ever invoked for a successful (HTTP 200) response.
 *
 * @param {string} url - Address of the page to download.
 * @param {function(string, string): *} callback - Called once per element
 *   matching `.summary .question-hyperlink`, with the element's text and its
 *   `href` attribute. Its return value is handed back to cheerio's `.each`,
 *   which stops iterating when it receives `false`.
 * @returns {void}
 */
function getposts(url, callback) {
  request(url, (error, response, html) => {
    if (error) {
      console.error(`getposts: request to ${url} failed:`, error);
      return;
    }
    if (response.statusCode !== 200) {
      console.error(
        `getposts: unexpected status ${response.statusCode} for ${url}`
      );
      return;
    }

    const $ = cheerio.load(html);
    $('.summary .question-hyperlink').each((index, element) => {
      const anchor = $(element);
      const items = anchor.text();
      const links = anchor.attr('href');
      // Forward the callback's result so a caller can still end the loop early.
      return callback(items, links);
    });
  });
}
// Scrape startUrl and print every post as an { item, link } object.
getposts(startUrl, (item, link) => {
  console.log({ item, link });
});