When I try to scrape the name Kerri Hughes
from this webpage using the following script built on Google Apps Script, I always get Info
as the result. The name is static and available in the page source (Ctrl+U), but the script still fails to grab it. How can I fix it?
/**
 * Fetches the ACS article page with a browser-like User-Agent and logs the
 * text of the first `span.hlFld-ContribAuthor` element.
 *
 * NOTE(review): as posted, this version reportedly logs "Info" rather than
 * the author name.
 */
function myFunction() {
  var articleUrl = "https://pubs.acs.org/doi/abs/10.1021/acschembio.1c00993";
  var browserUserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36";

  // Plain GET request that only overrides the User-Agent header.
  var requestOptions = {
    "method": "GET",
    "headers": { "User-Agent": browserUserAgent }
  };

  var html = UrlFetchApp.fetch(articleUrl, requestOptions).getContentText();

  // Parse the returned markup and pick the first contributing-author span.
  var $ = Cheerio.load(html);
  var firstAuthor = $("span.hlFld-ContribAuthor").first();
  console.log(firstAuthor.text());
}
CodePudding user response:
I suspect that this URL requires cookies to be sent with the request. With that in mind, your script can be modified as follows.
Modified script.
/**
 * Fetches the ACS article page and logs the text of the first
 * `span.hlFld-ContribAuthor` element.
 *
 * The page is requested twice: the first request (with redirects disabled)
 * is used only to collect the cookies the server sets, and the second
 * request sends those cookies back in a `Cookie` header.
 */
function myFunction() {
  var webURL = "https://pubs.acs.org/doi/abs/10.1021/acschembio.1c00993";

  // Redirects are disabled so the Set-Cookie headers of the initial response
  // can be read directly (presumably the site sets its cookies there).
  var res = UrlFetchApp.fetch(webURL, { followRedirects: false });

  // getAllHeaders() yields a string for a single header value and an array
  // for repeated ones; the header may also be missing entirely.
  var setCookie = res.getAllHeaders()["Set-Cookie"];
  var cookieList = setCookie == null ? [] : [].concat(setCookie);

  // A Cookie request header carries only "name=value" pairs joined by "; ",
  // so strip attributes such as Path, Expires and HttpOnly from each entry.
  var cookieHeader = cookieList
    .map(function (cookie) {
      return String(cookie).split(";")[0].trim();
    })
    .filter(function (pair) {
      return pair !== "";
    })
    .join("; ");

  // Only send a Cookie header when there is something to send.
  var options = cookieHeader ? { headers: { "Cookie": cookieHeader } } : {};
  var response = UrlFetchApp.fetch(webURL, options);
  var $ = Cheerio.load(response.getContentText());
  var sName = $("span.hlFld-ContribAuthor").first().text();
  console.log(sName); // Kerri Hughes
}
Result:
When I tested the above script, I confirmed that the value Kerri Hughes
appears in the log.