I found this code online, however it does not work when I "copy and paste" it into Google Apps Script:
https://webscraping.pro/scrape-google-app-script/
What changes must I make for it to work, or alternatively is there a straight-forward way to search for specific key words on a website and return the results in Google Sheets?
CodePudding user response:
Try this code in a worksheet, then filter some unusable rows
function textOnly() {
var url='https://stackoverflow.com/questions/69148306/how-do-i-use-google-apps-script-to-scrape-a-website-for-specific-key-words?noredirect=1#comment122219344_69148306'
var sh=SpreadsheetApp.getActiveSpreadsheet().getActiveSheet()
sh.clear()
var data=UrlFetchApp.fetch(url).getContentText().replace(/(\r\n|\n|\r|\t)/gm," ").split('>')
data.forEach(function (part){
var text = part.split('<')[0]
if (text.replace(/([ ] )/gm,"")!='') {
sh.appendRow([part.split('<')[0].replace(/([ ]{2,})/gm," ").replace(/(^ )/gm,"")])
}
})
}
You will then able to filter with your specific key words. To include these key words inside the script, assuming they are located in keywords
range :
function textOnly() {
var url='https://stackoverflow.com/questions/69148306/how-do-i-use-google-apps-script-to-scrape-a-website-for-specific-key-words?noredirect=1#comment122219344_69148306'
var sh=SpreadsheetApp.getActiveSpreadsheet().getActiveSheet()
sh.clear()
var data=UrlFetchApp.fetch(url).getContentText().replace(/(\r\n|\n|\r|\t)/gm," ").split('>')
var list = SpreadsheetApp.getActiveSpreadsheet().getRangeByName('keywords').getValues().join('|').toLowerCase().split('|')
var n=0
data.forEach(function (part){
var text = part.split('<')[0]
var myText = part.split('<')[0].replace(/([ ]{2,})/gm," ").replace(/(^ )/gm,"")
if (text.replace(/([ ] )/gm,"")!='') {
list.forEach(function (term){
if (myText.toLowerCase().includes(term)){
sh.appendRow([myText])
n
}
})
}
})
SpreadsheetApp.getActive().toast(n " item(s) found", "End of script !", 5);
}