I'm trying to scrape a HTML table data, using Google Apps Script's XlmService, of which function is new to me. But I'm getting an error, "Exception: Error on line 4: Element type "n.length" must be followed by either attribute specifications, ">" or "/>"." Is this due to something in this website not compatible with XlmService? Thanks for any advice!
function test() {
const url = 'https://www.tsa.gov/coronavirus/passenger-throughput';
const res = UrlFetchApp.fetch(url, { muteHttpExceptions: true }).getContentText();
var res1 = XmlService.parse(res);
}
CodePudding user response:
It can be parsed about this way:
var dates = res1('.views-field-field-today-date').text().match(/\d \/\d \/\d /g);
var col1 = res1('.views-field-field-2021-throughput').text().match(/\d ,\d [,\d ]*/g);
var col2 = res1('.views-field-field-2020-throughput').text().match(/\d ,\d [,\d ]*/g);
var col3 = res1('.views-field-field-2019-throughput').text().match(/\d ,\d [,\d ]*/g);
console.log(dates[0], col1[0], col2[0], col3[0]);
console.log(dates[1], col1[1], col2[1], col3[1]);
console.log(dates[2], col1[2], col2[2], col3[2]);
And put the data on your sheet this way:
var table = dates.map((d, i) => [d, col1[i], col2[i], col3[i]]);
var range = SpreadsheetApp.getActiveSheet().getRange(2,1,table.length,table[0].length);
range.setValues(table);
Reference:
https://github.com/tani/cheeriogs
Update
Here is the updated version of the code without those clumsy RegExp:
function test3() {
const url = 'https://www.tsa.gov/coronavirus/passenger-throughput';
const res = UrlFetchApp.fetch(url, { muteHttpExceptions: true }).getContentText();
const $ = Cheerio.load(res);
var dates = $('table').find('td.views-field-field-today-date').toArray().map(x => $(x).text().trim());
var col1 = $('table').find('td.views-field-field-2021-throughput').toArray().map(x => $(x).text());
var col2 = $('table').find('td.views-field-field-2020-throughput').toArray().map(x => $(x).text());
var col3 = $('table').find('td.views-field-field-2019-throughput').toArray().map(x => $(x).text());
var table = dates.map((d, i) => [d, col1[i], col2[i], col3[i]]);
var range = SpreadsheetApp.getActiveSheet().getRange(2,1,table.length,table[0].length);
range.setValues(table);
}