I'll preface this with I'm still quite fresh to coding and am still learning,
I am attempting to execute a script that uses two async functions with node.js. The first function uses puppeteer to fetch links from a webpage and the second uses googleapi to log that data into a google sheet.
Issue I'm running into is the array I'm fetching from the first function is coming up as undefined for the second.
I believe it's an issue of the second script being executed before the first is complete and can return the data. Any insight or help will be much appreciated.
const puppeteer = require('puppeteer')
async function scrapeProduct(url) {
const browser = await puppeteer.launch({
headless: false
});
const page = await browser.newPage();
await page.goto(url);
const grablinks = await page.evaluate(() => {
//retrieves links
});
await browser.close();
return grabLinks;
}
const {google} = require('googleapis')
const credentials = require('')
const client = new google.auth.JWT(
credentials.client_email,
null,
credentials.private_key,
['https://www.googleapis.com/auth/spreadsheets']
);
client.authorize(function(err,tokens){
if (err){
console.log(err);
return;
} else{
console.log('Connected!');
gsrun(client);
}
});
async function gsrun(cl){
const gsapi = google.sheets({version: 'v4', auth: cl});
const grabLinks = await scrapeProduct();
const updateOptions = {
spreadsheetId: '',
range: '',
valueInputOption: 'USER_ENTERED',
resource: { values :grabLinks}
};
let res = await gsapi.spreadsheets.values.update(updateOptions);
}
I run into the following error when I execute:
ProtocolError: Protocol error (Page.navigate): Invalid parameters Failed to deserialize params.url - BINDINGS: mandatory field missing at position 50
50 referring to the end of client.authorize(function(err){
CodePudding user response:
Please, learn more about Scopes.
Your function
scrapeProduct
never executes.
Change your gsrun
function to this:
async function gsrun(cl,url){
const gsapi = google.sheets({version: 'v4', auth: cl});
let grabLinks = await scrapeProduct(url);
const updateOptions = {
spreadsheetId: '',
range: '',
valueInputOption: 'USER_ENTERED',
resource: { values :grabLinks}
};
let res = await gsapi.spreadsheets.values.update(updateOptions);
}
And then run gsrun
with client
and your needed url
inside client.authorize's callback as your wrote.
CodePudding user response:
First, you've only defined scrapeProduct
and aren't calling it:
async function gsrun(cl){
const gsapi = google.sheets({version: 'v4', auth: cl});
const grabLinks = await scrapeProduct(url); // <--- This
const updateOptions = {
spreadsheetId: '',
range: '',
valueInputOption: 'USER_ENTERED',
resource: { values: grabLinks}
};
let res = await gsapi.spreadsheets.values.update(updateOptions);
}
However, this will still error out. This is because you've defined scrapeProduct
to be asynchronous (and await
won't work because scrapeProduct
isn't returning a promise) so updateOptions.resource
will have the undefined value.
You can fix this by making sure scrapeProduct
returns a promise:
function scrapeProduct(url) {
return new Promise(async(resolve, reject) => {
const browser = await puppeteer.launch({
headless: false
});
const page = await browser.newPage();
await page.goto(url);
const grablinks = await page.evaluate(() => {
//retrieves links
});
await browser.close();
resolve(grabLinks);
});
}
Now, calling await scrapeProduct(url)
inside gsrun()
will work as the code won't execute further until grabLinks
is returned from scrapeProduct