I'm trying to develop a tool to scrap and post data from/to websites using "puppeteer",I wrote two modules one to read the URLS from JSON file and has the for-loop which include the function from the other module which visit the website and scrap the data.
When I built the functions, and run the code, for the first time my PC hang, and couldn't identify the issue, until I came across post explaining about "async" and "await" in NodeJS,I tried to understand how to apply it to my code, but I couldn't.
So I'm trying exactly to go through the loop to visit the website, then scrap data, after I finish from the first element, it should go to the next element in the loop rather than execute all elements in one time which make the PC hang.
getUrlsAndVisitWebsite.js
const puppeteer = require('puppeteer')
const fs = require('fs');
var visitWebsite= require('/home/projtect1/visitWebsite.js');
async function getUrlsAndVisitWebsite(){
fs.readFile("/home/project1/urls.json", "utf8", (err, jsonString) => {
if (err) {
console.log("File read failed:", err);
return;
}
var jsonOBj= JSON.parse(jsonString);
for (let i = 0; i < 2; i ) {
var postPath = JSON.parse(jsonOBj[i]).url;
// here I'm using puppeteer to visit websites and scrab data
visitWebsite(postPath)
}
});
}
getUrlsAndVisitWebsite()
visitWebsite.js
const puppeteer = require('puppeteer')
var fs = require('fs');
async function visitWebsite(postPath){
const browser = await puppeteer.launch({
headless: false,
waitUntil: 'networkidle2'
})
const page = await browser.newPage()
await page.setViewport({ width: 0, height: 0});
//Loading Cookie from file
const cookiesString = await fs.readFileSync('./cookies.json');
const Writecookies = JSON.parse(cookiesString);
await page.setCookie(...Writecookies);
// GO to the target URL for post
console.log(postPath)
await page.goto(postPath)
//Wait for the selector that list the elements
await page.waitForSelector('#gridPostListing');
}
module.exports=visitWebsite;
CodePudding user response:
Your visitWebsite function is async, you need to await it in getUrlsAndVisitWebsite. As it is now, it only returns a promise, hence it hangs.
Just add await before your visitWebsite call.
CodePudding user response:
I tried this,
await visitWebsite(postPath)
And got the below error
/home/project1/getUrlsAndVisitWebsite.js:31
await visitWebsite(postPath)
^^^^^
SyntaxError: await is only valid in async function