The exported file contains only one url. The rest of the urls are not found in the exported file. How can I generate a file with all the entries in the loop?
const puppeteer = require("puppeteer");
const fs = require('fs');
// NOTE: this outer binding is never assigned; the `const browser` declared
// inside the async function below shadows it.
let browser;

// Opens old.reddit.com, follows the first three `.thumbnail` links in new
// tabs, logs each tab's final URL and writes it to export.json.
(async () => {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox']
  });
  const [page] = await browser.pages();
  await page.goto('https://old.reddit.com/',{"waitUntil" : "networkidle0"});
  const a_elems = await page.$$('.thumbnail');
  // Cap the crawl at three links, or fewer if the page has fewer thumbnails.
  for (var i=0; i<a_elems.length && i<3; i++) {
    const elem = a_elems[i];
    // Read the link target from the element handle inside the page context.
    const href = await page.evaluate(e => e.href, elem);
    const newPage = await browser.newPage();
    await newPage.goto(href,{"waitUntil" : "networkidle0"});
    // Ask the new tab for its location after navigation has settled.
    const url = await newPage.evaluate(() => document.location.href);
    console.log(url);
    // This runs on every iteration and replaces the file's contents each
    // time, so only the last URL remains in export.json. This is the
    // behaviour the question is asking about.
    fs.writeFileSync('export.json', JSON.stringify(url));
  }
  await browser.close();
})();
Thanks!
CodePudding user response:
Create an array, push each `url` onto it in the loop, then move your `writeFile` call to the end.
const puppeteer = require("puppeteer");
const fs = require('fs').promises;
// Declared outside the async function so the .finally() handler at the bottom
// can reach the browser instance and close it.
let browser;

// Opens old.reddit.com, follows the first three `.thumbnail` links in new
// tabs, collects each tab's final URL, and writes the whole list to
// export.json in a single call after the loop.
(async () => {
  browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox']
  });
  const [page] = await browser.pages();
  await page.goto('https://old.reddit.com/', {
    "waitUntil": "networkidle0"
  });
  const aElems = await page.$$('.thumbnail');
  // Accumulates every visited URL so the file is written once, with all entries.
  const urls = [];
  // Cap the crawl at three links, or fewer if the page has fewer thumbnails.
  for (let i = 0; i < aElems.length && i < 3; i++) {
    // Read the link target directly from the element handle.
    const href = await aElems[i].evaluate(e => e.href);
    const newPage = await browser.newPage();
    await newPage.goto(href, {waitUntil: "networkidle0"});
    // Ask the new tab for its location after navigation has settled.
    const url = await newPage.evaluate(() => document.location.href);
    console.log(url);
    urls.push(url);
  }
  // Single write after the loop: the file holds a JSON array of all URLs.
  await fs.writeFile('export.json', JSON.stringify(urls));
})()
  .catch(err => console.error(err))
  // `?.` covers the case where launch() failed and `browser` is still undefined.
  .finally(() => browser?.close());
Tips:
- You're already in async code, so `writeFileSync` seems suboptimal here relative to the async version.
- Use `let` instead of `var` so you don't get bitten by `i` breaking scope and popping up with a stale value outside (or inside) the loop block.
- Consider `newPage.close();` at the end of the loop. You're only doing 3 pages now, but if this is temporary and you're going to make it 800, then it's a great idea.
- `"waitUntil": "networkidle0"` is really slow. Since all you're doing is accessing `document.location.href`, you can probably speed things up with `waitUntil: "domcontentloaded"`.
- JS uses `camelCase`, not `snake_case`.
- If you have an ElementHandle, you can just `elementHandle.evaluate(...)` rather than `page.evaluate(..., elementHandle)`.
- Catch errors with `catch` and clean up the `browser` resource with `finally`.
- `let browser;` was pointless in your original code.