I'm doing some scraping after receiving HTML from an API. I'd like to do the following:
1. Open the HTML page in Chrome so I can find selectors in the console.
2. Immediately load the same HTML page into a jsdom instance.
3. Drop into the REPL, so I can find the right selectors in the Chrome console and then test them in a live jsdom environment to see if they work.
For step 1, I have:
/**
 * Opens a visible (non-headless) browser window and loads the given HTML
 * string into a new page.
 *
 * The browser is intentionally left open so the page can be inspected; the
 * handles are returned so the caller can close it later with
 * `browser.close()`.
 *
 * @param {string} htmlString - HTML markup to render in the new page.
 * @returns {Promise<{browser: object, page: object}>} The launched browser
 *   and the page that now holds the content.
 * @throws Rethrows any error from creating the page or setting its content,
 *   after closing the browser that was launched.
 */
async function openHtml(htmlString) {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.setContent(htmlString);
    return { browser, page };
  } catch (err) {
    // Do not leave an orphaned browser process behind when loading fails.
    await browser.close();
    throw err;
  }
}
The code provided with the API is:
// Issue the API request, buffer the response body, then hand the returned
// HTML to both jsdom and the browser opened by openHtml().
var req = http.request(options, (res) => {
  const parts = [];
  // Collect the raw response pieces as they arrive.
  res.on("data", (piece) => {
    parts.push(piece);
  });
  res.on("end", () => {
    // response.content = html, response.cookies = cookies
    response = JSON.parse(Buffer.concat(parts));
    const { window } = new JSDOM(response.content);
    const firstParagraph = window.document.querySelector("p");
    console.log(firstParagraph.textContent); // "Hello world"
    // The promise returned by openHtml() is not awaited here.
    openHtml(response.content);
    console.log("hi");
  });
});
req.end();
If I run the code at the command line, the browser opens as expected. However, if I set a breakpoint at:
console.log('hi');
the browser does not open. How can I get this working?
CodePudding user response:
openHtml
is an async function, so you need to await the call (it returns a promise) and make the enclosing callback function async as well.
// Issue the API request, parse the returned HTML with jsdom, then open the
// same HTML in a browser via openHtml() and wait for that to finish.
var req = http.request(options, (res) => {
  const chunks = [];
  res.on('data', (chunk) => {
    chunks.push(chunk);
  });
  // EventEmitter ignores the promise returned by an async listener, so any
  // failure must be caught here or it surfaces as an unhandled rejection.
  res.on('end', async () => {
    try {
      const body = Buffer.concat(chunks);
      response = JSON.parse(body); // response.content = html, response.cookies = cookies
      const dom = new JSDOM(response.content);
      console.log(dom.window.document.querySelector('p').textContent); // 'Hello world'
      // Wait until openHtml() has launched the browser and set the page
      // content before moving on to the next statement.
      await openHtml(response.content);
      console.log('hi');
    } catch (err) {
      console.error('Failed to process the API response:', err);
    }
  });
});
// Without an 'error' listener, a failed request throws from the emitter.
req.on('error', (err) => {
  console.error('Request failed:', err);
});
req.end();