In Python's Scrapy there is a helper function, `open_in_browser`, that opens a response's HTML in a browser (https://docs.scrapy.org/en/latest/topics/debug.html#open-in-browser).
I'm currently working in node and doing some scraping.
// Issue the request and print the complete response body once it has arrived.
// NOTE(review): `http` and `options` are not defined in this snippet; they are
// presumably set up earlier in the script — confirm.
const req = http.request(options, (res) => {
  // The body arrives as a series of Buffer chunks; gather them until "end".
  const parts = [];

  res.on("data", (part) => {
    parts.push(part);
  });

  res.on("end", () => {
    const body = Buffer.concat(parts);
    console.log(body.toString());
  });
});

// No request body is written; end() finishes sending the request.
req.end();
I can get the scraped html page and would like to render this in a browser while debugging. What is the best way to accomplish this in node?
As an example, I'd like to set a breakpoint after the body is declared and in the REPL run something like:
open_in_browser(body.toString())
like in scrapy.
CodePudding user response:
Without using Puppeteer, you could launch Chrome with chrome-launcher and then use chrome-remote-interface to talk to Chrome's debugging port, through which you can call a method that sets the page content.
For example,
/**
 * Fetches a fixed Stack Overflow page over HTTPS and, once the whole response
 * body has been received, passes it as a string to open_content_in_chrome().
 *
 * Request failures and rejections from open_content_in_chrome() are logged
 * with console.error instead of being left unhandled.
 */
function scrape_content() {
  const https = require('https');
  const options = {
    hostname: 'stackoverflow.com',
    port: 443,
    path: '/questions/71363220/is-there-a-way-to-open-html-in-a-browser-in-node',
    method: 'GET',
  };

  const req = https.request(options, (res) => {
    // The body arrives as a series of Buffer chunks; gather them until 'end'.
    const chunks = [];
    res.on('data', (chunk) => chunks.push(chunk));
    res.on('end', () => {
      const html = Buffer.concat(chunks).toString();
      // open_content_in_chrome is async: attach a rejection handler so a
      // failure inside it does not surface as an unhandled promise rejection.
      open_content_in_chrome(html).catch((err) => console.error(err));
    });
  });

  // Without an 'error' listener, a failed request (DNS, connection, TLS, ...)
  // emits an unhandled 'error' event, which throws and takes the process down.
  req.on('error', (err) => console.error(err));

  // No request body is written; end() finishes sending the request.
  req.end();
}

scrape_content();
/**
 * Launches Chrome via chrome-launcher, connects to its debugging port with
 * chrome-remote-interface, navigates to about:blank and replaces that frame's
 * document with the given HTML.
 *
 * Errors raised while connecting or setting the content are logged with
 * console.error. The debugging connection is closed before returning; this
 * function does not itself close the launched browser.
 *
 * @param {string} content - HTML markup to set as the page's document content.
 * @returns {Promise<void>}
 */
async function open_content_in_chrome(content) {
  const launcher = require('chrome-launcher');
  const browser = await launcher.launch();
  console.log(`Chrome debugging port running on ${browser.port}`);

  const connect = require('chrome-remote-interface');
  let session;
  try {
    // Attach to the debugging port of the Chrome instance launched above.
    session = await connect({ port: browser.port });

    // Navigate to a blank page to obtain a frame id, then overwrite that
    // frame's document with the supplied HTML.
    const navigation = await session.Page.navigate({ url: 'about:blank' });
    await session.Page.setDocumentContent({
      frameId: navigation.frameId,
      html: content,
    });
  } catch (failure) {
    console.error(failure);
  } finally {
    // Only close the connection if it was actually established.
    if (session) {
      await session.close();
    }
  }
}