Home > front end >  Is there a way to open html in a browser in node
Is there a way to open html in a browser in node

Time:03-06

In Python's Scrapy there is a method to render HTML in a browser (https://docs.scrapy.org/en/latest/topics/debug.html#open-in-browser).

I'm currently working in node and doing some scraping.

// Issue the request and buffer the response so the full page can be printed.
var req = http.request(options, (res) => {
   const parts = [];

   // Collect each raw chunk as it arrives.
   res.on("data", (piece) => {
       parts.push(piece);
   });

   // Once the response is complete, join the chunks and print them as text.
   res.on("end", () => {
       const body = Buffer.concat(parts);
       console.log(body.toString());
   });
});

// No request body is written; end() finishes sending the request.
req.end();

I can get the scraped html page and would like to render this in a browser while debugging. What is the best way to accomplish this in node?

As an example, I'd like to set a breakpoint after the body is declared and, in the REPL, run something like:

open_in_browser(body.toString())

like in scrapy.

CodePudding user response:

Without using Puppeteer, you could use chrome-launcher to start Chrome, then use chrome-remote-interface to interact with the Chrome debugging port, through which you can call a method to set the page content.

For example,

/**
 * Requests the Stack Overflow question page over HTTPS and, once the whole
 * response body has been received, passes the HTML text to
 * open_content_in_chrome.
 *
 * Nothing is returned; all work happens in the request callbacks. Request,
 * response and rendering failures are logged with console.error instead of
 * being left unhandled.
 */
function scrape_content() {
    const https = require('https')

    const options = {
        hostname: 'stackoverflow.com',
        port: 443,
        path: '/questions/71363220/is-there-a-way-to-open-html-in-a-browser-in-node',
        method: 'GET'
    }

    const req = https.request(options, function (res) {
        const chunks = []
        res.on("data", chunk => chunks.push(chunk))
        res.on("end", () => {
            // open_content_in_chrome is async: attach a rejection handler so a
            // failure is reported rather than becoming an unhandled rejection.
            open_content_in_chrome(Buffer.concat(chunks).toString())
                .catch(err => console.error(err))
        })
        // A response stream that fails part-way emits 'error'; log it.
        res.on("error", err => console.error(err))
    })

    // Without an 'error' listener, a DNS or connection failure is emitted as
    // an unhandled 'error' event, which throws and takes the process down.
    req.on("error", err => console.error(err))

    req.end()
}

// Start the scrape. open_content_in_chrome is declared further down, but
// function declarations are hoisted, so it is already callable from here.
scrape_content()

/**
 * Launches Chrome and shows the given HTML in it by talking to the browser's
 * debugging port.
 *
 * The launched Chrome instance is not stopped by this function; only the
 * debugging connection is closed.
 *
 * @param {string} content - HTML markup to display.
 * @returns {Promise<void>} Settles after the debugging connection (if any)
 *   has been closed. Errors raised while connecting or setting the content
 *   are logged, not rethrown; a failure to load the packages or to launch
 *   Chrome happens outside the try block and rejects the promise.
 */
async function open_content_in_chrome(content) {
    const launcher = require('chrome-launcher')
    const chrome = await launcher.launch()

    console.log(`Chrome debugging port running on ${chrome.port}`)

    const connect = require('chrome-remote-interface')

    let session
    try {
        // Attach to the debugging port of the Chrome instance started above.
        session = await connect({ port: chrome.port })

        // Open a blank page, then swap that frame's document for the given HTML.
        const blank = await session.Page.navigate({ url: 'about:blank' })
        await session.Page.setDocumentContent({ frameId: blank.frameId, html: content })
    } catch (failure) {
        console.error(failure)
    } finally {
        // Close the connection only when one was actually established.
        if (session) {
            await session.close()
        }
    }
}
  • Related