Home > Software design >  Nodejs, puppeteer - Saving the result in a json file
Nodejs, puppeteer - Saving the result in a json file

Time:12-24

I am using Puppeteer to build a basic web scraper on Node.js. I download advertisement data from all subpages of a given category:

`

const puppeteer = require('puppeteer');
 
/**
 * Scrapes the OLX car-listings category page by page.
 *
 * Launches a headless browser, opens the category URL, and then, for a fixed
 * number of pages, waits one second, collects the page's data through
 * `extractedEvaluateCall`, and clicks the pagination element to advance
 * (skipped after the last page). The browser is always closed, even when a
 * step fails.
 *
 * @returns {Promise<Array>} The per-page extraction results concatenated in page order.
 */
const scrape = async () => {
    const browser = await puppeteer.launch({ headless: true });

    try {
        const page = await browser.newPage();

        await page.goto('https://www.olx.pl/d/motoryzacja/samochody/');

        const lastPageNumber = 25;
        let results = [];

        for (let index = 0; index < lastPageNumber; index++) {
            // Plain timer instead of page.waitFor(), which newer Puppeteer
            // releases no longer provide ("page.waitFor is not a function").
            await new Promise((resolve) => setTimeout(resolve, 1000));
            results = results.concat(await extractedEvaluateCall(page));

            // There is no next page to move to after the last iteration.
            if (index !== lastPageNumber - 1) {
                await page.click('#root > div.css-50cyfj > div.css-88vtd4 > form > div:nth-child(5) > div > section.css-j8u5qq > div > ul > li:nth-child(2)');
            }
        }

        return results;
    } finally {
        // Await the shutdown so the Chromium process is not left behind on errors.
        await browser.close();
    }
};
 
/**
 * Collects the visible text of every anchor element on the given page.
 *
 * The callback runs inside the page through `page.evaluate`, so it may only
 * use what is available in the page's own `document`.
 *
 * @param {object} page - Object exposing an `evaluate(fn)` method (a Puppeteer page).
 * @returns {Promise<Array<{offer: string}>>} One `{ offer }` record per `<a>` element, in document order.
 */
async function extractedEvaluateCall(page) {
    const collectAnchorTexts = () => {
        const anchors = document.querySelectorAll('a');
        return Array.from(anchors, (anchor) => ({ offer: anchor.innerText }));
    };

    return page.evaluate(collectAnchorTexts);
}
 
// Run the scraper and print the full collection, its size, and its first and
// last entries.
scrape()
    .then((value) => {
        console.log(value);
        console.log(`Collection length: ${value.length}`);
        console.log(value[0]);
        console.log(value[value.length - 1]);
    })
    .catch((error) => {
        // Report failures instead of leaving an unhandled promise rejection.
        console.error(error);
        process.exitCode = 1;
    });

` RESULT:

},
  {
    offer: 'Wolkswagen passat fl 1.9tdi 130km\n' +
      '\n' +
      '3 700 zł\n' +
      '\n' +
      'Giżycko - Dzisiaj o 17:10\n' +
      '\n' +
      '2002 - 532 321 km\n' +
      'Obserwuj'
  },
  {
    offer: 'Renault Scenic Automat, piękny stan, klimatronik, oryginał, bezwypadkowy, opłacony\n' +
      '\n' +
      '11 999 zł\n' +
      '\n' +
      'Ryki - Dzisiaj o 17:10\n' +
      '\n' +
      '2006 - 175 000 km\n' +
      'Obserwuj'
  },
  5100 more items

How can I export the data printed to the console to a JSON file? The first thing that comes to mind is JSON.parse(), but I can't work out how to use it when the data is fetched from HTML.

CodePudding user response:

You should first convert your object to a string with JSON.stringify():

// Serialize the scraped results to a JSON string so it can be written to a file.
let json = JSON.stringify(results);

Then write it to a JSON file using Node's file system (`fs`) module:

// Write the JSON string to 'filename.json' as UTF-8 and log once the write finishes.
const fs = require('fs');

const onWritten = (err) => {
    // Rethrow write failures so they are not silently ignored.
    if (err) {
        throw err;
    }
    console.log('complete');
};

fs.writeFile('filename.json', json, 'utf8', onWritten);
  • Related