I am using Puppeteer to build a basic web scraper in Node.js. I download advertisement data from all subpages of a given category:
`
const puppeteer = require('puppeteer');
/**
 * Walks through the paginated OLX car listings and collects the entries
 * returned by `extractedEvaluateCall` for every visited page.
 *
 * @param {object} [options]
 * @param {number} [options.lastPageNumber=25] - How many pages to visit.
 * @param {number} [options.delayMs=1000] - Pause before reading each page, in milliseconds.
 * @returns {Promise<Array>} All entries from all visited pages, in visiting order.
 */
const scrape = async ({ lastPageNumber = 25, delayMs = 1000 } = {}) => {
  // NOTE(review): presumably the "next page" control of the pagination list;
  // the generated CSS class names look brittle - confirm against the live page.
  const nextPageSelector =
    '#root > div.css-50cyfj > div.css-88vtd4 > form > div:nth-child(5) > div > section.css-j8u5qq > div > ul > li:nth-child(2)';

  // `page.waitFor` is not a function in the installed Puppeteer version, so the
  // pause is implemented with a plain timer instead of a Puppeteer helper.
  const sleep = (ms) =>
    new Promise((resolve) => {
      setTimeout(resolve, ms);
    });

  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.olx.pl/d/motoryzacja/samochody/');

    const results = [];
    for (let index = 0; index < lastPageNumber; index += 1) {
      // Pause before reading so the page reached by the previous click can render.
      await sleep(delayMs);
      results.push(...(await extractedEvaluateCall(page)));

      // There is nothing to click after the last page has been read.
      if (index !== lastPageNumber - 1) {
        await page.click(nextPageSelector);
      }
    }
    return results;
  } finally {
    // Always shut the browser down, even when a step above throws.
    await browser.close();
  }
};
/**
 * Runs in the page context and gathers the `innerText` of every `<a>` element.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; the callback reads the global `document`.
 * @returns {Promise<Array<{offer: string}>>} One `{ offer }` record per anchor, in document order.
 */
async function extractedEvaluateCall(page) {
  // Defined as a self-contained function because it is handed to `page.evaluate`
  // and only touches `document`.
  const collectAnchorTexts = () =>
    Array.from(document.querySelectorAll('a'), (anchor) => ({ offer: anchor.innerText }));

  return page.evaluate(collectAnchorTexts);
}
// Run the scraper, then print the full collection, its size, and its first
// and last entries.
scrape()
  .then((value) => {
    console.log(value);
    console.log(`Collection length: ${value.length}`);
    console.log(value[0]);
    console.log(value[value.length - 1]);
  })
  .catch((err) => {
    // Report the failure instead of leaving the rejection unhandled.
    console.error(err);
    process.exitCode = 1;
  });
` RESULT:
},
{
offer: 'Wolkswagen passat fl 1.9tdi 130km\n'
'\n'
'3 700 zł\n'
'\n'
'Giżycko - Dzisiaj o 17:10\n'
'\n'
'2002 - 532 321 km\n'
'Obserwuj'
},
{
offer: 'Renault Scenic Automat, piękny stan, klimatronik, oryginał, bezwypadkowy, opłacony\n'
'\n'
'11 999 zł\n'
'\n'
'Ryki - Dzisiaj o 17:10\n'
'\n'
'2006 - 175 000 km\n'
'Obserwuj'
},
5100 more items
How can I export the console data to a JSON file? The first thing that comes to mind is JSON.parse(), but I can't work out how to apply it to data fetched from HTML.
CodePudding user response:
You should first convert your object to a string with JSON.stringify():
// Serialize the collected results into a JSON string.
const json = JSON.stringify(results);
Then write it to a JSON file with the file system (fs) module:
// Write the serialized JSON string to disk using Node's file system module.
const fs = require('fs');

// Rethrows a write error; otherwise reports that the file was written.
const reportWriteOutcome = (err) => {
  if (err) throw err;
  console.log('complete');
};

fs.writeFile('filename.json', json, 'utf8', reportWriteOutcome);