I can't work out if its possible to do this... I am currently getting this output
{"BUSINESS":[{"title":"Luling City Market"},{"image":"https://i1.ypcdn.com/blob/f714c6a156c8bb5fb93178c0b1c4bc038dd96ec9_150x150_crop.jpg"}]}{"BUSINESS":[{"title":"Barry's Pizza"},{"image":"https://i1.ypcdn.com/blob/3b90e6b5687f5edf98e1abf1e9f74ddaf8f90e1d_150x150_crop.jpg"}]}{"BUSINESS":[{"title":"Luling City Market"},{"image":"https://i2.ypcdn.com/blob/f714c6a156c8bb5fb93178c0b1c4bc038dd96ec9_150x150_crop.jpg"}]}
From:
const puppeteer = require('puppeteer');
var fs = require('fs');
async function scrapeProduct(url){
const file = fs.readFileSync('restaurants.json')
var scrapeTitle = '//*[@id="main-header"]/article/div/h1';
var scrapeImage = '//*[@id="main-header"]/article/a/img';
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(url);
const [el] = await page.$x(scrapeImage);
const src = await el.getProperty('src');
const image = await src.jsonValue();
const [el2] = await page.$x(scrapeTitle);
const txt = await el2.getProperty('textContent');
const title = await txt.jsonValue();
console.log({image, title});
var product = {
'BUSINESS_TITLE' : [title],
'BUSINESS_IMAGE' : [image]
}
if (file.length == 0){
fs.writeFile('restaurants.json', JSON.stringify(product), (err) =>{
if (err) { console.error(err); return; };
console.log('Created File');
})
}else{
fs.appendFile('restaurants.json', JSON.stringify(product), (err) =>{
if (err) { console.error(err); return; };
console.log('Updated File');
})
}
browser.close();
}
var urls = [
'https://www.yellowpages.com/houston-tx/mip/luling-city-market-1615603?lid=1000597596315',
'https://www.yellowpages.com/houston-tx/mip/barrys-pizza-472089383?lid=1001593326336'
]
for(var val in urls){
scrapeProduct(urls[val]);
}
Of which is not the right way to format a JSON file...
To ensure that it formatted correctly, it should be more like this:
{"BUSINESS-TITLE":["1","2"],"BUSINESS-IMAGE":["1","2"]}
CodePudding user response:
Instead of append you can do first JSON.parse(file)
then update the content with the new product and finally
fs.writeFile('restaurants.json', JSON.stringify(updatedProducts))
CodePudding user response:
The simplest and valid way of doing this would be in three steps:
Step 1: Parsing the json file to an array.
Step 2: Appending the object to that array.
Step 3: Stringifying and saving that to your file.