I am scraping google maps places data, but what is happening is that it only returns me the first 10 results of the user reviews, not after that. I think there is some problem with scroll functionality.
const puppeteer = require('puppeteer');
function extractItems() {
const extractedElements = document.querySelectorAll('.MyEned span.wiI7pd');
const items = [];
for (let element of extractedElements) {
items.push(element.innerText);
}
return items;
}
async function scrapeItems(
page,
extractItems,
itemCount,
scrollDelay = 2000,
) {
let items = [];
try {
let previousHeight;
while (items.length < itemCount) {
items = await page.evaluate(extractItems);
previousHeight = await page.evaluate('div.m6QErb.DxyBCb.scrollHeight');//selector for scroller
await page.evaluate('window.scrollTo(0, div.m6QErb.DxyBCb.scrollHeight)');
await page.waitForFunction(`div.m6QErb.DxyBCb.scrollHeight > ${previousHeight}`);
await page.waitForTimeout(scrollDelay);
}
} catch(e) { }
return items;
}
(async () => {
let browser = await puppeteer.connect();
browser = await puppeteer.launch({
headless: false,
args: ['--no-sandbox', '--disable-setuid-sandbox'],
});
const [page] = await browser.pages();
page.setViewport({ width: 1280, height: 926 });
await page.goto('https://www.google.com/maps/place/Ace Florist & Flower Delivery/@40.8265438,-73.5011026,15z/data=!4m7!3m6!1s0x0:0x9062074cae10c10f!8m2!3d40.8265438!4d-73.5011026!9m1!1b1');
// Auto-scroll and extract desired items from the page. Currently set to extract eight items.
const items = await scrapeItems(page, extractItems, 30);
console.log(items)
await browser.close();
})();
CodePudding user response:
So I just found out that I have to add document.querySelector
while evaluating
scroll height and also when checking the scroll height is greater than previous Height.
items = await page.evaluate(extractItems);
previousHeight = await page.$eval("div.m6QErb.DxyBCb" , (els) =>
els.map((e) => e.scrollHeight)
)
await page.evaluate(`document.querySelector("div.m6QErb.DxyBCb").scrollTo(0, ${previousHeight[0]})`);
await page.waitForFunction(`document.querySelector("div.m6QErb.DxyBCb").scrollHeight > ${previousHeight[0]}`);
await page.waitForTimeout(scrollDelay);
CodePudding user response:
Thi is working:
'use strict'
const puppeteer = require('puppeteer');
function extractItems() {
const extractedElements = document.querySelectorAll('.MyEned span.wiI7pd');
const items = [];
for (let element of extractedElements) {
items.push(element.innerText);
}
return items;
}
async function scrapeItems(
page,
extractItems,
itemCount,
scrollDelay = 2000,
) {
let items = [];
try {
let previousHeight;
while (items.length < itemCount) {
console.log(`items.length: ${items.length} itemCount: ${itemCount}`)
items = await page.evaluate(extractItems);
previousHeight = await page.evaluate(() => {
const scroller = document.querySelector('div.m6QErb.DxyBCb')
return scroller.scrollHeight
})
await page.evaluate(`document.querySelector("div.m6QErb.DxyBCb").scrollTo(0, ${previousHeight})`);
await page.waitForFunction(`document.querySelector("div.m6QErb.DxyBCb").scrollHeight > ${previousHeight}`);
await page.waitForTimeout(scrollDelay);
}
} catch(e) { }
return items;
}
(async () => {
const browser = await puppeteer.launch({
headless: false,
args: ['--no-sandbox', '--disable-setuid-sandbox'],
});
const [page] = await browser.pages();
page.setViewport({ width: 1280, height: 926 });
await page.goto('https://www.google.com/maps/place/Ace Florist & Flower Delivery/@40.8265438,-73.5011026,15z/data=!4m7!3m6!1s0x0:0x9062074cae10c10f!8m2!3d40.8265438!4d-73.5011026!9m1!1b1');
// Auto-scroll and extract desired items from the page. Currently set to extract eight items.
const items = await scrapeItems(page, extractItems, 30);
console.log(items)
await browser.close();
})();