Scrolling not working while scraping Google Maps using Puppeteer


I am scraping Google Maps place data, but it only returns the first 10 user reviews and nothing after that. I think there is a problem with the scroll functionality.

const puppeteer = require('puppeteer');

function extractItems() {
  const extractedElements = document.querySelectorAll('.MyEned span.wiI7pd');
  const items = [];
  for (let element of extractedElements) {
    items.push(element.innerText);
  }
  return items;
}

async function scrapeItems(
  page,
  extractItems,
  itemCount,
  scrollDelay = 2000,
) {
  let items = [];
  try {
    let previousHeight;
    while (items.length < itemCount) {
      items = await page.evaluate(extractItems);
      previousHeight = await page.evaluate('div.m6QErb.DxyBCb.scrollHeight');//selector for scroller
      await page.evaluate('window.scrollTo(0, div.m6QErb.DxyBCb.scrollHeight)');
      await page.waitForFunction(`div.m6QErb.DxyBCb.scrollHeight > ${previousHeight}`);
      await page.waitForTimeout(scrollDelay);
    }
  } catch(e) { }
  return items;
}

(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  const [page] = await browser.pages();
  await page.setViewport({ width: 1280, height: 926 });

  await page.goto('https://www.google.com/maps/place/Ace Florist & Flower Delivery/@40.8265438,-73.5011026,15z/data=!4m7!3m6!1s0x0:0x9062074cae10c10f!8m2!3d40.8265438!4d-73.5011026!9m1!1b1');

  // Auto-scroll and extract desired items from the page. Currently set to extract 30 items.
  const items = await scrapeItems(page, extractItems, 30);

  console.log(items)

  await browser.close();
})();

CodePudding user response:

So I just found out that I have to use document.querySelector when evaluating the scroll height, and also when checking that the new scroll height is greater than the previous height.

    items = await page.evaluate(extractItems);
    // $$eval passes an array of all matching elements, so .map works here.
    previousHeight = await page.$$eval("div.m6QErb.DxyBCb", (els) =>
      els.map((e) => e.scrollHeight)
    );
    await page.evaluate(`document.querySelector("div.m6QErb.DxyBCb").scrollTo(0, ${previousHeight[0]})`);
    await page.waitForFunction(`document.querySelector("div.m6QErb.DxyBCb").scrollHeight > ${previousHeight[0]}`);
    await page.waitForTimeout(scrollDelay);
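
Note that page.$eval hands its callback a single element, while page.$$eval passes an array of all matches, which is what the .map call above needs. Since there is only one reviews scroller on the page, the same steps can also be written against a single element. This is only a sketch, assuming the div.m6QErb.DxyBCb selector still matches the reviews pane (Google Maps class names change frequently):

    // Sketch: read the height of the first matching scroller with $eval,
    // which passes a single element, so no array indexing is needed.
    const previousHeight = await page.$eval(
      'div.m6QErb.DxyBCb',
      (el) => el.scrollHeight,
    );

    // Scroll that element to its bottom, then wait for more reviews to extend it.
    await page.$eval('div.m6QErb.DxyBCb', (el) => el.scrollTo(0, el.scrollHeight));
    await page.waitForFunction(
      (h) => document.querySelector('div.m6QErb.DxyBCb').scrollHeight > h,
      {},
      previousHeight,
    );

Either form can replace the corresponding lines inside the scrolling loop.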

CodePudding user response:

This is working:

'use strict'

const puppeteer = require('puppeteer');
function extractItems() {
  const extractedElements = document.querySelectorAll('.MyEned span.wiI7pd');
  const items = [];
  for (let element of extractedElements) {
    items.push(element.innerText);
  }
  return items;
}
async function scrapeItems(
  page,
  extractItems,
  itemCount,
  scrollDelay = 2000,
) {
  let items = [];
  try {
    let previousHeight;
    while (items.length < itemCount) {
      console.log(`items.length: ${items.length} itemCount: ${itemCount}`)
      
      items = await page.evaluate(extractItems);

      previousHeight = await page.evaluate(() => { 
        const scroller = document.querySelector('div.m6QErb.DxyBCb') 
        return scroller.scrollHeight  
      })

      await page.evaluate(`document.querySelector("div.m6QErb.DxyBCb").scrollTo(0, ${previousHeight})`);
      await page.waitForFunction(`document.querySelector("div.m6QErb.DxyBCb").scrollHeight > ${previousHeight}`);
      await page.waitForTimeout(scrollDelay);

    }
  } catch (e) {
    // waitForFunction eventually times out once no more reviews load;
    // the caught error simply ends the scrolling loop.
  }
  return items;
}


(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  const [page] = await browser.pages();
  await page.setViewport({ width: 1280, height: 926 });

  await page.goto('https://www.google.com/maps/place/Ace Florist & Flower Delivery/@40.8265438,-73.5011026,15z/data=!4m7!3m6!1s0x0:0x9062074cae10c10f!8m2!3d40.8265438!4d-73.5011026!9m1!1b1');

  // Auto-scroll and extract desired items from the page. Currently set to extract 30 items.
  const items = await scrapeItems(page, extractItems, 30);

  console.log(items);

  await browser.close();
})();
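
One caveat with both versions: the loop only ends when waitForFunction throws a timeout error (30 seconds by default) that the empty catch swallows, so the script sits idle for that long after the last review has loaded. Below is a rough alternative, a sketch rather than a tested implementation, that stops as soon as the review count stops growing. It assumes the same .MyEned span.wiI7pd and div.m6QErb.DxyBCb selectors used above, and replaces page.waitForTimeout (deprecated and removed in recent Puppeteer releases) with a plain setTimeout delay:

// Sketch: scroll the reviews pane and stop once no new reviews appear.
async function scrapeItemsUntilStable(page, extractItems, itemCount, scrollDelay = 2000) {
  let items = await page.evaluate(extractItems);
  while (items.length < itemCount) {
    const before = items.length;

    // Scroll the reviews pane itself rather than the window.
    await page.$eval('div.m6QErb.DxyBCb', (el) => el.scrollTo(0, el.scrollHeight));

    // Give the next batch of reviews time to load.
    await new Promise((resolve) => setTimeout(resolve, scrollDelay));

    items = await page.evaluate(extractItems);

    // No new reviews after scrolling and waiting: assume the end of the list.
    if (items.length === before) break;
  }
  return items.slice(0, itemCount);
}

It takes the same arguments as scrapeItems, so it can be swapped in for the scrapeItems(page, extractItems, 30) call without any other changes.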