Home > Software design >  Loop through pages to return all the product links
Loop through pages to return all the product links

Time:11-06

The goal is to obtain all product links from every page of the pagination. So far I have managed to print the links to the console with console.log(links). However, since I am completely new to this field, I don't know how to pass the value back to the caller with a return statement (return links).

With console.log(links) I get a warning: getLinks is not iterable

const puppeteer = require('puppeteer')

/**
 * Opens a visible browser, loads the first listing page, and then keeps
 * clicking the pagination "next" control for as long as one is present,
 * logging the product link hrefs matched on each iteration.
 *
 * NOTE(review): the function body contains no `return` statement, so the
 * promise it returns resolves to `undefined`; the links are only printed.
 * The browser that is launched here is never closed.
 *
 * @returns {Promise<undefined>}
 */
async function getLinks(){
    const browser = await puppeteer.launch({headless: false, defaultViewport: null});
    const page = await browser.newPage();

    const url = "https://example.com/product-category?p=1&nidx"
    

    await page.goto(url)

    // While the first button inside `.change-country-buttons` exists, click it
    // (with fixed pauses around each step) until it no longer matches.
    while(await page.$('.change-country-buttons > button:nth-child(1)')){
        await page.waitForTimeout(2000);
        await page.keyboard.press('ArrowDown');
        await page.waitForSelector('.change-country-buttons');
        await page.waitForTimeout(2000);
        await page.click('.change-country-buttons > button:nth-child(1)');
        await page.waitForTimeout(2000);
    }
    // Runs once per page for as long as a "next" pagination control is found.
    while(await page.$(".pagination .pagination--next")){
        await page.waitForTimeout(2000);
        // Scroll the "next" control into view before clicking it.
        await page.evaluate(() => {
            document.querySelector(".pagination .pagination--next").scrollIntoView();
    });
    await page.waitForTimeout(1000);
    await page.waitForSelector(".pagination .pagination--next")
    await page.waitForTimeout(500);
    await page.click('.pagination .pagination--next')
    // `links` is declared with `const` inside this loop body, so it is
    // re-created on every iteration and is not visible outside the loop.
    // NOTE(review): the hrefs are read right after the click on "next",
    // not before it.
    const links = await page.$$eval('.item__info > .mtc-link:nth-child(2)', (allAs) => { return allAs.map((a) => a.href) });
    await page.waitForTimeout(1500);
    console.log(links)
    }
}
// NOTE(review): this statement sits after the closing brace of getLinks, and
// `links` only exists inside the pagination loop above, so nothing is
// returned from the function by this line.
return links // Returns links only from the first page and then the loop stops

I tried something with Promise.all(), but it wasn't entirely clear to me how to use it.

Please help and be gentle as I am just starting to learn the basics

CodePudding user response:

You need to create an array and push all the helmet links from each page onto it.

This tested successfully for me.

const puppeteer = require('puppeteer')

/**
 * Collects the product links from every page of the paginated listing.
 *
 * Opens a browser, loads the first listing page, dismisses the country
 * buttons while they are present, and then walks the pagination: the links
 * of the page currently shown are read first, and only afterwards is the
 * "next" control clicked. Reading before clicking guarantees that neither
 * the first nor the last page is skipped.
 *
 * The browser is always closed before the function settles, including when
 * a step throws, so the Node process is able to exit.
 *
 * @returns {Promise<string[]>} The hrefs of all matched product links, in
 *     page order.
 * @throws Whatever Puppeteer raises when launching, navigating, or querying
 *     the page fails.
 */
async function getLinks(){
    const url = "https://www.motocard.com/en/motorcycle-road-gear/helmets/precio_150-3200/full-face?p=1&nidx";
    const countryButton = '.change-country-buttons > button:nth-child(1)';
    const nextButton = '.pagination .pagination--next';
    const productLink = '.item__info > .mtc-link:nth-child(2)';

    const browser = await puppeteer.launch({headless: false, defaultViewport: null});
    try {
        const page = await browser.newPage();

        // page.waitForTimeout was removed in Puppeteer v22; fall back to a
        // plain timer when the method is not available.
        const pause = (ms) => (typeof page.waitForTimeout === 'function'
            ? page.waitForTimeout(ms)
            : new Promise((resolve) => setTimeout(resolve, ms)));

        await page.goto(url);

        // Click the first country button until it no longer matches.
        while (await page.$(countryButton)) {
            await pause(2000);
            await page.keyboard.press('ArrowDown');
            await page.waitForSelector('.change-country-buttons');
            await pause(2000);
            await page.click(countryButton);
            await pause(2000);
        }

        const allLinks = [];
        while (true) {
            // Give the current page time to render before reading it.
            await pause(2000);
            const links = await page.$$eval(productLink, (anchors) => anchors.map((a) => a.href));
            allLinks.push(...links);

            // No "next" control means the page just read was the last one.
            if (!(await page.$(nextButton))) {
                break;
            }

            await page.evaluate((selector) => {
                document.querySelector(selector).scrollIntoView();
            }, nextButton);
            await pause(1000);
            await page.waitForSelector(nextButton);
            await pause(500);
            await page.click(nextButton);
            await pause(1500);
        }
        return allLinks;
    } finally {
        // Always release the browser, even if scraping failed part-way.
        await browser.close();
    }
}

// Entry point: collect every product link, then print the result.
(async ()=>{
    try {
        const links = await getLinks();
        console.log('done');
        console.log(links);
    } catch (err) {
        // Report scraping failures explicitly instead of leaving an
        // unhandled promise rejection, and signal failure to the shell.
        console.error(err);
        process.exitCode = 1;
    }
})();

Stay safe out there. Cheers.

  • Related