const express = require("express");
const cheerio = require("cheerio");
const axios = require("axios");
const cors = require("cors");
const app = express();
async function getSearchResults(searchFor) {
const url = `https://www.bol.com/be/nl/s/?searchtext=airpods pro`;
const respone = await axios.get(url);
const $=cheerio.load(respone.data);
// verwreken van het resultaat in een array
const ul = $('.product-list');
ul.find('li .product-item__image .h-o-hidden a .skeleton-image').each((i, element) => {
const $element = $(element);
const a = $element.find('img').attr('src');
console.log(a);
});
}
I also give the HTML of the website I'm trying to scrape So far it recognizes that there are indeed ~20 pictures but it gives an undifined value...
CodePudding user response:
You'll need to reference the actual image tag, which is another level down.
Try the following:
ul.find('li .product-item__image .h-o-hidden a .skeleton-image').each((i, element) => {
const $element = $(element);
const a = $element.find('.skeleton-image__container').find('img').attr('src');
});
Or, if there's only one div under skeleton-image
class:
ul.find('li .product-item__image .h-o-hidden a .skeleton-image').each((i, element) => {
const $element = $(element);
const a = $element.find('div').find('img').attr('src');
});
Hope this helps.
CodePudding user response:
your problem is that first img inside this div does not have src attribute.
here is the working code:
const ul = $('.product-list');
ul.find('li .product-item__image .h-o-hidden a .skeleton-image').each((i, element) => {
const $element = $(element);
const a = $element.find('img').attr('data-src');
console.log(a);
});