I have an array containing objects that I want to iterate through to perform axios calls and manipulate the response with functions. Unfortunately, the final output is an array containing multiple nested arrays with the same repeated object, that has only the results of the first element of the array newspapers.
const newspapers= [{
"name": "CNN",
"address": "https://edition.cnn.com/specials/world/cnn-climate",
"base": "https://edition.cnn.com"
},
{
"name": "The Guardian",
"address": "https://www.theguardian.com/environment/climate-crisis",
"base": "https://www.theguardian.com"
}, etc...]
// Initiate global variable for the results
let articles = [];
// Function to remove duplicates, get img if present and consolidate data
function storeData(element, base, name) {
const results = [];
element.find("style").remove();
const title = element.text();
const urlRaw = element.attr("href");
const url =
urlRaw.includes("www") || urlRaw.includes("http") ? urlRaw : base urlRaw;
// Check for duplicated url
if (tempUrls.indexOf(url) === -1) {
// Check for social media links and skip
if (!exceptions.some((el) => url.toLowerCase().includes(el))) {
tempUrls.push(url);
// Get img if child of anchor tag
const imageElement = element.find("img");
if (imageElement.length > 0) {
// Get the src attribute of the image element
results.push({
title,
url,
source: name,
imgUrl: getImageFromElement(imageElement),
});
} else {
results.push({
title,
url: url,
source: name,
});
}
}
}
return results;
}
// Cheerio function
function getElementsCheerio(html, base, name, searchterms) {
const $ = cheerio.load(html);
const termsAlso = searchterms.also;
const termsOnly = searchterms.only;
const concatInfo = [];
termsAlso.forEach((term) => {
$(`a:contains("climate"):contains(${term})`).each(function () {
const tempData = storeData($(this), base, name);
tempData.map((el) => concatInfo.push(el));
});
});
termsOnly.forEach((term) => {
$(`a:contains(${term})`).each(function () {
const tempData = storeData($(this), base, name);
tempData.map((el) => concatInfo.push(el));
});
});
return concatInfo;
}
// API
app.get("/news", (req, res) => {
// Query String
const query = checkForQuery(req);
const wordsToSearch = query ? verifyQuery(query) : "";
Promise.all(
newspapers.map(({ name, address, base }) =>
axios
.get(address, {
headers: { "Accept-Encoding": "gzip,deflate,compress" },
})
.then((res) => {
const html = res.data;
console.log({ name, address, base });
const scrappedElements = getElementsCheerio(
html,
base,
name,
wordsToSearch
);
scrappedElements.map((item) => articles.push(item));
return articles;
})
)
).then((articles) => {
res.json(articles);
});
});
When I log the loop I see it's going through correctly, however the same two articles retrieved from the first newspaper also appear for all others:
console.log / result:
{
name: 'CNN',
address: 'https://edition.cnn.com/specials/world/cnn-climate',
base: 'https://edition.cnn.com'
}
[{title: article1,
url: article1,
source: article1,
imgUrl: article1},
{title: article2,
url: article2,
source: article2,
imgUrl: article2}]
{
name: 'The Times',
address: 'https://www.thetimes.co.uk/environment/climate-change',
base: 'https://www.thetimes.co.uk'
}
[{title: article1,
url: article1,
source: article1,
imgUrl: article1},
{title: article2,
url: article2,
source: article2,
imgUrl: article2}]
etc...
How can I fix this? Why even though the new object containing another newspaper's information is passing through, it collects always the same articles from the first?
Any assistance is deeply appreciated. I'm a frontend developer, doing this for learning purposes and I understand I might lack some basic knowledge that could avoid this silly issue. Thank you in advance!
CodePudding user response:
You don't need articles
Change this:
scrappedElements.map((item) => articles.push(item));
return articles;
to this
return scrappedElements