I have an axios get request which takes too long to resolve. This is for a site hosted on Heroku, which has a request timeout set at 30 seconds. The following code returns the request after about 50 seconds (which is surprisingly long, as there are only 21 urls to loop through in playerLink). Therefore, the request is never resolved on the live site.
Here is the Promise code:
const PORT = 8000
const axios = require('axios')
const cheerio = require('cheerio')
const express = require('express')
const cors = require('cors')
const app = express()
app.use(cors())
app.listen(PORT , () => console.log(`server running on PORT ${PORT}`))
const players = 'https://www.trinethunder.com/sports/sball/2021-22/teams/trine?view=roster'
const playerStats = 'https://www.trinethunder.com'
const playerLink = []
app.get('/players', (req, res) => {
function getPlayers() {
return new Promise((resolve, reject) => {
axios(players)
.then((response) => {
const html = response.data;
const $ = cheerio.load(html);
$("td.text.pinned-col > a", html).each(function () {
var link = $(this).attr("href");
//if link not yet in array, push to array
if (playerLink.indexOf(playerStats link) === -1) {
playerLink.push(playerStats link);
}
});
resolve()
})
.catch((err) => {
console.log(err);
});
});
}
function getPlayerStats() {
setTimeout(async () => {
const statsArray = []
for (let i = 0; i < playerLink.length; i ) {
await new Promise((resolve, reject) => {
axios.get
(playerLink[i])
.then((response) => {
const html = response.data;
const $ = cheerio.load(html);
const statName = [];
const statDesc = [];
const statNum = [];
$("h2 > span:nth-child(1)", html).each(function () {
var name = $(this).text();
statName.push(name);
});
$(".stat-title", html).each(function () {
var stat1 = $(this).text();
statDesc.push(stat1);
});
$(".stat-value", html).each(function () {
var stat2 = $(this).text();
statNum.push(stat2);
});
//Conditional is here because sometimes statsArray
//gets filled multiple times
if (statsArray.length < 63) {
statsArray.push(statName, statDesc, statNum);
}
resolve();
})
.catch((err) => console.log(err));
});
}
res.json(statsArray)
}, 400);
}
getPlayers()
.then(getPlayerStats)
.catch((err) => console.log(err));
});
Simplified Fetch statement for /players:
fetch('http://localhost:8000/players')
.then(response => response.json())
.then(data => {
console.log(data)
}).catch(err=>console.log(err))
Please let me know if you see anything that may be slowing down the execution of the request.
CodePudding user response:
I cleaned up the code, removed the setTimeout()
, set it up for maximum parallelization and instrumented it and made it so it can run stand-alone. After doing so, the log it produces is below and I see that getPlayers()
takes 2413ms and the synchronous cheerio processing of the individual player requests takes a total of 6087ms. From start to finish, the whole thing takes 9415ms on my system.
This is significantly faster than what you report. The biggest structural change I made is that all the individual getPlayerStat requests are made in parallel, not in serial which (if the target server can handle it) will shorten the total wait for network requests on getting player stats. I also removed the setTimeout()
as that seemed like a hack for some other problem and once the code is structured properly for asynchronous handling, that should not be necessary.
Here's the detailed log if you want to see where all the detailed time is spent. You can run the code below on your own system to see what you get there:
000000: begin all
000006: begin getPlayers()
002419: end getPlayers()
002419: begin getPlayerStats
002420: begin get https://www.trinethunder.com/sports/sball/2021-22/players/makinzeromingersy0k
002423: begin get https://www.trinethunder.com/sports/sball/2021-22/players/emersynhaneyjnrb
002424: begin get https://www.trinethunder.com/sports/sball/2021-22/players/amandapratheruluw
002424: begin get https://www.trinethunder.com/sports/sball/2021-22/players/adrienneroseybff7
002425: begin get https://www.trinethunder.com/sports/sball/2021-22/players/emmabeyeri6zz
002426: begin get https://www.trinethunder.com/sports/sball/2021-22/players/aprilsellersi95s
002427: begin get https://www.trinethunder.com/sports/sball/2021-22/players/annakoeppl38q8
002427: begin get https://www.trinethunder.com/sports/sball/2021-22/players/annagilli8rl
002428: begin get https://www.trinethunder.com/sports/sball/2021-22/players/angelenaperry2scn
002429: begin get https://www.trinethunder.com/sports/sball/2021-22/players/laurenclausenfb4j
002430: begin get https://www.trinethunder.com/sports/sball/2021-22/players/emilywheaton1jym
002430: begin get https://www.trinethunder.com/sports/sball/2021-22/players/kaylyncoahranhp6r
002431: begin get https://www.trinethunder.com/sports/sball/2021-22/players/mercededaughertyiswy
002432: begin get https://www.trinethunder.com/sports/sball/2021-22/players/taylormurdockgeho
002432: begin get https://www.trinethunder.com/sports/sball/2021-22/players/lexiclark77gr
002433: begin get https://www.trinethunder.com/sports/sball/2021-22/players/ainsleyphillipsmfe9
002434: begin get https://www.trinethunder.com/sports/sball/2021-22/players/ellietrinexhe2
002434: begin get https://www.trinethunder.com/sports/sball/2021-22/players/ashleyswartouta714
002435: begin get https://www.trinethunder.com/sports/sball/2021-22/players/gisellerileybdb8
002436: begin get https://www.trinethunder.com/sports/sball/2021-22/players/elizabethkoch5umu
002436: begin get https://www.trinethunder.com/sports/sball/2021-22/players/scarlettelliott0bvt
003251: after get https://www.trinethunder.com/sports/sball/2021-22/players/kaylyncoahranhp6r
003596: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/kaylyncoahranhp6r
003599: after get https://www.trinethunder.com/sports/sball/2021-22/players/makinzeromingersy0k
003902: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/makinzeromingersy0k
003905: after get https://www.trinethunder.com/sports/sball/2021-22/players/emersynhaneyjnrb
004200: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/emersynhaneyjnrb
004203: after get https://www.trinethunder.com/sports/sball/2021-22/players/amandapratheruluw
004489: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/amandapratheruluw
004492: after get https://www.trinethunder.com/sports/sball/2021-22/players/emmabeyeri6zz
004771: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/emmabeyeri6zz
004773: after get https://www.trinethunder.com/sports/sball/2021-22/players/aprilsellersi95s
005060: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/aprilsellersi95s
005063: after get https://www.trinethunder.com/sports/sball/2021-22/players/elizabethkoch5umu
005345: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/elizabethkoch5umu
005348: after get https://www.trinethunder.com/sports/sball/2021-22/players/emilywheaton1jym
005638: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/emilywheaton1jym
005643: after get https://www.trinethunder.com/sports/sball/2021-22/players/ashleyswartouta714
005943: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/ashleyswartouta714
005951: after get https://www.trinethunder.com/sports/sball/2021-22/players/ainsleyphillipsmfe9
006243: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/ainsleyphillipsmfe9
006245: after get https://www.trinethunder.com/sports/sball/2021-22/players/adrienneroseybff7
006541: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/adrienneroseybff7
006545: after get https://www.trinethunder.com/sports/sball/2021-22/players/annagilli8rl
006821: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/annagilli8rl
006824: after get https://www.trinethunder.com/sports/sball/2021-22/players/mercededaughertyiswy
007111: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/mercededaughertyiswy
007118: after get https://www.trinethunder.com/sports/sball/2021-22/players/lexiclark77gr
007402: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/lexiclark77gr
007411: after get https://www.trinethunder.com/sports/sball/2021-22/players/angelenaperry2scn
007681: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/angelenaperry2scn
007685: after get https://www.trinethunder.com/sports/sball/2021-22/players/laurenclausenfb4j
007974: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/laurenclausenfb4j
007976: after get https://www.trinethunder.com/sports/sball/2021-22/players/scarlettelliott0bvt
008265: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/scarlettelliott0bvt
008267: after get https://www.trinethunder.com/sports/sball/2021-22/players/ellietrinexhe2
008553: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/ellietrinexhe2
008555: after get https://www.trinethunder.com/sports/sball/2021-22/players/gisellerileybdb8
008838: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/gisellerileybdb8
008840: after get https://www.trinethunder.com/sports/sball/2021-22/players/annakoeppl38q8
009129: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/annakoeppl38q8
009131: after get https://www.trinethunder.com/sports/sball/2021-22/players/taylormurdockgeho
009415: after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/taylormurdockgeho
009415: end all
... data here
getPlayers() took 2413ms
cheerio processing took 6087ms
And, here's the stand-alone code that anyone can run:
const axios = require('axios');
const cheerio = require('cheerio');
const players = 'https://www.trinethunder.com/sports/sball/2021-22/teams/trine?view=roster'
const playerStats = 'https://www.trinethunder.com'
const zeroes = "000000000000000000000000000000";
function zeroPad(num, padLen) {
let str = num "";
let padNum = padLen - str.length;
if (padNum > 0) {
str = zeroes.slice(0, padNum) str;
}
return str;
}
const base = Date.now();
function log(...args) {
let delta = Date.now() - base;
let deltaPad = zeroPad(delta, 6);
console.log(deltaPad ": ", ...args);
}
let getPlayersT = 0;
let cheerioT = 0;
async function run() {
async function getPlayers() {
log("begin getPlayers()");
let startT = Date.now();
const playerLink = [];
const response = await axios(players);
const html = response.data;
const $ = cheerio.load(html);
$("td.text.pinned-col > a", html).each(function() {
const link = $(this).attr("href");
//if link not yet in array, push to array
if (playerLink.indexOf(playerStats link) === -1) {
playerLink.push(playerStats link);
}
});
log("end getPlayers()")
getPlayersT = Date.now() - startT;
return playerLink;
}
async function getPlayerStats(playerLink) {
log("begin getPlayerStats");
const statsArray = [];
await Promise.all(playerLink.map(async link => {
log(`begin get ${link}`)
const response = await axios.get(link);
log(`after get ${link}`)
const html = response.data;
const startT = Date.now();
const $ = cheerio.load(html);
const statName = [];
const statDesc = [];
const statNum = [];
$("h2 > span:nth-child(1)", html).each(function() {
var name = $(this).text();
statName.push(name);
});
$(".stat-title", html).each(function() {
var stat1 = $(this).text();
statDesc.push(stat1);
});
$(".stat-value", html).each(function() {
var stat2 = $(this).text();
statNum.push(stat2);
});
//Conditional is here because sometimes statsArray
//gets filled multiple times
if (statsArray.length < 63) {
statsArray.push(statName, statDesc, statNum);
}
cheerioT = Date.now() - startT;
log(`after cheerio parse ${link}`);
}));
return statsArray;
}
try {
log("begin all")
const playerLink = await getPlayers();
const statsArray = await getPlayerStats(playerLink);
log("end all")
return statsArray;
} catch (e) {
console.log(e);
}
}
run().then(result => {
console.log(result);
console.log(`getPlayers() took ${getPlayersT}ms`);
console.log(`cheerio processing took ${cheerioT}ms`);
}).catch(err => {
console.log("error");
});