i'm scraping some data from the web and want to push all the data that comes to my db.
I do it with node.js with help of thread_workers
.
each thread scrap data from another url
and when the scraping is done and I got all the objects that I need I post it.
in each thread when it's end to scrap the data i get an array with minimum 40000 objects ,
that i need to post when i start to post it to the db in some point it's throw me this error connect EADDRINUSE 18.62.228.13:443
full exception :
AxiosError: connect EADDRINUSE 18.62.228.13:443
at TCPConnectWrap.afterConnect [as oncomplete] (node:net:1161:16) {
port: 443,
address: '18.62.228.13',
syscall: 'connect',
code: 'EADDRINUSE',
errno: -4091,
config: {
transitional: {
silentJSONParsing: true,
forcedJSONParsing: true,
clarifyTimeoutError: false
},
adapter: [Function: httpAdapter],
transformRequest: [ [Function: transformRequest] ],
transformResponse: [ [Function: transformResponse] ],
timeout: 0,
xsrfCookieName: 'XSRF-TOKEN',
xsrfHeaderName: 'X-XSRF-TOKEN',
maxContentLength: -1,
maxBodyLength: -1,
env: { FormData: [Function] },
validateStatus: [Function: validateStatus],
headers: {
Accept: 'application/json, text/plain, */*',
'Content-Type': 'application/json',
'User-Agent': 'axios/0.27.2',
'Content-Length': 168
},
method: 'post'
},
request: <ref *1> Writable {
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: false,
needDrain: false,
ending: false,
ended: false,
finished: false,
destroyed: false,
decodeStrings: true,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: true,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 0,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: true,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
_events: [Object: null prototype] {
response: [Function: handleResponse],
error: [Function: handleRequestError],
socket: [Function: handleRequestSocket]
},
_eventsCount: 3,
_maxListeners: undefined,
_options: {
maxRedirects: 21,
maxBodyLength: 10485760,
protocol: 'https:',
path: '/m/insert',
method: 'POST',
headers: [Object],
agent: undefined,
agents: [Object],
auth: undefined,
hostname: 'lalalalala',
port: null,
nativeProtocols: [Object],
pathname: 'lalalalal'
},
_ended: false,
_ending: true,
_redirectCount: 0,
_redirects: [],
_requestBodyLength: 168,
_requestBodyBuffers: [ [Object] ],
_onNativeResponse: [Function (anonymous)],
_currentRequest: ClientRequest {
_events: [Object: null prototype],
_eventsCount: 7,
_maxListeners: undefined,
outputData: [],
outputSize: 0,
writable: true,
destroyed: false,
_last: true,
chunkedEncoding: false,
shouldKeepAlive: false,
maxRequestsOnConnectionReached: false,
_defaultKeepAlive: true,
useChunkedEncodingByDefault: true,
sendDate: false,
_removedConnection: false,
_removedContLen: false,
_removedTE: false,
_contentLength: null,
_hasBody: true,
_trailer: '',
finished: false,
_headerSent: true,
_closed: false,
socket: [TLSSocket],
_header: 'POST lalalala HTTP/1.1\r\n'
'Accept: application/json, text/plain, */*\r\n'
'Content-Type: application/json\r\n'
'User-Agent: axios/0.27.2\r\n'
'Content-Length: 168\r\n'
'Host: alalalala\r\n'
'Connection: close\r\n'
'\r\n',
_keepAliveTimeout: 0,
_onPendingData: [Function: nop],
agent: [Agent],
socketPath: undefined,
method: 'POST',
maxHeaderSize: undefined,
insecureHTTPParser: undefined,
path: '/manual/search/insert',
_ended: false,
res: null,
aborted: false,
timeoutCb: null,
upgradeOrConnect: false,
parser: null,
maxHeadersCount: null,
reusedSocket: false,
host: 'search.findmanual.guru',
protocol: 'https:',
_redirectable: [Circular *1],
[Symbol(kCapture)]: false,
[Symbol(kNeedDrain)]: false,
[Symbol(corked)]: 0,
[Symbol(kOutHeaders)]: [Object: null prototype]
},
_currentUrl: ,
[Symbol(kCapture)]: false
}
}
AxiosError: connect EADDRINUSE 18.62.228.13:443
at TCPConnectWrap.afterConnect [as oncomplete] (node:net:1161:16) {
port: 443,
address: '18.62.228.13',
syscall: 'connect',
code: 'EADDRINUSE',
errno: -4091,
config: {
transitional: {
silentJSONParsing: true,
forcedJSONParsing: true,
clarifyTimeoutError: false
},
adapter: [Function: httpAdapter],
transformRequest: [ [Function: transformRequest] ],
transformResponse: [ [Function: transformResponse] ],
timeout: 0,
xsrfCookieName: 'XSRF-TOKEN',
xsrfHeaderName: 'X-XSRF-TOKEN',
maxContentLength: -1,
maxBodyLength: -1,
env: { FormData: [Function] },
validateStatus: [Function: validateStatus],
headers: {
Accept: 'application/json, text/plain, */*',
'Content-Type': 'application/json',
'User-Agent': 'axios/0.27.2',
'Content-Length': 167
},
method: 'post',
request: <ref *1> Writable {
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: false,
needDrain: false,
ending: false,
ended: false,
finished: false,
destroyed: false,
decodeStrings: true,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: true,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 0,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: true,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
_events: [Object: null prototype] {
response: [Function: handleResponse],
error: [Function: handleRequestError],
socket: [Function: handleRequestSocket]
},
_eventsCount: 3,
_maxListeners: undefined,
_options: {
maxRedirects: 21,
maxBodyLength: 10485760,
protocol: 'https:',
path: 'insert',
method: 'POST',
headers: [Object],
agent: undefined,
agents: [Object],
auth: undefined,
hostname: 'lalalala',
port: null,
nativeProtocols: [Object],
pathname: 'lalallalaa'
},
_ended: false,
_ending: true,
_redirectCount: 0,
_redirects: [],
_requestBodyLength: 167,
_requestBodyBuffers: [ [Object] ],
_onNativeResponse: [Function (anonymous)],
_currentRequest: ClientRequest {
_events: [Object: null prototype],
_eventsCount: 7,
_maxListeners: undefined,
outputData: [],
outputSize: 0,
writable: true,
destroyed: false,
_last: true,
chunkedEncoding: false,
shouldKeepAlive: false,
maxRequestsOnConnectionReached: false,
_defaultKeepAlive: true,
useChunkedEncodingByDefault: true,
sendDate: false,
_removedConnection: false,
_removedContLen: false,
_removedTE: false,
_contentLength: null,
_hasBody: true,
_trailer: '',
finished: false,
_headerSent: true,
_closed: false,
socket: [TLSSocket],
_header: 'POST HTTP/1.1\r\n'
'Accept: application/json, text/plain, */*\r\n'
'Content-Type: application/json\r\n'
'User-Agent: axios/0.27.2\r\n'
'Content-Length: 167\r\n'
'Host: lallala\r\n'
'Connection: close\r\n'
'\r\n',
_keepAliveTimeout: 0,
_onPendingData: [Function: nop],
agent: [Agent],
socketPath: undefined,
method: 'POST',
maxHeaderSize: undefined,
insecureHTTPParser: undefined,
path: '/manual/search/insert',
_ended: false,
res: null,
aborted: false,
timeoutCb: null,
upgradeOrConnect: false,
parser: null,
maxHeadersCount: null,
reusedSocket: false,
host: 'sllalallaau',
protocol: 'https:',
_redirectable: [Circular *1],
[Symbol(kCapture)]: false,
[Symbol(kNeedDrain)]: false,
[Symbol(corked)]: 0,
[Symbol(kOutHeaders)]: [Object: null prototype]
},
_currentUrl: 'https://lalalalal',
[Symbol(kCapture)]: false
}
}
and here is my thread function :
async function parseData(url) {
const {data} = await axios.get(url ).catch(console.log)
const $ = cheerio.load(data);
const result = [];
const obj = {};
const header = $('h1.Uheader');
obj.brand = $(header[0]).text();
const category = $('div.cathead');
const categoryArray = [];
for (let i = 0; i < category.length; i ) {
categoryArray.push({"href": $(category[i]).children("a").attr('href'), "text":
$(category[i]).children("a").text().replace(/[^a-zA-Z0-9 ]/g, '').trim()})
}
for (let i = 0; i < categoryArray.length; i ) {
try {
obj.category = categoryArray[i].text;
const dataTwo = await axios.get("https://www.lalalala.com"
categoryArray[i].href).catch()
const cher = cheerio.load(dataTwo.data);
const aTag = cher('div.col-sm-2.mname')
for (let j = 0 ; j < aTag.length; j ) {
obj.url = "https://www.lalalala.com" $(aTag[j]).children("a").attr('href');
obj.title = obj.brand " " categoryArray[i].text " " $(aTag[j]).children("a").text().replace(/[^a-zA-Z0-9 ]/g, '').trim();
//result.push(obj)
axios.post("https://lalalalala", obj)
.then(data => console.log("ok " j))
.catch(e => {
console.log(e)
});
}
} catch (e) {
}
}
parentPort.postMessage({message : "done"});
}
please can someone please explain me why this is happning and how can I fix it ?
CodePudding user response:
Because (based on your comment here) your application is performing a lot of POST requests, it's important to limit the number of concurrent POST requests that can be performed. Otherwise, there may be (10's of) thousands of POST requests being executed at about the same time, which can cause problems (not only locally, but also on the server the requests are sent to).
The error connect EADDRINUSE
means that there are no more free TCP ports available on the local machine (a TCP connection to a remote server also requires a local TCP port, called an "ephemeral port" because it's only in use for the duration of the request) because there are too many requests pending.
An possible fix, and one that you said is working, is to wait for each POST request to finish before continuing the rest of the code:
await axios.post(…)
If that also doesn't help you'd have to look into job queues that can be configured to limit the number of concurrent requests/promises/jobs/...).