I have a simple async function that scrapes data from a list of URLs. Everything works fine, but now I want to save the results to a text file.
I tried using a simple array into which I push every result, including errors.
Now my problem is where I should put the write to the file.
I tried putting it into a separate function and then awaiting that function inside my async function, but the function that writes to the file is always fired first.
Here is the full code:
const https = require("https");
const fs = require("fs");
const readline = require("readline");
const path = require("path");
const urls = [];
const results = [];

/**
 * Requests one URL over HTTPS and records its outcome in `results`.
 *
 * A non-200 response records the 8th "/"-separated segment of the URL.
 * A 200 response records "<id> - <price>", read from the `data` object of the
 * JSON body. Request errors, response stream errors and unparsable bodies are
 * logged and record nothing.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<void>} Resolves once the response has been fully handled.
 *   Rejects only if `https.get` itself throws (e.g. on a malformed URL).
 */
function fetchResult(url) {
  return new Promise((resolve) => {
    https
      .get(url, (res) => {
        const { statusCode } = res;
        if (statusCode !== 200) {
          const firstPath = url.split("/")[7];
          results.push(firstPath);
          console.error("data : " + firstPath + " - " + " nothing found");
          // Drain the body so the socket is released
          res.resume();
          resolve();
          return;
        }
        res.setEncoding("utf8");
        let rawData = "";
        res.on("data", (chunk) => {
          // Accumulate: a body can arrive in several chunks
          rawData += chunk;
        });
        res.on("error", (e) => {
          console.error(`Got error: ${e.message}`);
          resolve();
        });
        res.on("end", () => {
          try {
            const parsedData = JSON.parse(rawData);
            const parsedResult = parsedData["data"]["id"] + " - " + parsedData["data"]["price"];
            results.push(parsedResult);
            console.log("data : " + parsedResult);
          } catch (e) {
            console.error(e.message);
          }
          resolve();
        });
      })
      .on("error", (e) => {
        console.error(`Got error: ${e.message}`);
        resolve();
      });
  });
}

/**
 * Reads urls.txt line by line into `urls`, then requests every URL in turn,
 * waiting for each response to be handled before starting the next request.
 */
(async function readUrls() {
  const fileStream = fs.createReadStream("urls.txt");
  const rl = readline.createInterface({
    input: fileStream,
    crlfDelay: Infinity,
  });
  for await (const line of rl) {
    const url = line.trim();
    // Blank lines are not URLs; skip them instead of issuing a failing request
    if (url !== "") {
      urls.push(url);
    }
  }
  for (const url of urls) {
    try {
      await fetchResult(url);
    } catch (e) {
      // https.get throws synchronously for a malformed URL; log it and move on
      console.error(`Got error: ${e.message}`);
    }
  }
  // Every request has been handled at this point, so `results` is complete.
  // Anything that consumes `results` (such as writing it to a file) has to run
  // from here on, not at module top level.
})();
Here is my simple function to write to the file:
// Persist the collected results, one entry per line.
// fs.writeFile takes (file, data, options, callback): the data has to be a
// string or a buffer-like value, so the array is joined first, and the third
// argument is the encoding rather than a Date.
// NOTE: this call only sees the entries already present in `results` when it
// runs, so it must be made after all requests have finished.
fs.writeFile('result.txt', results.join("\n"), 'utf8', function (err) {
  if (err) {
    console.log("Error occurred", err);
    return;
  }
  // Reported only when the write actually succeeded
  console.log("File write successfull");
});
I tried something like this:
/**
 * Calls firstFunction and continues only once the promise it returns has
 * settled. Produces no value of its own.
 */
async function secondFunction() {
  // Execution pauses here until firstFunction is done
  await firstFunction();
}
What do I want to achieve? I want to scrape every URL from my text file and get the ID and price (each URL returns a simple JSON response in the browser, not HTML, and this part works). At the end I want to save everything to a text file.
CodePudding user response:
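I made a version of your code that uses node-fetch to call the URLs. I prefer it because it is similar to the fetch API that one can use on the web.
To use it, you should install it first. Because the code loads it with require, install version 2; version 3 of node-fetch is ESM-only and cannot be loaded that way:
npm install node-fetch@2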
const fetch = require("node-fetch"); // I prefer to use node-fetch for my calls
const fs = require("fs");
const readline = require("readline");
const path = require("path");
const urls = [];
const results = [];

/**
 * Reads urls.txt line by line, requests every URL one after the other,
 * collects one entry per handled response in `results`, and finally writes the
 * collected entries to result.txt (one entry per line).
 *
 * A non-200 response records the 8th "/"-separated segment of the URL.
 * A 200 response records "<id> - <price>", read from the `data` object of the
 * JSON body. Failed requests and unparsable bodies are logged and record nothing.
 */
(async function readUrls() {
  const fileStream = fs.createReadStream("urls.txt");
  const rl = readline.createInterface({
    input: fileStream,
    crlfDelay: Infinity,
  });
  for await (const line of rl) {
    const url = line.trim();
    // Blank lines are not URLs; skip them instead of issuing a failing request
    if (url !== "") {
      urls.push(url);
    }
  }
  // Make the calls one after the other
  for (const url of urls) {
    try {
      // fetch returns a promise, so the response can simply be awaited
      const res = await fetch(url);
      const { status } = res;
      if (status !== 200) {
        const firstPath = url.split("/")[7];
        results.push(firstPath);
        console.error("data : " + firstPath + " - " + " nothing found");
        // Inside a loop, so move on to the next URL instead of returning
        continue;
      }
      try {
        // Try to read the response body as JSON
        const parsedData = await res.json();
        const parsedResult = parsedData["data"]["id"] + " - " + parsedData["data"]["price"];
        results.push(parsedResult);
        console.log(`Data: ${parsedResult}`);
      } catch (e) {
        // The body was not valid JSON or lacked the expected fields
        console.error(e.message);
      }
    } catch (httpError) {
      // The call to fetch itself failed (network error, invalid URL, ...)
      console.error(httpError.message);
    }
  }
  // Join the results into a single string so they are saved one per line
  const resultAsText = results.join("\n");
  // All URLs have been processed by now, so the file receives every result
  fs.writeFile("result.txt", resultAsText, "utf8", function (err) {
    if (err) {
      console.log("Error occurred", err);
    } else {
      console.log("File write successfull");
    }
  });
})();