I am currently trying to develop a Google Cloud Function that parses multipart files (Excel or CSV format) in order to populate a Firestore database.
I am using busboy in a helper function to parse the file, convert it to json and return it to the main function.
Everything goes well until I try to return the parsed data. I thought the most logical way of doing this was to return the data from the busboy 'finish' event, but it does not seem to return the data: once back in the main function, it is undefined. I first suspected an issue related to asynchronous code execution, but when I only printed the data in the busboy 'finish' event, it worked properly.
I've tried to find some related content online but unfortunately did not succeed. Here is my helper function:
/**
 * Takes a multipart request and is meant to send back readable data.
 *
 * Creates a busboy parser from `req.headers`, feeds it `req.rawBody`, and for
 * the upload under the form key 'file' buffers the bytes and parses them as
 * Excel (XLSX.read + excelToJson) or CSV (CSV.parse) into `finalData`.
 *
 * NOTE: this function body contains no `return` statement of its own. The
 * only `return finalData` is inside the 'finish' listener below, so it
 * returns from that listener callback, not from processRequest.
 *
 * NOTE: `formats`, `bytes`, `buffer`, `workbook` and `json` are assigned
 * without a const/let/var declaration.
 *
 * @param req request object; this code reads `req.headers` and `req.rawBody`
 */
const processRequest = (req) => {
const busboy = Busboy({headers: req.headers});
// Accepted MIME types: legacy Excel, CSV, and xlsx
formats = ['application/vnd.ms-excel', 'text/csv', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'];
// Filled in by the file 'end' handler (Excel) or the CSV stream 'end' handler
var finalData;
// fieldname is the request key name of the file
// file is the stream
// fname carries the file's metadata; the code below reads fname.mimeType
busboy.on('file', (fieldname, file, fname) => {
// Checks if file is right format
// (this throw happens inside the event listener, for any field name)
if(!formats.includes(fname.mimeType)) throw new FileFormatError('File must be excel or csv');
bytes = [];
// Checks that the request key is the right one
if(fieldname == 'file') {
// Data is the actual bytes, adds it to the buffer each time received
file.on('data', (data) => {
bytes.push(data);
});
// Concatenates the bytes into a buffer and reads data given mimetype
file.on('end', async () => {
buffer = Buffer.concat(bytes);
// Excel branch: parsed synchronously, so finalData is set before this handler returns
if(fname.mimeType === 'application/vnd.ms-excel' ||
fname.mimeType === 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet') {
workbook = XLSX.read(buffer, {type: 'buffer'});
json = excelToJson(workbook);
console.log(json);
finalData = json;
}
// CSV branch: rows arrive through stream events; finalData is only set in the
// stream's 'end' listener.
// NOTE(review): that 'end' may fire after busboy's 'finish' — confirm ordering.
if (fname.mimeType === 'text/csv') {
var csv = [];
const stream = Readable.from(buffer.toString());
stream.pipe(CSV.parse({delimiter: ','}))
.on('error', (err) => {
// Parse errors are only logged here; nothing is propagated to the caller
console.log('csv parsing error');
console.log(err.message);
}).on('data', (row) => {
csv.push(row);
}).on('end', () => {
console.log('csv file properly processed');
console.log(csv);
// CSV PARSING LOGIC TO COME, JUST TESTING RIGHT NOW
finalData = csv;
});
}
});
}
});
busboy.on('finish', () => {
console.log('busboy finished');
// This returns from the listener callback only; processRequest has already
// reached the end of its body by the time a value could be handed back here.
return finalData;
// WHEN ONLY PRINTED THE DATA IS PRESENT AND DISPLAYS PROPERLY HERE
})
// Processes request body bytes
busboy.end(req.rawBody);
}
There must be something I am misunderstanding but as of yet I cannot point out what.
Thanks in advance for your time :)
CodePudding user response:
You're not waiting for your CSV parsing to actually finish.
It would be better to refactor your async code to use `async`/`await`.
Since you're using libraries that might only support callback-style async, you'll need to do some `new Promise` wrapping yourself.
Understandably, I haven't tested the below code, but something like this...
/**
 * Parse the given buffer as a CSV, return a promise of rows.
 *
 * The buffer is decoded to a string, streamed through `CSV.parse` with a
 * comma delimiter, and every emitted row is collected in order.
 *
 * @param {Buffer} buffer - Raw bytes of the CSV file.
 * @returns {Promise<Array>} Resolves with the collected rows once the parser
 *   emits 'end'; rejects with the parser's error if it emits 'error'.
 */
function parseCSV(buffer) {
  return new Promise((resolve, reject) => {
    const rows = [];
    // The parser object is `CSV` (as in `CSV.parse`); calling `.parse` on the
    // MIME-type string "text/csv" throws because strings have no such method.
    const parser = CSV.parse({ delimiter: "," });
    Readable.from(buffer.toString())
      .pipe(parser)
      .on("error", reject)
      .on("data", (row) => rows.push(row))
      .on("end", () => resolve(rows));
  });
}
/**
 * Turn an uploaded spreadsheet buffer into parsed data, choosing the parser
 * from the MIME type.
 *
 * Excel MIME types go through `XLSX.read` followed by `excelToJson`;
 * "text/csv" is delegated to `parseCSV`.
 *
 * @param {string} mimeType - MIME type reported for the upload.
 * @param {Buffer} buffer - Raw file bytes.
 * @returns {Promise<*>} Whatever the selected parser produces.
 * @throws {Error} (as a rejection) when the MIME type is not one of the three handled.
 */
async function parseSpreadsheet(mimeType, buffer) {
  switch (mimeType) {
    case "application/vnd.ms-excel":
    case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
      return excelToJson(XLSX.read(buffer, { type: "buffer" }));
    case "text/csv":
      return parseCSV(buffer);
    default:
      throw new Error(`Unknown mime type ${mimeType}`);
  }
}
/**
 * Get the bytes of the field `fieldName` in the request.
 *
 * Builds a busboy parser from `req.headers`, feeds it `req.rawBody`, and
 * buffers the file uploaded under `fieldName`.
 *
 * @param {object} req - Request; `req.headers` and `req.rawBody` are read.
 * @param {string} fieldName - Form field name of the file to extract.
 * @returns {Promise<{info: object, buffer: Buffer}>} Resolves with the file
 *   info object passed by busboy and the concatenated file bytes.
 *   Rejects if the file stream or the parser emits 'error', or if parsing
 *   completes without any file under `fieldName` (previously the promise
 *   would stay pending forever in that case).
 */
function getFileFromRequest(req, fieldName) {
  return new Promise((resolve, reject) => {
    const busboy = Busboy({ headers: req.headers });
    // Set as soon as a matching 'file' event is seen, so the 'close' handler
    // can tell "not present" apart from "present, still being read".
    let found = false;

    busboy.on("file", (name, file, info) => {
      // Only process the field we care about
      if (name !== fieldName) {
        // Unwanted streams must still be consumed, otherwise the parser
        // cannot run to completion.
        file.resume();
        return;
      }
      found = true;
      const bytes = [];
      file.on("data", (data) => bytes.push(data));
      file.on("end", () =>
        resolve({
          info,
          buffer: Buffer.concat(bytes),
        }),
      );
      file.on("error", reject);
    });

    // Malformed multipart bodies surface as parser-level errors.
    busboy.on("error", reject);

    // Parsing is done; if the wanted field never showed up, fail explicitly
    // instead of leaving the caller awaiting forever.
    busboy.on("close", () => {
      if (!found) {
        reject(new Error(`No file found in field "${fieldName}"`));
      }
    });

    busboy.end(req.rawBody);
  });
}
/**
 * Extract the upload sent under the "file" form field and parse it as a
 * spreadsheet.
 *
 * @param {object} req - Incoming multipart request.
 * @returns {Promise<*>} The parsed spreadsheet data.
 */
async function parseRequest(req) {
  // Pull the uploaded file (metadata + bytes) out of the request.
  const upload = await getFileFromRequest(req, "file");
  // Hand it to the parser selected by its MIME type; callers can post-process
  // the resolved data as needed.
  return parseSpreadsheet(upload.info.mimeType, upload.buffer);
}