I'm following this tutorial to gather a file from my Azure Data Lake Storage, then temporarily store it locally in my Azure Function, perform some operations within that file (which is a .xlsx file) and some more things that aren't relevant right now. I'm getting the following error when trying to locally open the downloaded file with the ExcelJS library:
Result: FailureException: Error: End of data reached (data length = 100338, asked index = 161705). Corrupted zip
The full code is as follows:
const Excel = require('exceljs');
const wb = new Excel.Workbook();
const fs = require('fs');
const path = require('path');
const { StorageSharedKeyCredential, DataLakeServiceClient } = require("@azure/storage-file-datalake");
module.exports = async function (context, req) {
const accountName = "xx";
const accountKey = "xx";
// Connect to the storage account
const datalakeServiceClient = GetDataLakeServiceClient(accountName, accountKey);
// Get the container
const containerPath = 'my-path';
const fileSystemClient = datalakeServiceClient.getFileSystemClient(containerPath);
// Obtain the file
const fileClient = fileSystemClient.getFileClient('my-file.xlsx');
const downloadResponse = await fileClient.read();
const downloaded = await streamToString(downloadResponse.readableStreamBody);
async function streamToString(readableStream) {
return new Promise((resolve, reject) => {
const chunks = [];
readableStream.on("data", (data) => {
chunks.push(data.toString());
});
readableStream.on("end", () => {
resolve(chunks.join(""));
});
readableStream.on("error", reject);
});
}
// Temporarily create it
fs.writeFileSync('excel.xlsx', downloaded, function (err) {
if (err) throw err;
});
// Proceed with the operations on the excel
var filePath = path.resolve('/home/site/wwwroot', 'excel.xlsx')
await wb.xlsx.readFile(filePath).then(function() {
context.log('here2');
var sh = wb.getWorksheet('Sheet 1');
context.log(sh.getRow(7));
});
// Delete the temporary file
fs.unlinkSync('excel.xlsx', function (err) {
if (err) throw err;
});
}
/**
 * Builds a DataLakeServiceClient for the account's
 * `https://<accountName>.dfs.core.windows.net` endpoint, authenticated
 * with a shared-key credential.
 * @param accountName The name of the storage account
 * @param accountKey Access Key for the storage account
 * @returns The newly constructed DataLakeServiceClient
 */
function GetDataLakeServiceClient(accountName, accountKey) {
  const credential = new StorageSharedKeyCredential(accountName, accountKey);
  const endpoint = `https://${accountName}.dfs.core.windows.net`;
  return new DataLakeServiceClient(endpoint, credential);
}
The file is present if I navigate to the path /home/site/wwwroot/, but if I try to download and open it, it says there's an error with the file. This means it's not being copied correctly from the data lake.
Any input or help would be appreciated. Many thanks.
CodePudding user response:
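Your streamToString converts every binary chunk to a string, which corrupts the bytes of the .xlsx file (it is a zip archive). Please change it so that it collects the chunks as Buffers and resolves with a single Buffer,
something like below: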
/**
 * Reads a readable stream to completion and returns its contents as one Buffer.
 *
 * Despite the historical name, the result is binary, not a string: the bytes
 * are never decoded, so binary payloads such as .xlsx files survive intact.
 *
 * @param readableStream Stream emitting "data", "end" and "error" events.
 * @returns {Promise<Buffer>} Resolves with all bytes received, in order
 *   (an empty Buffer when the stream ends without data); rejects with the
 *   stream's error if it emits "error".
 */
async function streamToString(readableStream) {
  return new Promise((resolve, reject) => {
    // Collect chunks and concatenate once at the end instead of re-copying
    // the accumulated buffer on every "data" event.
    const chunks = [];
    readableStream.on("data", (dataBuffer) => {
      // A stream with an encoding set emits strings; normalise to Buffer.
      chunks.push(Buffer.isBuffer(dataBuffer) ? dataBuffer : Buffer.from(dataBuffer));
    });
    readableStream.on("end", () => {
      resolve(Buffer.concat(chunks));
    });
    readableStream.on("error", reject);
  });
}