"InvalidParameterType" error for image files sent as blob to AWS Textract from external source

CURRENTLY

I am trying to get AWS Textract working for images supplied from a function in Google Scripts, which are sent to a Lambda resolver. I am following the documentation at https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Textract.html#analyzeDocument-property

My Google Scripts code:

/**
 * Looks up a Google Drive file by id and PUTs it to the Lambda resolver
 * endpoint inside a JSON payload of the form `{ doc: ... }`.
 *
 * This is the version that triggers the error described under ISSUE: the
 * Blob object returned by `getBlob()` is handed to `JSON.stringify` as-is.
 * NOTE(review): a Blob is presumably not serialized as its raw bytes by
 * `JSON.stringify`, so the receiver does not get usable image data.
 *
 * @param {string} id - Google Drive file id.
 */
function googleFunction(id) {
  const file = DriveApp.getFileById(id);
  console.log(`File is a ${file.getMimeType()}`);
  const blob = file.getBlob();

  const params = {
    doc: blob,
  };

  const options = {
    method: "PUT",
    // UrlFetchApp reads the content type from `contentType`; a top-level
    // "Content-Type" key is not one of its recognized options.
    contentType: "application/json",
    payload: JSON.stringify(params),
  };

  UrlFetchApp.fetch("https://api-path/prod/resolver", options);
}

My Lambda resolver code:

"use strict";

const AWS = require("aws-sdk");

exports.handler = async (event) => {
  let params = JSON.parse(event.body);
  console.log("Parse as document...");
  let textract = new AWS.Textract();
  let doc = params["doc"];
  let config = {
    Document: {
      Bytes: doc,
      FeatureTypes: ["TABLES"],
    }
  };
  textract.analyzeDocument(config, function (err, data) {
    console.log("analyzing...");
    if (err) {
      console.log(err, err.stack);
    }
    // an error occurred
    else {
      console.log("data:"   JSON.stringfy(data));
    } // successful response
  });
};

ISSUE

File is successfully sent from Google Scripts to Lambda, but the following error is returned:

"errorType": "InvalidParameterType",
"errorMessage": "Expected params.Document.Bytes to be a string, Buffer, Stream, Blob, or typed array object"

Questions

Is there a way of verifying what the format of the doc variable is, to ensure it meets AWS Textract's requirements?
Can anyone see a possible cause for the errors being returned?

NOTES

Textract works fine when the same file is uploaded to an S3 bucket, and supplied in the config using: S3Object: { Bucket: 'bucket_name', Name: 'file_name' }
I have confirmed the file is a JPEG

CodePudding user response：

Got it working with 2 changes:

added getBytes() to Google side code
added Buffer.from() to AWS side code

My Google Scripts code:

/**
 * Looks up a Google Drive file by id and PUTs its raw bytes to the Lambda
 * resolver endpoint inside a JSON payload of the form `{ doc: [...] }`.
 *
 * @param {string} id - Google Drive file id.
 */
function googleFunction(id) {
  const file = DriveApp.getFileById(id);
  console.log(`File is a ${file.getMimeType()}`);
  // getBytes() turns the Blob into an array of byte values, which
  // JSON.stringify can carry as a plain array of numbers.
  const bytes = file.getBlob().getBytes();

  const params = {
    doc: bytes,
  };

  const options = {
    method: "PUT",
    // UrlFetchApp reads the content type from `contentType`; a top-level
    // "Content-Type" key is not one of its recognized options.
    contentType: "application/json",
    payload: JSON.stringify(params),
  };

  UrlFetchApp.fetch("https://api-path/prod/resolver", options);
}

My Lambda resolver code:

"use strict";

const AWS = require("aws-sdk");

exports.handler = async (event) => {
  let params = JSON.parse(event.body);
  console.log("Parse as document...");
  let textract = new AWS.Textract();
  let doc = params["doc"];
  let config = {
    Document: {
      Bytes: Buffer.from(doc),
      FeatureTypes: ["TABLES"],
    }
  };
  textract.analyzeDocument(config, function (err, data) {
    console.log("analyzing...");
    if (err) {
      console.log(err, err.stack);
    }
    // an error occurred
    else {
      console.log("data:"   JSON.stringfy(data));
    } // successful response
  });
};