Home > Mobile >  Upload any file type to S3 using Lambda
Upload any file type to S3 using Lambda

Time:11-28

I'm trying to upload files to S3 using API Gateway and Lambda. Everything works fine until the request reaches the Lambda function, which looks like this:

import base64
import boto3
import os

# S3 client, created once when the module is imported.
s3_client = boto3.client('s3')
# Target bucket name, read from the environment; a missing S3_BUCKET_NAME
# raises KeyError at import time.
bucket_name = os.environ['S3_BUCKET_NAME']


def lambda_handler(event, context):
    """Base64-decode the request body and store the result in S3.

    Reads ``event['body']``; ``context`` is not used. Returns a dict with
    status code 200 and the body ``'File uploaded'``.
    """
    # This is the call reported in the traceback: b64decode raises ValueError
    # when it is given a str that contains non-ASCII characters.
    contend_decode = base64.b64decode(event['body'])
    # NOTE(review): no Key argument is passed here - confirm put_object accepts that.
    response = s3_client.put_object(Bucket=bucket_name, Body=contend_decode)
    print(response)

    return {
        'statusCode': 200,
        'body': 'File uploaded'
    }

When I upload, for example, an MP3 file, I receive the following error:

[ERROR] ValueError: string argument should contain only ASCII characters
Traceback (most recent call last):
  File "/var/task/lambda_function.py", line 10, in lambda_handler
    contend_decode = base64.b64decode(event['body'])
  File "/var/lang/lib/python3.8/base64.py", line 80, in b64decode
    s = _bytes_from_decode_data(s)
  File "/var/lang/lib/python3.8/base64.py", line 39, in _bytes_from_decode_data
    raise ValueError('string argument should contain only ASCII characters')
[ERROR] ValueError: string argument should contain only ASCII characters Traceback (most recent call last):   File "/var/task/lambda_function.py", line 10, in lambda_handler     contend_decode = base64.b64decode(event['body'])   File "/var/lang/lib/python3.8/base64.py", line 80, in b64decode     s = _bytes_from_decode_data(s)   File "/var/lang/lib/python3.8/base64.py", line 39, in _bytes_from_decode_data     raise ValueError('string argument should contain only ASCII characters')

Any idea about this issue, please?

Edit:

The content of the event is something like this:

{
    "resource": "/upload",
    "path": "/upload",
    "httpMethod": "POST",
    "headers": {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "CloudFront-Forwarded-Proto": "https",
        "CloudFront-Is-Desktop-Viewer": "true",
        "CloudFront-Is-Mobile-Viewer": "false",
        "CloudFront-Is-SmartTV-Viewer": "false",
        "CloudFront-Is-Tablet-Viewer": "false",
        "CloudFront-Viewer-ASN": "5410",
        "CloudFront-Viewer-Country": "FR",
        "Content-Type": "audio/mpeg",
        "Host": "um8xxxxpxx.execute-api.eu-west-1.amazonaws.com",
        "Postman-Token": "fe49e15f-82c6-44c7-8399-4b6fba9b9abc",
        "User-Agent": "PostmanRuntime/7.29.2",
        "Via": "1.1 12bc6711250373a4xxxxxxxxxx44504.cloudfront.net (CloudFront)",
        "X-Amz-Cf-Id": "5Zv2MVCxxxxxxxxxxxxyzMuv_CfIAxxxxxxxxxxxxJyz4JtHb-QImYZGQ==",
        "X-Amzn-Trace-Id": "Root=1-6383d306-4e81300e0000000c3262b7a45",
        "x-api-key": "g4KOPDl5zoB0E2QBpAAXSaESDFyGkR38f000",
        "X-Forwarded-For": "1XX.XX9.2XX.XX9, 1XX.XX6.XX5.XXX",
        "X-Forwarded-Port": "443",
        "X-Forwarded-Proto": "https"
    },
    "multiValueHeaders": {
        "Accept": [
            "*/*"
        ],
        "Accept-Encoding": [
            "gzip, deflate, br"
        ],
        "CloudFront-Forwarded-Proto": [
            "https"
        ],
        "CloudFront-Is-Desktop-Viewer": [
            "true"
        ],
        "CloudFront-Is-Mobile-Viewer": [
            "false"
        ],
        "CloudFront-Is-SmartTV-Viewer": [
            "false"
        ],
        "CloudFront-Is-Tablet-Viewer": [
            "false"
        ],
        "CloudFront-Viewer-ASN": [
            "5410"
        ],
        "CloudFront-Viewer-Country": [
            "FR"
        ],
        "Content-Type": [
            "audio/mpeg"
        ],
        "Host": [
            "um8xxxxpxx.execute-api.eu-west-1.amazonaws.com"
        ],
        "Postman-Token": [
            "fDDDDf-82c8-44c9-DDD1-4b6f9QASFF9abc"
        ],
        "User-Agent": [
            "PostmanRuntime/7.29.2"
        ],
        "Via": [
            "1.1 12bVASD16aeca2DDD44504.cloudfront.net (CloudFront)"
        ],
        "X-Amz-Cf-Id": [
            "5Zv2MVCnaDDDzMuv_CfIA6iC89CiUnjDDDAZXAb-QImYZGQ=="
        ],
        "X-Amzn-Trace-Id": [
            "Root=1-6383AZDD-4e81002e022374c326hu8a45"
        ],
        "x-api-key": [
            "g4KOPDl5zoBia3cT4pYMkynzyGkX00aa"
        ],
        "X-Forwarded-For": [
            "1XX.XX9.2XX.XX9, 1XX.XX6.XX5.XXX"
        ],
        "X-Forwarded-Port": [
            "443"
        ],
        "X-Forwarded-Proto": [
            "https"
        ]
    },
    "queryStringParameters": "None",
    "multiValueQueryStringParameters": "None",
    "pathParameters": "None",
    "stageVariables": "None",
    "requestContext": {
        "resourceId": "adddazq",
        "resourcePath": "/upload",
        "httpMethod": "POST",
        "extendedRequestId": "cR3o-zddEFgazz=",
        "requestTime": "27/Nov/2022:21:13:42  0000",
        "path": "/dev/upload",
        "accountId": "114782879802",
        "protocol": "HTTP/1.1",
        "stage": "dev",
        "domainPrefix": "ua8xjwxraf",
        "requestTimeEpoch": 1669583622098,
        "requestId": "23e099f9-eda4-42b2-8b4f-b1aaea589978",
        "identity": {
            "cognitoIdentityPoolId": "None",
            "cognitoIdentityId": "None",
            "apiKey": "h4KOPDl5zoqsdT4pYMkynzdddaz8f95560",
            "principalOrgId": "None",
            "cognitoAuthenticationType": "None",
            "userArn": "None",
            "apiKeyId": "z887qsddox4",
            "userAgent": "PostmanRuntime/7.29.2",
            "accountId": "None",
            "caller": "None",
            "sourceIp": "176.139.21.129",
            "accessKey": "None",
            "cognitoAuthenticationProvider": "None",
            "user": "None"
        },
        "domainName": "um8xxxxpxx.execute-api.eu-west-1.amazonaws.com",
        "apiId": "um8xxxxpxx"
    },
    "body": "\x04\x08-P�\x10,Gh�m\x0c\x06K����Te�U�-��\r\x01�Y��l�,3�\x11�Q�4$�........6��\x1872Ip�d�p\x1d�M�PX�0`�x�0����d�\x0f�\x0c.ǃ��\x12\x00\x00\r \x00\x00\x01\x18��........",
    "isBase64Encoded": "False"
}

Note: I included only a small portion of the characters that exist in the body, just for demonstration purposes.

CodePudding user response:

The Error

So I still think that John Rotenstein's answer is objectively correct, i.e. the problem is that you can't decode event['body'] into a string, because it's a string in the form of bytes that have non-ASCII characters, and that's why it is throwing an error.

If you look at event['body'] you should be able to maybe piece that much together:

"\x04\x08-P�\x10,Gh�m\x0c\x06K����Te�U�-��\r\x01�Y��l�,3�\x11�Q�4$�........6��\x1872Ip�d�p\x1d�M�PX�0`�x�0����d�\x0f�\x0c.ǃ��\x12\x00\x00\r \x00\x00\x01\x18��........"

If you're unfamiliar with this, this string is actually decoded bytes - the kind of thing you get from doing any of the following - with the caveat that it was cast to a string, so it's no longer a bytes-like object:

a = "hello world"
b = a.encode("utf-8")  # str -> bytes using an explicit encoding
# or
c = bytes(a, "utf-8")  # same result as a.encode("utf-8")
# or - a bytes literal; it may only contain ASCII characters (this rebinds a)
a = b"hello world"

Thing is, I don't know what encoding was used to decode it into bytes, and it's unclear whether I can expect body to be bytes every time or whether it would be a base64 string, as isBase64Encoded might lead me to believe. I'm not 100% certain, but my assumption is that if you do something like the below, granted the resulting decoded string may not be base64, you can get a base64 string output:

if not event['isBase64Encoded']:
    # "whatever that encoding is" is a placeholder: substitute the real codec name.
    event['body'] = bytes(event['body'], "whatever that encoding is").decode()
    # b64encode takes a bytes-like object and returns the Base64-encoded bytes.
    # b64decode takes a bytes-like object or ASCII string and returns the decoded bytes.
    event['body'] = base64.b64decode(event['body'])
print(event['body'])

You should get whatever that decoded string (ie bytes) currently is, but you have to know what was used to decode it.


Pushing Objects to the Bucket

However, you seem to also want to push those bytes to a bucket - the docs:

response = client.put_object(
    # `encoding` is a placeholder for the codec that produced the body string.
    Body=bytes(event["body"], encoding),
    Bucket="my_bucket",
    #ContentEncoding=event["multiValueHeaders"]["Accept-Encoding"],
    # ContentType must be a single string; "multiValueHeaders" holds lists,
    # so read the value from "headers" instead.
    ContentType=event["headers"]["Content-Type"],
    Key="my/object/name.mp4"
)

So realistically, set all of those keyword values and you should be golden - you don't have to run a base64 decode operation in this instance (based on what was returned in your event - you might if it actually was encoded as a base64 string), just pass put_object() the bytes. Here is a link to what ContentEncoding is, compared to ContentType, which may shed some light on whether or not you should use it or need to use it.


What Your Function Should Probably Be

You shouldn't use such generalized try / except statements like I did below, but if it really bothers you, you can hunt down what those errors throw and add it in yourself or remove them completely, but conceptually, this should be what you want.

import base64
import boto3
import os

# S3 client, created once when the module is imported.
s3_client = boto3.client('s3')
# Target bucket name, read from the environment; a missing S3_BUCKET_NAME
# raises KeyError at import time.
bucket_name = os.environ['S3_BUCKET_NAME']


def lambda_handler(event, context):
    """Store the body of an API Gateway proxy request as an object in S3.

    Args:
        event: API Gateway proxy event. Reads ``body``, ``isBase64Encoded``
            and the ``Content-Type`` entry of ``headers``.
        context: Lambda context object (unused).

    Returns:
        A proxy-style response dict: status 200 with ``'File uploaded'`` on
        success, or status 406 with ``'Misconfigured object.'`` when the body
        cannot be decoded or the upload fails.
    """
    raw_body = event.get('body') or ''

    if event.get('isBase64Encoded'):
        try:
            payload = base64.b64decode(raw_body)
        except ValueError:
            # binascii.Error (bad padding / alphabet) is a ValueError subclass,
            # as is the "only ASCII characters" error raised for str input.
            return {
                # AWS probably returns a 403, so maybe return something different for debugging?
                'statusCode': 406,
                'body': 'Misconfigured object.'
            }
    elif isinstance(raw_body, str):
        # A non-base64 body is text. NOTE(review): binary uploads that arrive
        # here as text were most likely mangled before reaching the function;
        # configure binary media types on the API so they arrive base64-encoded.
        payload = raw_body.encode('utf-8')
    else:
        payload = raw_body

    # put_object expects ContentType as one string, so use the single-valued
    # "headers" map (not "multiValueHeaders", whose values are lists). Header
    # names are matched case-insensitively.
    headers = event.get('headers') or {}
    content_type = next(
        (value for name, value in headers.items()
         if name.lower() == 'content-type'),
        'application/octet-stream',
    )

    try:
        response = s3_client.put_object(
            Body=payload,
            Bucket=bucket_name,
            ContentType=content_type,
            # Placeholder key: every upload overwrites this same object.
            Key="my/object/name.mp4"
        )
    except Exception as exc:
        # Top-level boundary of the handler: log the real cause instead of
        # swallowing it silently, then report the failure to the caller.
        print(f'S3 upload failed: {exc!r}')
        return {
            # AWS probably returns a 403, so maybe return something different for debugging?
            'statusCode': 406,
            'body': 'Misconfigured object.'
        }

    print(response)
    return {
        'statusCode': 200,
        'body': 'File uploaded'
    }

Extra Resources

Base64 Encode - docs

Base64 Decode - docs

CodePudding user response:

The error is:

ValueError: string argument should contain only ASCII characters

The error is on this line:

contend_decode = base64.b64decode(event['body'])

So, it is saying that event['body'] does not contain base64 encoded data.

The binary content will actually be provided in the content parameter.

Therefore, the line should instead be:

contend_decode = base64.b64decode(event['content'])
  • Related