Scenario: : I am trying to push some selected items from source table to destination table which are in AWS dynamodb using python boto3 module. For this, I am getting the interested rows from source table, but unable to put the same items in the destination table.
NOTE: : Here, it has Decimal values - Handling them successfully. But, it also has the attribute which has dictionary values. where I am failing to push those entries.
Here is the code.
import boto3
import argparse
# import simplejson as json
import json
from decimal import *
from boto3.dynamodb.conditions import Key, Attr
class DecimalEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, Decimal):
return str(obj)
return json.JSONEncoder.default(self, obj)
def migrate_data(region, sourcetable, desttable):
"""
:param region: region of AWS
:param sourcetable: source table to collect the data
:param desttable: dest backup table to move the data
:return: Move the selected data to dest and delete from source table
"""
try:
dynamodb = boto3.resource('dynamodb', region_name=region)
pdynamodb = boto3.client('dynamodb', region_name=region)
table = dynamodb.Table(sourcetable)
sresponse = table.scan(
FilterExpression=Attr('expirationDate').contains('2021-') & Attr('entityType').eq('prospect')
)
data = sresponse['Items']
print("## Quantity of 2021 records: {}".format(len(data)))
## Move to destination:
print("## Uploading the selected data from {} to {}".format(sourcetable, desttable))
for item in data:
jdata = json.dumps(item, cls=DecimalEncoder)
print("## Decimal Encoded data type is: {} and data : {}".format(type(jdata), jdata))
jitem = json.loads(jdata)
print("## Data loads must be in dict now: {}".format(type(jitem)))
item = {
'expirationDate': {'S': jitem['expirationDate']},
'entityType': {'S': jitem['entityType']},
'findProspectIndex': {'S': jitem['findProspectIndex']},
'apr': {'S': jitem['apr']},
'createdDate': {'S': jitem['createdDate']},
'marketingCampaignId': {'S': jitem['marketingCampaignId']},
'productLine': {'S': jitem['productLine']},
'status': {'S': jitem['status']},
'offerAmount': {'S': jitem['offerAmount']},
'pqCode': {'S': jitem['pqCode']},
'prospectOfferCode': {'S': jitem['prospectOfferCode']},
'customerType': {'S': jitem['customerType']},
'id': {'S': jitem['id']},
'updatedDate': {'S': jitem['updatedDate']},
# 'personalInfo': {'S': jitem['personalInfo']}
'personalInfo': {
'firstName': {'S': jitem['personalInfo']['firstName']},
'lastName': {'S': jitem['personalInfo']['lastName']},
'phoneNumber': {'S': jitem['personalInfo']['phoneNumber']},
'last4Ssn': {'S': jitem['personalInfo']['last4Ssn']},
'residentialAddress': {
'zip': {'S': jitem['personalInfo']['residentialAddress']['zip']},
'addressLine1': {'S': jitem['personalInfo']['residentialAddress']['addressLine1']},
'addressLine2': {'S': jitem['personalInfo']['residentialAddress']['addressLine2']},
'state': {'S': jitem['personalInfo']['residentialAddress']['state']},
'city': {'S': jitem['personalInfo']['residentialAddress']['city']}
}
}
}
print("## Entry item type is : {} and data: {}".format(type(item), item))
dresponse = pdynamodb.put_item(
TableName=desttable,
Item=item
)
print("## Type of data : {}".format(type(dresponse)))
print("## Quantity of 2021 records moved - {}".format(len(dresponse)))
## Remove the records from source table
for item in data:
print("## Deleting the item of id : {}".format(item['id']))
sresponse.delete_item(
Key={'id': item['id']}
)
except Exception as E:
print("## Caught an exception while deleting the data.. {}".format(E))
if __name__ == '__main__':
print("################### MIGRATE DATA from DynamoDB table to other table ####################\n")
parser = argparse.ArgumentParser(description="Input the source and destincation tables to migrate data")
parser.add_argument("-r", "--region", dest="region", action="store", help="provide region name",
default="us-east-1", required=False)
parser.add_argument("-st", "--sourcetable", dest="sourcetable", action="store", help="provide source table name",
required=True)
parser.add_argument("-dt", "--desttable", dest="desttable", action="store", help="provide destination table name",
required=True)
args = parser.parse_args()
migrate_data(args.region, args.sourcetable, args.desttable)
ERROR:
## Decimal Encoded data type is: <class 'str'> and data : {"expirationDate": "2021-11-04T00:00:00.000Z", "entityType": "prospect", "findProspectIndex": "curran_1_6904_83202", "apr": "26.99", "createdDate": "2021-10-29T11:01:52.172Z", "marketingCampaignId": "NCPQCARD20210913", "productLine": "credit card", "status": "new", "offerAmount": "0", "pqCode": "347384022", "prospectOfferCode": "1000100010003000", "customerType": "NC", "id": "e8b3d41d-ac0b-4790-9147-ee5b409b9a13", "updatedDate": "2021-10-29T11:01:52.172Z", "personalInfo": {"firstName": "RONALD_1", "lastName": "curran_1", "phoneNumber": "2083176132.0", "last4Ssn": "6904", "residentialAddress": {"zip": "83202", "addressLine1": "5085 GALENA ST", "addressLine2": "", "state": "ID", "city": "POCATELLO"}}}
## Data loads must be in dict now: <class 'dict'>
## Entry item type is : <class 'dict'> and data: {'expirationDate': {'S': '2021-11-04T00:00:00.000Z'}, 'entityType': {'S': 'prospect'}, 'findProspectIndex': {'S': 'curran_1_6904_83202'}, 'apr': {'S': '26.99'}, 'createdDate': {'S': '2021-10-29T11:01:52.172Z'}, 'marketingCampaignId': {'S': 'NCPQCARD20210913'}, 'productLine': {'S': 'credit card'}, 'status': {'S': 'new'}, 'offerAmount': {'S': '0'}, 'pqCode': {'S': '347384022'}, 'prospectOfferCode': {'S': '1000100010003000'}, 'customerType': {'S': 'NC'}, 'id': {'S': 'e8b3d41d-ac0b-4790-9147-ee5b409b9a13'}, 'updatedDate': {'S': '2021-10-29T11:01:52.172Z'}, 'personalInfo': {'firstName': {'S': 'RONALD_1'}, 'lastName': {'S': 'curran_1'}, 'phoneNumber': {'S': '2083176132.0'}, 'last4Ssn': {'S': '6904'}, 'residentialAddress': {'zip': {'S': '83202'}, 'addressLine1': {'S': '5085 GALENA ST'}, 'addressLine2': {'S': ''}, 'state': {'S': 'ID'}, 'city': {'S': 'POCATELLO'}}}}
## Caught an exception while deleting the data.. Parameter validation failed:
Unknown parameter in Item.personalInfo: "firstName", must be one of: S, N, B, SS, NS, BS, M, L, NULL, BOOL
Unknown parameter in Item.personalInfo: "lastName", must be one of: S, N, B, SS, NS, BS, M, L, NULL, BOOL
Unknown parameter in Item.personalInfo: "phoneNumber", must be one of: S, N, B, SS, NS, BS, M, L, NULL, BOOL
Unknown parameter in Item.personalInfo: "last4Ssn", must be one of: S, N, B, SS, NS, BS, M, L, NULL, BOOL
Unknown parameter in Item.personalInfo: "residentialAddress", must be one of: S, N, B, SS, NS, BS, M, L, NULL, BOOL
Experts:, Please help me out here with a solution to handle the dict of dict values pushing to destination table.
CodePudding user response:
The Issue
You do not declare personalInfo as a datatype:
'personalInfo': {
'firstName': {'S': jitem['personalInfo']['firstName']},
'lastName': {'S': jitem['personalInfo']['lastName']},
'phoneNumber': {'S': jitem['personalInfo']['phoneNumber']},
'last4Ssn': {'S': jitem['personalInfo']['last4Ssn']},
'residentialAddress': {
'zip': {'S': jitem['personalInfo']['residentialAddress']['zip']},
'addressLine1': {'S': jitem['personalInfo']['residentialAddress']['addressLine1']},
'addressLine2': {'S': jitem['personalInfo']['residentialAddress']['addressLine2']},
'state': {'S': jitem['personalInfo']['residentialAddress']['state']},
'city': {'S': jitem['personalInfo']['residentialAddress']['city']}
}
The Solution
It should be declared as a Map type which is the DynamoDB data type for dicts and requires a prefix of M
:
'personalInfo': { 'M': {
'firstName': {'S': jitem['personalInfo']['firstName']},
'lastName': {'S': jitem['personalInfo']['lastName']},
'phoneNumber': {'S': jitem['personalInfo']['phoneNumber']},
'last4Ssn': {'S': jitem['personalInfo']['last4Ssn']},
'residentialAddress': {
'zip': {'S': jitem['personalInfo']['residentialAddress']['zip']},
'addressLine1': {'S': jitem['personalInfo']['residentialAddress']['addressLine1']},
'addressLine2': {'S': jitem['personalInfo']['residentialAddress']['addressLine2']},
'state': {'S': jitem['personalInfo']['residentialAddress']['state']},
'city': {'S': jitem['personalInfo']['residentialAddress']['city']}
}
}
The better approach
I'm extremely curious why you begin using the Resource Client while reading and then switch to the Low Level Client to write, creating extra work for yourself having to add the types at the risk of exceptions or data corruption. I strongly advise that you use the same client for both and avoid any of the conversions you are attempting.
CodePudding user response:
It looks like you are trying to store a dictionary within a dictionary in your item variable, but in DynamoDB, you can only store strings within a dictionary, so you will need to convert your dictionary values to strings as well.
just use json.dumps()
to convert your dictionary values to strings before inserting them into item. Then, when retrieving the data from the table, you can use json.loads()
to convert the string values back to dictionaries.