I am trying to upload google play console reports to s3 using boto3. The code below works well when i try to print dataframes in loop, which means i am successfully getting the files i need.
from io import StringIO # python3; python2: BytesIO
import boto3
import os
from google.cloud import storage
import pandas as pd
import io
jsonfile = os.path.join(
os.path.dirname(__file__), 'private_key.json')
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]= jsonfile
# getting all file names from bucket
storage_client = storage.Client()
bucket_name = 'pubsite_prod_rev_1223445566778899'
bucket = storage_client.bucket(bucket_name)
#blob = bucket.blob(source_blob_name)
allblobs = storage_client.list_blobs(bucket_name)
# filtering out the files i need. for example: abc/123/game1/201801_channel.csv,abc/123/game1/202110_channel.csv
for blobfile in allblobs:
if "abc/123" in blobfile.name:
if "game1" in blobfile.name:
if "channel.csv" in blobfile.name:
#print(blobfile.name) # successfully getting all file names
source_blob_name = blobfile.name
blob = bucket.blob(source_blob_name)
data = blob.download_as_string()
df = pd.read_csv(io.BytesIO(data),encoding='utf-16')
print(df) # successfully printing dataframes for all of the files
#upload files to s3
bucket = 'the-knights-iaps-raw' # already created on S3
csv_buffer = StringIO()
df.to_csv(csv_buffer)
s3_resource = boto3.resource('s3', aws_access_key_id='JE4WNFJCN24JNJN13FC',aws_secret_access_key = 'jdsafjlhsafj34j32n4tj23nZ')
fileNamefors3 = source_blob_name.split("/", 2)
s3_resource.Object(bucket, "Acquisition/Buyers7d/StickmanAdventureGame/Channel/" fileNamefors3[2]).put(Body=csv_buffer.getvalue())
But uploading all these dataframes to s3 is causing an error:
File "C:\Users\USER\PycharmProjects\Gamexis_gpc\cvcv.py", line 28, in blob = bucket.blob(source_blob_name) AttributeError: 'str' object has no attribute 'blob'
I am not python pro but that would be great if someone can help.
CodePudding user response:
Here you're creating the bucket:
bucket = storage_client.bucket(bucket_name)
.. but then later on inside the for loop you overwrite that variable:
bucket = 'the-knights-iaps-raw'
Use a different variable name for the string.