We have over 50k files with a carriage return (`\r`) or a similar line-break character in the filename. An example:
{
"LastModified": "2021-12-25T20:33:05.000Z",
"ETag": "\"653e05d2e10dffc122aa91a93b699413\"",
"StorageClass": "STANDARD",
"Key": "portalahm/zz9JDN3n.jpg\r",
"Owner": {
"ID": "e3fdea5553e3b1a5f37cea2df020a92c6a2efbadcdaf58a2589e930b85a95aff"
},
"Size": 1703936
},
Can anyone suggest how to rename these files by removing that special character using the CLI command `aws s3 mv`, or how to download them to a Windows system using `aws s3api get-object` or `aws s3 cp`?
All attempts at renaming the objects with `mv`, or at downloading them, fail with a `Key does not exist` error.
TIA
CodePudding user response:
Use the script below to bulk-rename the files; it copies every object whose key contains `\r` to a new key with the `\r` removed:
# Bulk-copy script: for every object in the bucket whose key contains a
# carriage return ("\r"), copy it to a new key with the carriage returns
# removed. The original object is kept unless the delete line below is
# uncommented.
#############################
# Configuration:
# Name of the bucket to scan, and the credentials used for the connection.
bucketname = "YOUR_S3_BUCKET_NAME"
access_key = 'YOUR_ACCESS_KEY_ID'
secret_key = 'YOUR_SECRET_ACCESS_KEY'
#############################
require 'rubygems'
require 'aws/s3'
# Mix the AWS::S3 namespace in so Base, Bucket and S3Object can be used
# unqualified below.
include AWS::S3
Base.establish_connection!(
:access_key_id => access_key,
:secret_access_key => secret_key
)
b = Bucket.find(bucketname)
# Key of the last object seen so far; printed at the top of each pass.
# Presumably also passed to the next Bucket.find call to fetch the following
# page of the listing -- that call is cut off below, TODO confirm.
marker = ''
# Keep going while the current bucket listing still contains objects.
while b.size > 0 do
puts "\n\n--------------------new page----------------------"
puts "\n From marker #{marker}"
puts "\n\n--------------------------------------------------"
b.each {|s3o|
# Only touch keys that contain a carriage return. The /i flag has no effect
# here because the pattern contains no letters.
if s3o.key =~ /\r/i
begin
old_key = s3o.key
new_key = s3o.key.gsub(/\r/i, '')
# Copy within the same bucket from the old key to the cleaned-up key.
S3Object.copy(old_key, new_key, bucketname)
puts "copied #{old_key} to #{new_key}"
#Uncomment this if you're feeling confident and want to delete the key
#s3o.delete
# Log the failure and carry on with the next object instead of aborting.
# NOTE(review): rescuing Exception also catches interrupts and exit
# requests; StandardError is usually what is wanted -- confirm before reuse.
rescue Exception => e
puts "\n\n @@@@@@@@@@@@ EXCEPTION on key #{s3o.key} \n\n"
puts e.message
puts "@@@@@@@@@@@@@@}"
next
end
end
}
# Remember the last key of this listing for the next pass.
marker = b.objects.last.key
# NOTE(review): the script is truncated from here on -- the Bucket.find call
# below is incomplete and the closing `end` of the while loop is missing.
b = Bucket.find('
CodePudding user response:
I couldn't quite recreate your situation with `\r`, but I managed to do it with `\n` by inserting a CR into an object key. So, here is a Python script that should be able to copy the files for you:
# Copy every object in the bucket whose key contains a carriage return ('\r')
# to a new key with the carriage returns removed. The source object is left
# in place unless the delete line at the bottom is uncommented.
import boto3

s3_resource = boto3.resource('s3')

# `obj` rather than `object`, so the builtin `object` is not shadowed.
for obj in s3_resource.Bucket('my-bucket').objects.all():
    if '\r' in obj.key:
        # Destination: same bucket, same key minus every carriage return.
        new_object = s3_resource.Object(obj.bucket_name, obj.key.replace('\r', ''))
        # Copy from the original (carriage-return-containing) key.
        new_object.copy({'Bucket': obj.bucket_name, 'Key': obj.key})
        print(obj.key, new_object.key)
        # obj.delete()  # Remove comment-marker to delete source object after copy