I have an app deployed on Heroku.
I followed the linked guide to set up uploading static files to S3, and that works. Now I need to take a CSV file that is created by a Celery task and upload it to S3. The problem is that the Heroku file system is read-only, so I cannot save the file on it. As a result, I get the error FileNotFoundError: [Errno 2] No such file or directory: 'tmp/a30113c5-bbbc-4432-9826-3918e547d407.csv'
How do I upload the generated CSV to S3 without first saving it to the local file system?
@app.task
def upload_file(file_name, bucket, object_name=None):
    """Upload a local file to an S3 bucket.

    Args:
        file_name: Path of the local file to upload.
        bucket: Name of the destination S3 bucket.
        object_name: S3 object key. When omitted, the base name of
            ``file_name`` is used.

    Returns:
        True if the upload call completed, False if it raised ClientError
        (the error is logged).
    """
    # If S3 object_name was not specified, fall back to the file's base name.
    if object_name is None:
        object_name = os.path.basename(file_name)

    s3_client = boto3.client("s3")
    try:
        # The call's result is not needed, so it is not bound to a name.
        s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        logging.error(e)
        return False
    return True
@app.task
def make_csv(data: List[Any], task_id: str):
    """Write the given rows to a CSV file named after the task id.

    A header row (name, phone, email) is written first, followed by every
    row in ``data``. Fields are separated by ";" and quoted only when needed.

    Args:
        data: Rows to write; each row is a sequence of field values.
        task_id: Identifier used as the base name of the CSV file.

    Returns:
        The path of the written file (``tmp/<task_id>.csv``, normalised).
    """
    headers: List[str] = ["name", "phone", "email"]
    file_path = os.path.normpath(f"tmp/{task_id}.csv")

    # open() does not create missing directories; without this the write fails
    # with FileNotFoundError whenever the "tmp" directory does not exist yet.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file=file_path, mode="w", encoding="UTF-8", newline="") as csv_file:
        writer = csv.writer(
            csv_file, delimiter=";", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        writer.writerow(headers)
        writer.writerows(data)

    # Return the path rather than the file object: the handle is already
    # closed at this point, and the caller forwards this value as a file name.
    return file_path
@app.task(bind=True)
def generate_fake_data(self, total: int):
    """Create ``total`` fake contact rows, write them to CSV and upload it.

    Every row is a ``[name, phone, email]`` list produced by ``fake``. The
    rows are handed to ``make_csv`` together with the current task's request
    id, and the result is passed to ``upload_file`` with the bucket named by
    the ``AWS_STORAGE_BUCKET_NAME`` environment variable.

    Returns:
        A short message stating how many rows were created.
    """
    rows: List[Any] = [
        [fake.name(), fake.phone_number(), fake.email()] for _ in range(total)
    ]
    generated = make_csv(data=rows, task_id=self.request.id)
    bucket_name = os.getenv("AWS_STORAGE_BUCKET_NAME")
    upload_file(file_name=generated, bucket=bucket_name)
    return f"{total} random data rows created."
Following Ersain's answer, I did it this way:
@app.task
def upload_file(file_name: str, bucket: str, data: StringIO) -> bool:
    """Upload in-memory CSV text to an S3 bucket.

    Args:
        file_name: Base name of the object; it is stored under the key
            ``upload/<file_name>.csv``.
        bucket: Name of the destination S3 bucket.
        data: Text buffer holding the CSV content.

    Returns:
        True if the upload call completed, False if it raised ClientError
        (the error is logged).
    """
    s3_client = boto3.client("s3")
    # Encode explicitly so the bytes sent to S3 do not rely on an implicit
    # str-to-bytes conversion inside the client.
    body = data.getvalue().encode("utf-8")
    try:
        # The response is not used, so it is not bound to a name.
        s3_client.put_object(
            Body=body, Bucket=bucket, Key=f"upload/{file_name}.csv"
        )
    except ClientError as e:
        logging.error(e)
        return False
    return True
@app.task
def make_csv(data: List[Any]) -> StringIO:
    """Serialise the given rows as CSV text held in an in-memory buffer.

    A header row (name, phone, email) comes first, followed by each row of
    ``data``. Fields are separated by ";" and quoted with '"' only when
    needed. The buffer is returned without being rewound or closed.
    """
    column_names: List[str] = ["name", "phone", "email"]

    # In-memory stand-in for a file on disk.
    buffer = StringIO()
    csv_writer = csv.writer(
        buffer,
        quoting=csv.QUOTE_MINIMAL,
        quotechar='"',
        delimiter=";",
    )
    csv_writer.writerow(column_names)
    csv_writer.writerows(data)
    return buffer
CodePudding user response:
If the file system is read-only, keep the file in memory (using StringIO) and upload it to S3 from there:
from io import StringIO
@app.task
def make_csv(data: List[Any], task_id: str):
    """Produce CSV text from the given rows in an in-memory buffer.

    Args:
        data: Rows to write; each row is a sequence of field values.
        task_id: Not used by this in-memory version; kept so existing
            callers that pass it keep working.

    Returns:
        A StringIO holding the header row followed by the data rows.
    """
    # The header row must be defined here; it is not available from any
    # enclosing scope in this snippet.
    headers: List[str] = ["name", "phone", "email"]
    csv_file = StringIO()
    writer = csv.writer(
        csv_file, delimiter=";", quotechar='"', quoting=csv.QUOTE_MINIMAL
    )
    writer.writerow(headers)
    writer.writerows(data)
    return csv_file
And in your upload_file task, use the put_object method instead of upload_file, and send the content as bytes:
@app.task
def upload_file(file, bucket, object_name=None):
    """Upload the contents of an in-memory text buffer to an S3 bucket.

    Args:
        file: Text buffer exposing ``getvalue()`` (for example a StringIO).
        bucket: Name of the destination S3 bucket.
        object_name: S3 object key. It must be provided, because an
            in-memory buffer has no file name to fall back on.

    Returns:
        True if the upload call completed, False if it raised ClientError
        (the error is logged).

    Raises:
        ValueError: If ``object_name`` is None.
    """
    if object_name is None:
        raise ValueError("object_name is required when uploading an in-memory file")

    s3_client = boto3.client("s3")
    # getvalue() returns str, and bytes(str) without an encoding raises
    # TypeError, so encode explicitly instead.
    raw_file = file.getvalue().encode("utf-8")
    try:
        # boto3 client operations accept keyword arguments only.
        s3_client.put_object(Body=raw_file, Bucket=bucket, Key=object_name)
    except ClientError as e:
        logging.error(e)
        return False
    return True