I am trying to export data from the database to a CSV file. The export is done perfectly, however, I have some Arabic text in the database where when exporting the data, I get Latin-9 characters. As below. (I am on Windows)
When this CSV file is opened in Notepad, I can see the correct values
ID,Serial,City,Office
1,ASDF4321,مصر,مصر
2,FDSA1234,السعودية,السعودية
3,ASDF4321,مصر,مصر
4,FDSA1234,السعودية,السعودية
My code is:
import csv
from io import BytesIO, StringIO
from flask import Flask, send_file
from flask_sqlalchemy import SQLAlchemy
app = Flask(__name__, instance_relative_config=True)
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///app.sqlite3"
db = SQLAlchemy(app)
class Device(db.Model):
__tablename__ = "device"
id = db.Column(db.Integer, primary_key=True)
serial = db.Column(db.String(255), nullable=True)
city = db.Column(db.String(255), nullable=True)
office = db.Column(db.String(255), nullable=True)
def __repr__(self):
return f"<Device {self.serial!r}, {self.city!r}, {self.office!r}>"
with app.app_context():
db.create_all()
device1 = Device(serial="ASDF4321", city="مصر", office="مصر")
device2 = Device(serial="FDSA1234", city="السعودية", office="السعودية")
db.session.add(device1)
db.session.add(device2)
db.session.commit()
@app.route("/", methods=["GET"])
def index():
return "Home"
@app.route("/export", methods=["GET", "POST"])
def export():
si = StringIO()
devices = Device.query.all()
csvwriter = csv.writer(si, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
csvwriter.writerow(["ID", "Serial", "City", "Office"])
for i, device in enumerate(devices, start=1):
csvwriter.writerow([i, device.serial, device.city, device.office])
mem = BytesIO()
mem.write(si.getvalue().encode())
mem.seek(0)
si.close()
return send_file(
mem, mimetype="text/csv", download_name="Export-File.csv", as_attachment=True
)
if __name__ == "__main__":
app.run(debug=True)
How can I export to a CSV file and have it look like this:
CodePudding user response:
I managed to solve the issue and I had to add the byte order mark (BOM) which is a Unicode character that is used to indicate that a file is encoded in UTF-8
. It must be added as the first character in the file.
@app.route("/export", methods=["GET", "POST"])
def export():
si = StringIO()
si.write("\ufeff") # This line
devices = Device.query.all()
csvwriter = csv.writer(si, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
csvwriter.writerow(["ID", "Serial", "City", "Office"])
for i, device in enumerate(devices, start=1):
csvwriter.writerow([i, device.serial, device.city, device.office])
mem = BytesIO()
mem.write(si.getvalue().encode())
mem.seek(0)
si.close()
return send_file(
mem, mimetype="text/csv", download_name="Export-File.csv", as_attachment=True
)
OR
Use "".encode("utf-8-sign")
. The utf-8-sig
codec will prepend the BOM at the beginning.
@app.route("/export", methods=["GET", "POST"])
def export():
si = StringIO()
devices = Device.query.all()
csvwriter = csv.writer(si, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
csvwriter.writerow(["ID", "Serial", "City", "Office"])
for i, device in enumerate(devices, start=1):
csvwriter.writerow([i, device.serial, device.city, device.office])
mem = BytesIO()
mem.write(si.getvalue().encode("utf-8-sig")) # Edit here
mem.seek(0)
si.close()
return send_file(
mem, mimetype="text/csv", download_name="Export-File.csv", as_attachment=True
)