I'm trying to download and save a .csv file that I receive daily in my Outlook inbox. The code below saves the file in a local folder, but when I try to read it with pandas, I get a 'utf-8' codec error.
import os
from enum import Enum
import win32com.client as win32
from datetime import datetime
import pandas as pd
class OutlookFolder(Enum):
    """Outlook default-folder identifiers passed to ``GetDefaultFolder``."""

    # Numeric id of the default Inbox folder.
    olFolderInbox = 6
# Open the default Inbox of the current Outlook (MAPI) profile.
outlook = win32.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.GetDefaultFolder(OutlookFolder.olFolderInbox.value)
messages = inbox.Items
# The sender address is redacted in this listing. String values in a
# Restrict() filter must be enclosed in quotes.
messages = messages.Restrict("[SenderEmailAddress] = '[email protected]'")
t = datetime.today().date()
outputDir = r"C:\Users\ABCD\Documents\ABCD\Daily Seed File"
# Name of the most recently saved attachment; stays None if nothing was saved.
fn = None

try:
    for message in messages:
        received = message.ReceivedTime.date()
        if received != t:
            continue  # only today's mail is of interest
        try:
            s = message.Sender
            for attachment in message.Attachments:
                # splitext() drops the extension whatever its length,
                # unlike slicing off the last four characters.
                stem = os.path.splitext(attachment.FileName)[0]
                target_name = stem + "_" + str(received) + ".csv"
                attachment.SaveAsFile(os.path.join(outputDir, target_name))
                # Record the name only once the save has succeeded.
                fn = target_name
                print(f"attachment {attachment.FileName} from {s} saved")
        except Exception as e:
            print("error when saving the attachment:" + str(e))
except Exception as e:
    print("error when processing email messages:" + str(e))

if fn is None:
    # Without this guard the path construction below fails with a NameError.
    print("no attachment was saved today; nothing to load")
else:
    fp = os.path.join(outputDir, fn)
    # pandas decodes as UTF-8 by default; this is the call that raises the
    # UnicodeDecodeError reported below.
    df = pd.read_csv(fp)
    df.head()
I'm getting the following error -
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte
CodePudding user response:
You could try to use the encoding
parameter:
df = pd.read_csv(fp, encoding='utf8')
CodePudding user response:
The file was UTF-16 encoded and used a tab (\t) as the
separator.
The following works -
df = pd.read_csv(fp, encoding ='utf-16', sep="\t")