I need to remove the timestamps from my file; for each header line it should return only the name. My source text file looks like this:
7:52:01 AM sherr
hello GOOD morning .
おはようございます。
7:52:09 AM sherr
Who ?
誰?
7:52:16 AM sherr
OK .
わかりました
And my code looks like this:
from openpyxl import Workbook
import copy
# Copy the chat log into a worksheet, three consecutive text lines per
# spreadsheet row (columns 1-3), then enable text wrapping on every cell.
# NOTE: this version writes each line unchanged, so the header line still
# carries its timestamp (e.g. "7:52:01 AM sherr").
wb = Workbook()
with open('chat_20220207131707.txt', encoding='utf-8') as sherr:
    row = 1
    column = 1
    ws = wb.active
    for line in sherr:
        ws.cell(row=row, column=column, value=line.strip())
        # Advance to the next column; after the third column, wrap around
        # to the first column of the next row.
        if (column := column + 1) > 3:
            row += 1
            column = 1

# Cell styles are shared/immutable in openpyxl, so copy the alignment,
# modify the copy, and assign it back to the cell.
for row in ws.iter_rows():
    for cell in row:
        alignment = copy.copy(cell.alignment)
        alignment.wrapText = True
        cell.alignment = alignment

wb.save('sherrplease.xlsx')
CodePudding user response:
If the file always has the same structure, which it certainly looks like, this can be done with simple splitting of the string in question.
from openpyxl import Workbook
import copy
# Copy the chat log into a worksheet, three consecutive text lines per
# spreadsheet row (columns 1-3), stripping the timestamp from the first
# line of each group, then enable text wrapping on every cell.
wb = Workbook()
with open('chat_20220207131707.txt', encoding='utf-8') as sherr:
    row = 1
    column = 1
    ws = wb.active
    for line in sherr:
        if column == 1:
            # Header line looks like "7:52:01 AM sherr": drop the first two
            # space-separated tokens (time and AM/PM) and rejoin the rest,
            # so a name containing spaces is kept intact.
            value = " ".join(line.strip().split(' ')[2:])
        else:
            value = line.strip()
        ws.cell(row=row, column=column, value=value)
        # Advance to the next column; after the third column, wrap around
        # to the first column of the next row.
        if (column := column + 1) > 3:
            row += 1
            column = 1

# Cell styles are shared/immutable in openpyxl, so copy the alignment,
# modify the copy, and assign it back to the cell.
for row in ws.iter_rows():
    for cell in row:
        alignment = copy.copy(cell.alignment)
        alignment.wrapText = True
        cell.alignment = alignment

wb.save('sherrplease.xlsx')