Extract data from multiple word tables and write them into the same sheet in Excel.
path = 'folder path'
worddocs_list = []
for filename in os.listdir(path):
wordDoc = Document(path '\\' filename)
worddocs_list.append(wordDoc)
writer = pd.ExcelWriter("./test.xlsx", engine='xlsxwriter')
i = 0
for wordDoc in worddocs_list:
for table in wordDoc.tables:
lst = []
for row in table.rows[5:6]:
row_lst = []
for cell in row.cells[1:7]:
c = cell.text
row_lst.append(c)
lst.append(row_lst)
df = pd.DataFrame()
N = 6
split_list = np.array_split(lst, N, axis=0)
for i in range(N):
df.loc[0, 'Column_{}'.format(i)] = split_list[0][0][i]
print(df)
df.to_excel(writer, sheet_name=f'Sheet{1}')
i = 1
writer.save()
In the TEST file, only the content of the last loop. How to write ‘ROW 1’ in each loop, thank you!!
CodePudding user response:
It looks like you are taking just one row from each table. You can gather all of these rows together and then do one write at the end.
out_rows = [
[c.text for c in table.rows[5].cells[1:7]]
for wordDoc in worddocs_list
for table in wordDoc.tables
]
df = pd.DataFrame(out_rows, columns=[f"Column_{i}" for i in range(6)])
df.to_excel(writer, sheet_name=f'Sheet{1}')