I am trying to convert an Excel database to XML with Python. I have trading data that I need to import into a system in XML format.
My code is the following:
# Read Sheet1 of the workbook, requesting object dtype for every column.
df = pd.read_excel(
    "C:/Users/junag/Documents/XML/Portfolio2.xlsx",
    sheet_name="Sheet1",
    dtype=object,
)

# Document root with the XML Schema instance namespace declared on it.
root = ET.Element("trading-data")
root.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
tree = ET.ElementTree(root)

# <Portfolios> starts with a <Defaults> element, followed by the portfolios.
Portfolios = ET.SubElement(root, "Portfolios")
Defaults = ET.SubElement(Portfolios, "Defaults", {"BaseCurrency": "USD"})

# Every spreadsheet row produces its own <Portfolio> element that wraps a
# <PortfolioPositions> element holding exactly one <PortfolioPosition>.
for row in df.itertuples():
    portfolio_attrs = {
        "Name": row.Name,
        "BaseCurrency": row.BaseCurrency2,
        "TradingPower": str(row.TradingPower),
        "ValidationProfile": row.ValidationProfile,
        "CommissionProfile": row.CommissionProfile,
    }
    Portfolio = ET.SubElement(Portfolios, "Portfolio", portfolio_attrs)
    PortfolioPositions = ET.SubElement(Portfolio, "PortfolioPositions")

    if row.Type == "Cash":
        # Cash positions carry only a type and a volume, plus a <Cash> child.
        position_attrs = {"Type": row.Type, "Volume": str(row.Volume)}
        PortfolioPosition = ET.SubElement(
            PortfolioPositions, "PortfolioPosition", position_attrs
        )
        Cash = ET.SubElement(
            PortfolioPosition, "Cash", {"Currency": str(row.Currency)}
        )
    else:
        # Any other position type also records the invested amounts and
        # describes the traded instrument in an <Instrument> child.
        position_attrs = {
            "Type": row.Type,
            "Volume": str(row.Volume),
            "Invested": str(row.Invested),
            "BaseInvested": str(row.BaseInvested),
        }
        PortfolioPosition = ET.SubElement(
            PortfolioPositions, "PortfolioPosition", position_attrs
        )
        instrument_attrs = {
            "Ticker": str(row.Ticker),
            "ISIN": str(row.ISIN),
            "Market": str(row.Market),
            "Currency": str(row.Currency2),
            "CFI": str(row.CFI),
        }
        Instrument = ET.SubElement(PortfolioPosition, "Instrument", instrument_attrs)

# Pretty-print with tab indentation, then write the document as UTF-8.
ET.indent(tree, "\t")
tree.write("Portfolios_converted2.xml", encoding="utf-8")
The output looks like this: enter image description here
While I need it to look like this: enter image description here
How can I improve my code to make the output XML look better? Please advise.
CodePudding user response:
Converting Excel to XML in Python:
import openpyxl
import xml.etree.ElementTree as ET
def convert_excel_to_xml(file_name, sheet_name):
    """Write every cell of one worksheet to ``<sheet_name>.xml``.

    Each cell becomes a ``<cell value="..."/>`` child of a single ``<root>``
    element; cells are emitted row by row.

    Args:
        file_name: Path of the workbook, opened with ``openpyxl``.
        sheet_name: Name of the worksheet to export. It is also used as the
            stem of the output file name.
    """
    wb = openpyxl.load_workbook(file_name)
    try:
        sheet = wb[sheet_name]
        root = ET.Element("root")
        for row in sheet.rows:
            for cell in row:
                # ElementTree can only serialize string attribute values, so
                # numbers, dates and empty cells (None) would make
                # tree.write() raise TypeError. Empty cells become "".
                value = "" if cell.value is None else str(cell.value)
                ET.SubElement(root, "cell", value=value)
    finally:
        # Release the workbook even if the sheet lookup or the loop fails.
        wb.close()
    tree = ET.ElementTree(root)
    tree.write("{}.xml".format(sheet_name))
Run the function
# Example call: export worksheet "Sheet1" of test.xlsx.
convert_excel_to_xml("test.xlsx", "Sheet1")
CodePudding user response:
Since you need a single <Portfolio> and <PortfolioPositions> as the parent grouping, consider a nested loop: iterate through a list of data frames and then, within each data frame, loop through its rows:
import xml.etree.ElementTree as ET
import pandas as pd
df = pd.read_excel("Input.xlsx", sheet_name="Sheet1", dtype=object)

# COLUMNS WHOSE COMBINED VALUES IDENTIFY ONE PORTFOLIO
portfolio_cols = [
    "Name",
    "BaseCurrency2",
    "TradingPower",
    "ValidationProfile",
    "CommissionProfile",
]

# LIST OF DATA FRAME SPLITS
# sort=False keeps the portfolios in the order they first appear in the sheet
# instead of reordering them by sorted key.
# NOTE: groupby drops rows whose key columns contain NaN by default.
df_list = [g for _, g in df.groupby(portfolio_cols, sort=False)]

# ROOT LEVEL
root = ET.Element("trading-data")
root.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
# The tree wrapper is required by ET.indent() and tree.write() below.
tree = ET.ElementTree(root)

# ROOT CHILD LEVEL
Portfolios = ET.SubElement(root, "Portfolios")
Defaults = ET.SubElement(Portfolios, "Defaults", BaseCurrency="USD")

# GROUP LEVEL ITERATION
for group in df_list:
    # Each split keeps the row labels of the source frame, so label 0 exists
    # only in the split containing the first sheet row. Select the first row
    # by position to read the (identical) portfolio attributes of the split.
    first = group.iloc[0]
    Portfolio = ET.SubElement(
        Portfolios,
        "Portfolio",
        Name=str(first["Name"]),
        BaseCurrency=str(first["BaseCurrency2"]),
        TradingPower=str(first["TradingPower"]),
        ValidationProfile=str(first["ValidationProfile"]),
        CommissionProfile=str(first["CommissionProfile"]),
    )
    PortfolioPositions = ET.SubElement(Portfolio, "PortfolioPositions")

    # ROW LEVEL ITERATION
    for row in group.itertuples():
        if row.Type == "Cash":
            PortfolioPosition = ET.SubElement(
                PortfolioPositions,
                "PortfolioPosition",
                Type=row.Type,
                Volume=str(row.Volume),
            )
            Cash = ET.SubElement(
                PortfolioPosition,
                "Cash",
                Currency=str(row.Currency),
            )
        else:
            PortfolioPosition = ET.SubElement(
                PortfolioPositions,
                "PortfolioPosition",
                Type=row.Type,
                Volume=str(row.Volume),
                Invested=str(row.Invested),
                BaseInvested=str(row.BaseInvested),
            )
            Instrument = ET.SubElement(
                PortfolioPosition,
                "Instrument",
                Ticker=str(row.Ticker),
                ISIN=str(row.ISIN),
                Market=str(row.Market),
                Currency=str(row.Currency2),
                CFI=str(row.CFI),
            )

# Pretty-print with tab indentation, then write the document as UTF-8.
ET.indent(tree, space="\t", level=0)
tree.write("Output.xml", encoding="utf-8")