Home > Software engineering >  Convert excel to XML in python
Convert excel to XML in python

Time:11-15

I am trying to convert an Excel database into XML using Python. I have trading data which I need to import into the system in XML format.

My code is as follows:

import xml.etree.ElementTree as ET

import pandas as pd

# Read the sheet with dtype=object, as in the original call.
df = pd.read_excel("C:/Users/junag/Documents/XML/Portfolio2.xlsx", sheet_name="Sheet1", dtype=object)

# Document skeleton: <trading-data> -> <Portfolios> -> <Defaults/>.
root = ET.Element('trading-data')
root.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
tree = ET.ElementTree(root)
Portfolios = ET.SubElement(root, "Portfolios")
Defaults = ET.SubElement(Portfolios, "Defaults", BaseCurrency="USD")

# NOTE: every spreadsheet row creates its own <Portfolio> and
# <PortfolioPositions> pair, so rows that share the same portfolio columns
# are emitted as separate <Portfolio> elements rather than grouped.
for row in df.itertuples():
    Portfolio = ET.SubElement(Portfolios, "Portfolio", Name=row.Name, BaseCurrency=row.BaseCurrency2, TradingPower=str(row.TradingPower),
                              ValidationProfile=row.ValidationProfile, CommissionProfile=row.CommissionProfile)
    PortfolioPositions = ET.SubElement(Portfolio, "PortfolioPositions")
    if row.Type == "Cash":
        # Cash rows only carry a volume and a currency.
        PortfolioPosition = ET.SubElement(PortfolioPositions, "PortfolioPosition", Type=row.Type, Volume=str(row.Volume))
        Cash = ET.SubElement(PortfolioPosition, 'Cash', Currency=str(row.Currency))
    else:
        # Every other row type is written as an instrument position.
        PortfolioPosition = ET.SubElement(PortfolioPositions, "PortfolioPosition", Type=row.Type, Volume=str(row.Volume),
                                          Invested=str(row.Invested), BaseInvested=str(row.BaseInvested))
        Instrument = ET.SubElement(PortfolioPosition, 'Instrument', Ticker=str(row.Ticker), ISIN=str(row.ISIN), Market=str(row.Market),
                                   Currency=str(row.Currency2), CFI=str(row.CFI))


# Pretty-print with tab indentation, then write the file.
ET.indent(tree, space="\t", level=0)
tree.write("Portfolios_converted2.xml", encoding="utf-8")

The output looks like this: enter image description here

While I need it to look like this: enter image description here

How can I improve my code to make the output XML look better? Please advise.

CodePudding user response:

Converting excel to XML in python

import openpyxl
import xml.etree.ElementTree as ET

def convert_excel_to_xml(file_name, sheet_name):
    """Dump every cell of one worksheet into a flat XML file.

    Each cell becomes a ``<cell value="..."/>`` element directly under a
    single ``<root>`` element, in row-by-row order. The result is written
    to ``<sheet_name>.xml`` in the current working directory.

    Args:
        file_name: Path of the Excel workbook to read.
        sheet_name: Name of the worksheet to export; also used as the
            base name of the output file.
    """
    wb = openpyxl.load_workbook(file_name)
    sheet = wb[sheet_name]
    root = ET.Element("root")
    for row in sheet.rows:
        for cell in row:
            # ElementTree can only serialize string attribute values, so
            # numbers, dates and empty cells (None) would make tree.write()
            # raise TypeError. Convert to text; empty cells become "".
            value = "" if cell.value is None else str(cell.value)
            ET.SubElement(root, "cell", value=value)
    tree = ET.ElementTree(root)
    tree.write("{}.xml".format(sheet_name))

Run the function

# Export "Sheet1" of test.xlsx; the output file name is derived from the
# sheet name, so this writes "Sheet1.xml".
convert_excel_to_xml("test.xlsx", "Sheet1")

CodePudding user response:

Since you need a single <Portfolio> and <PortfolioPositions> as parent grouping, consider a nested loop by iterating through a list of data frames. Then, within each data frame loop through its rows:

import xml.etree.ElementTree as ET
import pandas as pd

df = pd.read_excel("Input.xlsx", sheet_name="Sheet1", dtype=object)

# LIST OF DATA FRAME SPLITS
# One sub-frame per distinct combination of the portfolio-level columns, so
# every portfolio is emitted exactly once with all of its positions.
df_list = [g for i, g in df.groupby(
    ["Name", "BaseCurrency2", "TradingPower", "ValidationProfile", "CommissionProfile"]
)]

# ROOT LEVEL
root = ET.Element('trading-data')
root.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
# The tree wrapper is required by ET.indent() / tree.write() below.
tree = ET.ElementTree(root)

# ROOT CHILD LEVEL
Portfolios = ET.SubElement(root, "Portfolios")
Defaults = ET.SubElement(Portfolios, "Defaults", BaseCurrency="USD")

# GROUP LEVEL ITERATION
for group in df_list:
    # Each group keeps the row labels of the original frame, so label-based
    # access such as group["Name"][0] only works for the group that contains
    # the first spreadsheet row. Take the first row by position instead.
    first = group.iloc[0]

    Portfolio = ET.SubElement(
        Portfolios,
        "Portfolio",
        # XML attribute values must be strings, hence the str() calls.
        Name=str(first["Name"]),
        BaseCurrency=str(first["BaseCurrency2"]),
        TradingPower=str(first["TradingPower"]),
        ValidationProfile=str(first["ValidationProfile"]),
        CommissionProfile=str(first["CommissionProfile"]),
    )

    PortfolioPositions = ET.SubElement(Portfolio, "PortfolioPositions")

    # ROW LEVEL ITERATION
    for row in group.itertuples():
        if row.Type == "Cash":
            # Cash rows only carry a volume and a currency.
            PortfolioPosition = ET.SubElement(
                PortfolioPositions,
                "PortfolioPosition",
                Type=str(row.Type),
                Volume=str(row.Volume),
            )
            ET.SubElement(
                PortfolioPosition,
                "Cash",
                Currency=str(row.Currency),
            )
        else:
            # Every other row type is written as an instrument position.
            PortfolioPosition = ET.SubElement(
                PortfolioPositions,
                "PortfolioPosition",
                Type=str(row.Type),
                Volume=str(row.Volume),
                Invested=str(row.Invested),
                BaseInvested=str(row.BaseInvested),
            )
            ET.SubElement(
                PortfolioPosition,
                "Instrument",
                Ticker=str(row.Ticker),
                ISIN=str(row.ISIN),
                Market=str(row.Market),
                Currency=str(row.Currency2),
                CFI=str(row.CFI),
            )

# Pretty-print with tab indentation, then write the file.
ET.indent(tree, space="\t", level=0)
tree.write("Output.xml", encoding="utf-8")
  • Related