Home > Enterprise >  Loop through multiple xml files
Loop through multiple xml files

Time:02-01

I'm fairly new to Python and would like to loop through multiple XML files. I'm currently using the following code to read a single file, sample2.xml:

import xml.etree.ElementTree as ET
import pandas as pd
import os


# Parse the sample file and keep a handle on the document root.
tree = ET.parse("sample2.xml")
root = tree.getroot()

# Lazy iterators over every matching element anywhere below the root.
qty = root.iterfind(".//Qty")
pri = root.iterfind(".//PriceAmount")
cor = root.iterfind(".//AuctionIdentification")

# Pair the three element streams position by position and keep each
# element's "v" attribute; zip stops at the shortest stream.
data = [(x.get("v"), y.get("v"), z.get("v")) for x, y, z in zip(qty, pri, cor)]

# The attribute values arrive as strings, so cast the numeric columns.
df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
df = df.astype({"Qty": float, "Price": float})

total = df["Qty"].sum()
price = df["Price"].mean()
border = df.loc[0, "Border"]

# One-row summary: summed quantity, mean price, and the first 12
# characters of the first row's auction identification.
df2 = pd.DataFrame(
    {
        "Qty": [total],
        "Price": [price],
        "Border": [str(border)[:12]],
    }
)

I tried adding a second file, soup.xml, to the line of code below, but this didn't work:

tree=ET.parse("sample2.xml , "soup xml")

root = tree.getroot()

CodePudding user response:

Consider turning your code into a function and calling it for the various files you need:

import xml.etree.ElementTree as ET
import pandas as pd
import os

def my_xml_processor(filename):
    """Summarise the Qty / PriceAmount / AuctionIdentification values of one XML file.

    Every ``Qty``, ``PriceAmount`` and ``AuctionIdentification`` element found
    anywhere in the document is read through its ``v`` attribute. The three
    element streams are paired position by position with ``zip``, so pairing
    stops at the shortest stream.
    NOTE(review): if a file carries a single AuctionIdentification but many
    Qty elements, only the first Qty/PriceAmount pair is used -- confirm
    against the real file layout.

    Args:
        filename: Path (or open file object) accepted by ``ET.parse``.

    Returns:
        A one-row ``DataFrame`` with columns ``Qty`` (sum of all quantities),
        ``Price`` (arithmetic mean of all prices) and ``Border`` (first 12
        characters of the first row's auction identification). If the file
        yields no rows, an empty ``DataFrame`` with the same three columns is
        returned instead of failing on the missing first row.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        ValueError: If a ``v`` attribute cannot be converted to ``float``.
    """
    columns = ["Qty", "Price", "Border"]
    root = ET.parse(filename).getroot()

    rows = [
        (qty.get("v"), price.get("v"), auction.get("v"))
        for qty, price, auction in zip(
            root.iterfind(".//Qty"),
            root.iterfind(".//PriceAmount"),
            root.iterfind(".//AuctionIdentification"),
        )
    ]
    if not rows:
        # Nothing matched: there is no row 0 to take the border from.
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(rows, columns=columns)
    # Attribute values are strings; cast before aggregating.
    df["Qty"] = df["Qty"].astype(float)
    df["Price"] = df["Price"].astype(float)

    return pd.DataFrame(
        {
            "Qty": [df["Qty"].sum()],
            "Price": [df["Price"].mean()],
            "Border": [str(df.loc[0, "Border"])[0:12]],
        },
        columns=columns,
    )

You can then call it for your files:

# Process each file and keep the one-row summaries instead of discarding
# them, then stack them into a single table with one row per file.
frames = [my_xml_processor(name) for name in ("sample2.xml", "soup.xml")]
combined = pd.concat(frames, ignore_index=True)

CodePudding user response:

You could keep your existing code but run it in a loop, once for each filename you have, something like:


import xml.etree.ElementTree as ET
import pandas as pd
import os


files = ['sample2.xml', 'sample3.xml', 'sample4.xml']

# One single-row summary frame per file; without this list each iteration
# would overwrite df2 and only the last file's result would survive.
results = []

for file in files:  # read each filename from the list above
    tree = ET.parse(file)
    root = tree.getroot()

    qty = root.iterfind(".//Qty")
    pri = root.iterfind(".//PriceAmount")
    cor = root.iterfind(".//AuctionIdentification")

    # Pair the element streams position by position and keep each "v"
    # attribute; zip stops at the shortest stream.
    data = []
    for x, y, z in zip(qty, pri, cor):
        a = x.get("v"), y.get("v"), z.get("v")
        data.append(a)

    # Attribute values are strings; cast the numeric columns before aggregating.
    df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
    df['Qty'] = df['Qty'].astype(float)
    df['Price'] = df['Price'].astype(float)

    total = df['Qty'].sum()
    price = df['Price'].mean()
    border = df.loc[0, 'Border']

    # Summary row for this file: summed quantity, mean price, and the
    # first 12 characters of the first auction identification.
    df2 = pd.DataFrame(columns=["Qty", "Price", "Border"])

    df2['Qty'] = [total]
    df2['Price'] = [price]
    df2['Border'] = [str(border)[0:12]]

    results.append(df2)

# Stack the per-file summaries into one table, one row per input file.
summary = pd.concat(results, ignore_index=True)
  • Related