Home > OS >  getting specific attribute name and it's value from xml to csv via python and pandas
getting specific attribute name and it's value from xml to csv via python and pandas

Time:11-12

i am converting nested xml file to csv via python and pandas library. there is many attribute in xml but i just want to export specific attribute name with its value in the csv file.

    <TestReport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" id="33357fcc-edf2-3ad4-a6f2-588a9492f804" source="OI Stand" converter="irs_xml.xsl" version="1.6">
      <FileType>Test Report</FileType>
      <CreationTime>2020-09-28T09:42:07.3875529 02:00</CreationTime>
      <TestExecution start="2265.595799" time="68.619646" id="009b96f0-9d26-4226-9c9a-7364dcab0467" ts="2020-09-28T09:40:58">                     
        <Attr name="ProductNumber" type="String" value="11266" />        
        <Attr name="SerialNumber" type="Number" value="324" />      
        <TestSteps>
          <TestStep measid="CNID_0210" measname="UART Read Node ID" status="Passed" datatype="Boolean" value="True" stepname="[CNID_0210] UART Read Node ID" steptype="PassFailTest" start="2277.5219649" time="0.0280892" stepid="ID#:Ua1/2g1/6hGrTAABBUH5EC" group="[CNID] Config Node ID">
            <Attr name="ReadNodeId Cmd" type="String" value="RAC" />
            <Attr name="NodeId" type="String" value=" 255" />
          </TestStep>
          <TestStep measid="MOCV_1510" measname="Check OCV" status="Passed" datatype="Number" limlo="49.000" value="49.429" limhi="51.400" unit="V" stepname="[MOCV_1510] Check OCV" steptype="Test" start="2309.5348324" time="5.72E-05" stepid="ID#:pIldp t98hG/wTz4Yu5AjB" group="[MOCV] Measure OCV" />
          <TestStep>
          .
          .
          </TestSteps>
          <PartNr />
      </TestExecution>
   </TestReport>

my code(it shows all the attribute)

import xml.etree.ElementTree as ET
import pandas as pd
tree = ET.parse('324.irp')
root = tree.getroot()
rows = []
df_columns = ['CreationTime', 'FileType', 'SerialNumber']
CreationTime = root.find("CreationTime").text
FileType = root.find("FileType").text
for child in root.iter('Attr'):
        name = child.attrib.get('name')
        value = child.attrib.get('value')
        print (name, value)
for elem in root.iter('TestStep'):
        measname = elem.attrib.get('measname')
        value = elem.attrib.get('value')
        print (measname, value)
if CreationTime and FileType and value:
        rows.append({"CreationTime": CreationTime, "FileType": FileType, "value": value})
        print(CreationTime, FileType, value)
df = pd.DataFrame(rows, columns= df_columns)
df.to_csv(r'C:\Users\PycharmProjects\pythonProject\output.csv', index = False)

Expecting result:

CreationTime,FileType,SerialNumber,Check OCV
2020-09-28T09:42:07.3875529 02:00,Test Report,324,49.429V

could you please anyone help me out how can i print specific attribute name(SerialNumber,Check OCV) with its value(324,49.429V). out of many attribute?

CodePudding user response:

Try the below

import xml.etree.ElementTree as ET
import pandas as pd

xml = '''<?xml version="1.0" encoding="UTF-8"?>
<TestReport xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="33357fcc-edf2-3ad4-a6f2-588a9492f804" source="OI Stand" converter="irs_xml.xsl" version="1.6">
   <FileType>Test Report</FileType>
   <CreationTime>2020-09-28T09:42:07.3875529 02:00</CreationTime>
   <TestExecution start="2265.595799" time="68.619646" id="009b96f0-9d26-4226-9c9a-7364dcab0467" ts="2020-09-28T09:40:58">
      <Attr name="ProductNumber" type="String" value="11266" />
      <Attr name="SerialNumber" type="Number" value="324" />
      <TestSteps>
         <TestStep measid="CNID_0210" measname="UART Read Node ID" status="Passed" datatype="Boolean" value="True" stepname="[CNID_0210] UART Read Node ID" steptype="PassFailTest" start="2277.5219649" time="0.0280892" stepid="ID#:Ua1/2g1/6hGrTAABBUH5EC" group="[CNID] Config Node ID">
            <Attr name="ReadNodeId Cmd" type="String" value="RAC" />
            <Attr name="NodeId" type="String" value=" 255" />
         </TestStep>
         <TestStep measid="MOCV_1510" measname="Check OCV" status="Passed" datatype="Number" limlo="49.000" value="49.429" limhi="51.400" unit="V" stepname="[MOCV_1510] Check OCV" steptype="Test" start="2309.5348324" time="5.72E-05" stepid="ID#:pIldp t98hG/wTz4Yu5AjB" group="[MOCV] Measure OCV" />
         <TestStep />
      </TestSteps>
      <PartNr />
   </TestExecution>
</TestReport>'''

def get_ele(name):
  expr = f'.//{name}'
  return root.find(expr).text

def get_attr(name):
  expr = f'.//Attr[@name="{name}"]'
  return root.find(expr).attrib['value']  

df_columns = {'CreationTime':get_ele, 'FileType':get_ele, 'SerialNumber':get_attr}

root = ET.fromstring(xml)
data = [{name :func(name) for name,func in df_columns.items()}]

df = pd.DataFrame(data)
print(df)

output

                      CreationTime     FileType SerialNumber
0  2020-09-28T09:42:07.3875529 02:00  Test Report          324
  • Related