I have this simple xml file:
<BSB>
<APPLSUMMARY>
<MAIN W="S1" X="{ND}"/>
<COUNTS Z="0" AB="0" BB="0" CB="0" DB="0" EB="0" FB="0" GB="{ND}"/>
<SCOTDEBT OQB="{ND}"/>
<NOTICES HB="0" IB="3"/>
<SUB_BLOCKS C="3" D="3" E="1" F="0"/>
<ALIAS_NO UPB="0" VPB="{ND}" WPB="0"/>
<ASSOC_NO DD="0" ED="0" AC="0"/>
<ALERTSUMM PB="0" QB="0" RB="{ND}" SB="{ND}" TB="{ND}" UB="{ND}"/>
<HHOSUMM BC="{ND}" RGB="{ND}"/>
<TPD INB="{ND}" JNB="{ND}" KNB="{ND}" LNB="{ND}"/>
<OCCUPANCY AD="1"/>
<DECEASED LQB="1" FCC="{ND}" GCC="{ND}" HCC="{ND}" ICC="{ND}"/>
<IMPAIRED MQB="0"/>
<ACTIVITY JCC="{ND}" KCC="{ND}" LCC="{ND}"/>
<ADVERSE MCC="{ND}" HHC="{ND}"/>
</APPLSUMMARY>
</BSB>
I want to use Python to create a CSV file that contains only the contents of the DECEASED element, laid out in columns like this:
So, I am trying to get the attribute values of the DECEASED element and align them in columns.
I have tried this:
# Asker's original attempt, kept verbatim; it is reported below to produce an
# empty DataFrame.
# NOTE(review): `objectify` and `pd` are used but never imported in this
# snippet -- presumably lxml.objectify and pandas; confirm.
import xml.etree.ElementTree as ET
import io
parsed = objectify.parse(open(path)) // path is where the xml file is saved
root = parsed.getroot()
data = []
# NOTE(review): in the sample XML, DECEASED sits under APPLSUMMARY and holds
# its values as attributes, with no child elements, so the inner loop below
# has nothing to copy.
for elt in root.BSB.DECEASED:
el_data = {}
for child in elt.getchildren():
el_data[child.tag] = child.text
data.append(el_data)
perf =pd.DataFrame(data).drop_duplicates(subset=None, keep='first', inplace=False)
print(perf)
perf.to_csv('DECESEAD.csv')
I get an empty dataset:
Empty DataFrame Columns: [] Index: []
Can anyone help me get the values inside the DECEASED tag, please?
CodePudding user response:
The code below collects the data you are looking for:
import xml.etree.ElementTree as ET
from typing import Dict
# Sample document from the question, inlined so the snippet runs on its own.
xml = '''<BSB>
<APPLSUMMARY>
<MAIN W="S1" X="{ND}"/>
<COUNTS Z="0" AB="0" BB="0" CB="0" DB="0" EB="0" FB="0" GB="{ND}"/>
<SCOTDEBT OQB="{ND}"/>
<NOTICES HB="0" IB="3"/>
<SUB_BLOCKS C="3" D="3" E="1" F="0"/>
<ALIAS_NO UPB="0" VPB="{ND}" WPB="0"/>
<ASSOC_NO DD="0" ED="0" AC="0"/>
<ALERTSUMM PB="0" QB="0" RB="{ND}" SB="{ND}" TB="{ND}" UB="{ND}"/>
<HHOSUMM BC="{ND}" RGB="{ND}"/>
<TPD INB="{ND}" JNB="{ND}" KNB="{ND}" LNB="{ND}"/>
<OCCUPANCY AD="1"/>
<DECEASED LQB="1" FCC="{ND}" GCC="{ND}" HCC="{ND}" ICC="{ND}"/>
<IMPAIRED MQB="0"/>
<ACTIVITY JCC="{ND}" KCC="{ND}" LCC="{ND}"/>
<ADVERSE MCC="{ND}" HHC="{ND}"/>
</APPLSUMMARY>
</BSB>'''
def _clean_dict(attributes: Dict) -> Dict:
result = {}
for k, v in attributes.items():
if v[0] == '{':
val = v[1:-1]
else:
val = v
result[k] = val
return result
# Parse the inline document, then gather the cleaned attribute dict of every
# DECEASED element found anywhere below the root.
root = ET.fromstring(xml)
data = [_clean_dict(node.attrib) for node in root.findall('.//DECEASED')]
print(data)
Output (a list of dicts):
[{'LQB': '1', 'FCC': 'ND', 'GCC': 'ND', 'HCC': 'ND', 'ICC': 'ND'}]