I have a list of dictionaries
[{'elementid': 'BsWfsElement.1.1', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86400', 'lat': '32.05570', 'paramname': 'multiplicity', 'paramvalue': '4'}, {'elementid': 'BsWfsElement.1.2', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86400', 'lat': '32.05570', 'paramname': 'peak_current', 'paramvalue': '-11'}, {'elementid': 'BsWfsElement.1.3', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86400', 'lat': '32.05570', 'paramname': 'cloud_indicator', 'paramvalue': '0'}, {'elementid': 'BsWfsElement.1.4', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86400', 'lat': '32.05570', 'paramname': 'ellipse_major', 'paramvalue': '5.8'}, {'elementid': 'BsWfsElement.2.1', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86350', 'lat': '32.02770', 'paramname': 'multiplicity', 'paramvalue': '0'}, {'elementid': 'BsWfsElement.2.2', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86350', 'lat': '32.02770', 'paramname': 'peak_current', 'paramvalue': '-16'}, {'elementid': 'BsWfsElement.2.3', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86350', 'lat': '32.02770', 'paramname': 'cloud_indicator', 'paramvalue': '0'}, {'elementid': 'BsWfsElement.2.4', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86350', 'lat': '32.02770', 'paramname': 'ellipse_major', 'paramvalue': '1.6'}, {'elementid': 'BsWfsElement.3.1', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86730', 'lat': '32.07100', 'paramname': 'multiplicity', 'paramvalue': '0'}, {'elementid': 'BsWfsElement.3.2', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86730', 'lat': '32.07100', 'paramname': 'peak_current', 'paramvalue': '-35'}, {'elementid': 'BsWfsElement.3.3', 'obstime': '2022-07-11T20:00:05', 'lon': '59.86730', 'lat': '32.07100', 'paramname': 'cloud_indicator', 'paramvalue': '0'}, {'elementid': 'BsWfsElement.3.4'
which I want to group by the id subsection in the `elementid` key, in a way that appends the `paramname` and `paramvalue` values from the dictionaries whose `elementid` ends in `.2`, `.3` and `.4` to the "first" dictionary, the one with the `.1` ending, since every other item in the `.2`, `.3` and `.4` dictionaries is a duplicate. Once this is done, I'd remove the `elementid` item and combine each `paramname` and `paramvalue` pair into a single key/value item.
So an example of my desired output in the end would then be
[{'obstime': '2022-07-11T20:00:05', 'lon': '59.86400', 'lat': '32.05570', 'multiplicity': '4', 'peak_current': '-11', 'cloud_indicator': '0', 'ellipse_major': '5.8'} ... ]
My code that creates the list of dictionaries from an XML file
from urllib.request import urlopen
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import fromstring, ElementTree
from itertools import groupby
from operator import itemgetter
# Download lightning observations from the FMI open-data WFS endpoint and
# flatten the response into one dict per (element, parameter) observation.
#
# The query string needs a literal "&timestep=1"; in the pasted version the
# "&times" prefix had been decoded into the "×" character, which left a
# non-ASCII character in the URL and dropped the parameter name.
url = (
    'https://opendata.fmi.fi/wfs?service=WFS&version=2.0.0&request=getFeature'
    '&storedquery_id=fmi::observations::lightning::simple&timestep=1'
    '&starttime=2022-07-11T20:00:00Z&endtime=2022-07-11T20:05:00Z'
)
# The context manager closes the HTTP response once the payload has been read.
with urlopen(url) as file:
    data = file.read()
tree = ElementTree(fromstring(data))
root = tree.getroot()

# Prefix registration only influences how ElementTree serialises these
# namespaces; the lookups below use fully qualified "{uri}tag" names.
ET.register_namespace('wfs', "http://www.opengis.net/wfs/2.0")
ET.register_namespace('gml', "http://www.opengis.net/gml/3.2")
ET.register_namespace('BsWfs', "http://xml.fmi.fi/schema/wfs/2.0")
ET.register_namespace('xsi', "http://www.w3.org/2001/XMLSchema-instance")

BSWFS = '{http://xml.fmi.fi/schema/wfs/2.0}'
GML = '{http://www.opengis.net/gml/3.2}'

# Each list below is collected document-wide and later zipped together, so
# the n-th entry of every list is expected to describe the same element.
paramnames = [pn.text for pn in root.findall('.//' + BSWFS + 'ParameterName')]
paramvalues = [pv.text for pv in root.findall('.//' + BSWFS + 'ParameterValue')]

lon = []
lat = []
for ps in root.findall('.//' + GML + 'pos'):
    # gml:pos holds whitespace-separated coordinates; the first token is
    # stored under 'lon' and the second under 'lat', as before.
    # NOTE(review): confirm the axis order the service actually emits.
    first, second = ps.text.split()[:2]
    lon.append(first)
    lat.append(second)

# Drop the trailing "Z" so timestamps read like '2022-07-11T20:00:05'.
obstime = [tm.text.replace('Z', '') for tm in root.findall('.//' + BSWFS + 'Time')]
ids = [el.get(GML + 'id') for el in root.findall('.//' + BSWFS + 'BsWfsElement')]

# zip() stops at the shortest input, so a missing child element in the
# response would silently truncate the result.
zippedlist = list(zip(ids, obstime, lon, lat, paramnames, paramvalues))
dictnames = ('elementid', 'obstime', 'lon', 'lat', 'paramname', 'paramvalue')
list_of_dicts = [dict(zip(dictnames, row)) for row in zippedlist]
print(list_of_dicts)
I tried sorting and grouping them by the `lon` item, but found out that it doesn't actually produce the result I wanted:
# Sort by 'lon' first: groupby() only groups adjacent items that share a key.
# The 'lon' values are strings, so this ordering is lexicographic.
list_of_dicts = sorted(list_of_dicts, key=itemgetter('lon'))

# Print every dict, one per line, in grouped order. The rows are only
# printed here; nothing is merged.
for key, value in groupby(list_of_dicts, key=itemgetter('lon')):
    for k in value:
        print(k)
print(list_of_dicts)
Output:
{'elementid': 'BsWfsElement.250.1', 'obstime': '2022-07-11T20:02:42', 'lon': '55.16820', 'lat': '30.88440', 'paramname': 'multiplicity', 'paramvalue': '1'}
{'elementid': 'BsWfsElement.250.2', 'obstime': '2022-07-11T20:02:42', 'lon': '55.16820', 'lat': '30.88440', 'paramname': 'peak_current', 'paramvalue': '21'}
{'elementid': 'BsWfsElement.250.3', 'obstime': '2022-07-11T20:02:42', 'lon': '55.16820', 'lat': '30.88440', 'paramname': 'cloud_indicator', 'paramvalue': '0'}
{'elementid': 'BsWfsElement.250.4', 'obstime': '2022-07-11T20:02:42', 'lon': '55.16820', 'lat': '30.88440', 'paramname': 'ellipse_major', 'paramvalue': '2.8'}
{'elementid': 'BsWfsElement.240.1', 'obstime': '2022-07-11T20:02:40', 'lon': '55.67710', 'lat': '31.12120', 'paramname': 'multiplicity', 'paramvalue': '1'}
{'elementid': 'BsWfsElement.240.2', 'obstime': '2022-07-11T20:02:40', 'lon': '55.67710', 'lat': '31.12120', 'paramname': 'peak_current', 'paramvalue': '109'}
{'elementid': 'BsWfsElement.240.3', 'obstime': '2022-07-11T20:02:40', 'lon': '55.67710', 'lat': '31.12120', 'paramname': 'cloud_indicator', 'paramvalue': '0'}
{'elementid': 'BsWfsElement.240.4', 'obstime': '2022-07-11T20:02:40', 'lon': '55.67710', 'lat': '31.12120', 'paramname': 'ellipse_major', 'paramvalue': '1.6'}
...
CodePudding user response:
from collections import defaultdict
# Merge the per-parameter rows in `elements` into one dict per element group.
# Rows are grouped by their 'elementid' with the final ".N" suffix removed;
# each row contributes its shared fields plus one {paramname: paramvalue} item.
combined_elements = defaultdict(dict)
for element in elements:
    # 'BsWfsElement.1.3' -> 'BsWfsElement.1'
    group_id = element['elementid'].rsplit('.', 1)[0]
    merged = combined_elements[group_id]
    # Copy the shared fields instead of popping keys from `element`, so the
    # caller's input dicts are left untouched.
    merged.update(
        (field, value)
        for field, value in element.items()
        if field not in ('elementid', 'paramname', 'paramvalue')
    )
    merged[element['paramname']] = element['paramvalue']

# print elements
for elem in combined_elements.values():
    print(elem)
I used your first list as `elements`. `combined_elements` still has the `elementid`s as keys (without the last `.x` part), so you can refer to them if you want.
Outputs:
{'obstime': '2022-07-11T20:02:42', 'lon': '55.16820', 'lat': '30.88440', 'multiplicity': '1', 'peak_current': '21', 'cloud_indicator': '0', 'ellipse_major': '2.8'}
{'obstime': '2022-07-11T20:02:40', 'lon': '55.67710', 'lat': '31.12120', 'multiplicity': '1', 'peak_current': '109', 'cloud_indicator': '0', 'ellipse_major': '1.6'}
CodePudding user response:
import re
import json
# Group the per-parameter rows in `data` by their shared fields and merge each
# group's parameters into a single dict.
#
# `tmp` maps a JSON-encoded dict of the shared fields (with the trailing ".N"
# stripped from 'elementid') to a list of one-item {paramname: paramvalue}
# dicts collected for that group.
tmp = {}
for x in data:
    shared = {k: v for k, v in sorted(x.items()) if k not in ['paramname', 'paramvalue']}
    # "+" (one or more digits) had been lost from the pattern, leaving a
    # literal space that never matched. Work on a copy so the input row keeps
    # its original 'elementid'.
    shared['elementid'] = re.sub(r'\.[0-9]+$', '', x['elementid'])
    idx = json.dumps(shared)
    tmp.setdefault(idx, []).append({x['paramname']: x['paramvalue']})

# Use each group's own parameter list; indexing list(tmp.values())[0] copied
# the first group's parameters into every output row.
# The result name `ouput` is kept as spelled in the original snippet.
ouput = [
    {**json.loads(idx), **{name: value for param in params for name, value in param.items()}}
    for idx, params in tmp.items()
]
returns:
[{'elementid': 'BsWfsElement',
'lat': '32.05570',
'lon': '59.86400',
'obstime': '2022-07-11T20:00:05',
'multiplicity': '4',
'peak_current': '-11',
'cloud_indicator': '0',
'ellipse_major': '5.8'},
{'elementid': 'BsWfsElement',
'lat': '32.02770',
'lon': '59.86350',
'obstime': '2022-07-11T20:00:05',
'multiplicity': '4',
'peak_current': '-11',
'cloud_indicator': '0',
'ellipse_major': '5.8'},
{'elementid': 'BsWfsElement',
'lat': '32.07100',
'lon': '59.86730',
'obstime': '2022-07-11T20:00:05',
'multiplicity': '4',
'peak_current': '-11',
'cloud_indicator': '0',
'ellipse_major': '5.8'}]