Hi all, I want to merge records that share the same BeginOffset and EndOffset values, and rename the merged fields so that each one carries the number of the duplicate it came from. Consider this example. The input I have is:
# Sample input: the first two records share BeginOffset 60 / EndOffset 73,
# the third record has its own span.
data = [
    {
        "Text": "First",
        "Id": 1,
        "BeginOffset": 60,
        "EndOffset": 73,
        "Score": 0.5620501637458801,
        "Category": "Testing",
        "Type": "Name",
    },
    {
        "Text": "Second",
        "Id": 5,
        "BeginOffset": 60,
        "EndOffset": 73,
        "Score": 0.959932804107666,
        "Category": "Testing 2",
        "Type": "Name 2",
    },
    {
        "Text": "Third",
        "Id": 2,
        "BeginOffset": 85,
        "EndOffset": 94,
        "Score": 0.9013960361480713,
        "Category": "Testing 3",
        "Type": "Value",
    },
]
And the output I want is:
# Desired result: records with identical BeginOffset/EndOffset collapse into
# one dict whose Score/Category/Type keys carry a "-<n>" suffix per duplicate;
# records without a duplicate keep their plain key names.
data = [
    {
        "Text": "First",
        "Id": 1,
        "BeginOffset": 60,
        "EndOffset": 73,
        "Score-1": 0.5620501637458801,
        "Category-1": "Testing",
        "Type-1": "Name",
        "Score-2": 0.959932804107666,
        "Category-2": "Testing 2",
        "Type-2": "Name 2",
    },
    {
        "Text": "Third",
        "Id": 2,
        "BeginOffset": 85,
        "EndOffset": 94,
        "Score": 0.9013960361480713,
        "Category": "Testing 3",
        "Type": "Value",
    },
]
You can see that the second record has been merged into the first one because they have the same BeginOffset and EndOffset, so both are combined into a single object whose fields are given numbered names.
Can anyone help with this? Thanks in advance.
CodePudding user response:
You could do it like this. There may be better ways:
data = [
    {
        "Text": "First",
        "Id": 1,
        "BeginOffset": 60,
        "EndOffset": 73,
        "Score": 0.5620501637458801,
        "Category": "Testing",
        "Type": "Name",
    },
    {
        "Text": "Second",
        "Id": 5,
        "BeginOffset": 60,
        "EndOffset": 73,
        "Score": 0.959932804107666,
        "Category": "Testing 2",
        "Type": "Name 2",
    },
    {
        "Text": "Third",
        "Id": 2,
        "BeginOffset": 85,
        "EndOffset": 94,
        "Score": 0.9013960361480713,
        "Category": "Testing 3",
        "Type": "Value",
    },
]

# Dictionary keys used by the merge logic.
BO = 'BeginOffset'
EO = 'EndOffset'
SC = 'Score'
CA = 'Category'
TY = 'Type'
# Temporary bookkeeping key: how many records have been folded into an entry
# (0 means the entry has not been merged with anything yet).
N = '_n'


def get_match(jdata, dc):
    """Return the first dict in ``jdata`` whose offsets equal those of ``dc``.

    Two dicts match when both their ``BeginOffset`` and ``EndOffset`` values
    are equal. Returns ``None`` when no dict in ``jdata`` matches.
    """
    for d in jdata:
        if d[BO] == dc[BO] and d[EO] == dc[EO]:
            return d
    return None


def merge_duplicates(records):
    """Merge records that share the same BeginOffset and EndOffset.

    The first record seen for a given offset pair becomes the merged entry
    (its other keys, such as ``Text`` and ``Id``, are kept as they are). When
    a later record has the same offsets, the entry's ``Category``, ``Score``
    and ``Type`` keys are renamed with a ``-1`` suffix and the later record's
    values are added with suffixes ``-2``, ``-3``, ... in order of appearance.
    Records without a duplicate keep their plain key names.

    The input dicts are not modified; the result is a new list of new dicts.
    """
    merged = []
    for record in records:
        entry = get_match(merged, record)
        if entry is None:
            # Work on a shallow copy so the caller's dicts are left untouched
            # and the bookkeeping key never leaks into the input.
            entry = dict(record)
            entry[N] = 0
            merged.append(entry)
            continue
        if entry[N] == 0:
            # First duplicate for this span: move the plain keys to "-1".
            for k in CA, SC, TY:
                entry[f'{k}-1'] = entry.pop(k)
            entry[N] = 1
        # Advance the counter so every further duplicate gets its own suffix
        # instead of overwriting the "-1" values.
        entry[N] += 1
        for k in CA, SC, TY:
            entry[f'{k}-{entry[N]}'] = record[k]
    # Strip the bookkeeping key before handing the result back.
    for entry in merged:
        del entry[N]
    return merged


output = merge_duplicates(data)
print(output)
Output:
[{'Text': 'First', 'Id': 1, 'BeginOffset': 60, 'EndOffset': 73, 'Category-1': 'Testing', 'Score-1': 0.5620501637458801, 'Type-1': 'Name', 'Category-2': 'Testing 2', 'Score-2': 0.959932804107666, 'Type-2': 'Name 2'}, {'Text': 'Third', 'Id': 2, 'BeginOffset': 85, 'EndOffset': 94, 'Score': 0.9013960361480713, 'Category': 'Testing 3', 'Type': 'Value'}]