Remove and Merge data with different name in JSON object || Python-CodePudding

I want to merge duplicate entries based on their BeginOffset and EndOffset values, and rename the keys of the merged entries according to which duplicate they came from. Consider the following; the input I have is this:

data = [
{
  "Text": "First",
  "Id": 1,
  "BeginOffset": 60,
  "EndOffset": 73,
  "Score": 0.5620501637458801,
  "Category": "Testing",
  "Type": "Name",
    
},
{
  "Text": "Second",
  "Id": 5,
  "BeginOffset": 60,
  "EndOffset": 73,
  "Score": 0.959932804107666,
  "Category": "Testing 2",
  "Type": "Name 2"
},
{
  "Text": "Third",
  "Id": 2,
  "BeginOffset": 85,
  "EndOffset": 94,
  "Score": 0.9013960361480713,
  "Category": "Testing 3",
  "Type": "Value"}]

And this is the output I want:

data = [
{
  "Text": "First",
  "Id": 1,
  "BeginOffset": 60,
  "EndOffset": 73,
  "Score-1": 0.5620501637458801,
  "Category-1": "Testing",
  "Type-1": "Name",
  "Score-2": 0.959932804107666,
  "Category-2": "Testing 2",
  "Type-2": "Name 2"
    
},
{
  "Text": "Third",
  "Id": 2,
  "BeginOffset": 85,
  "EndOffset": 94,
  "Score": 0.9013960361480713,
  "Category": "Testing 3",
  "Type": "Value",
  
}]

You can see that the second entry has been merged into the first one because they have the same BeginOffset and EndOffset, so both are combined into one single object whose Score, Category and Type keys carry numbered suffixes.

Can anyone help with this? Thanks in advance.

CodePudding user response：

You could do it like this. There may be better ways:

data = [
    {
        "Text": "First",
        "Id": 1,
        "BeginOffset": 60,
        "EndOffset": 73,
        "Score": 0.5620501637458801,
        "Category": "Testing",
        "Type": "Name",

    },
    {
        "Text": "Second",
        "Id": 5,
        "BeginOffset": 60,
        "EndOffset": 73,
        "Score": 0.959932804107666,
        "Category": "Testing 2",
        "Type": "Name 2"
    },
    {
        "Text": "Third",
        "Id": 2,
        "BeginOffset": 85,
        "EndOffset": 94,
        "Score": 0.9013960361480713,
        "Category": "Testing 3",
        "Type": "Value"}
]

# Keys that identify duplicates (records with equal offsets are merged).
BO = 'BeginOffset'
EO = 'EndOffset'
# Keys that are renamed with a numeric suffix ("Score-1", "Score-2", ...)
# when records are merged.
SC = 'Score'
CA = 'Category'
TY = 'Type'
# Temporary bookkeeping key: how many records were folded into an entry.
# 0 means "not merged yet"; the key is removed before results are returned.
N = '_n'


def get_match(jdata, dc):
    """Return the first dict in *jdata* whose offsets equal those of *dc*.

    Two records match when both their ``BeginOffset`` and ``EndOffset``
    values are equal. Returns ``None`` when no record matches.
    """
    for d in jdata:
        if d[BO] == dc[BO] and d[EO] == dc[EO]:
            return d
    return None


def merge_by_offsets(records):
    """Merge records that share the same BeginOffset/EndOffset pair.

    The first record seen for a given offset pair is kept as the base entry
    (its ``Text`` and ``Id`` win). When a later record has the same offsets,
    the ``Category``, ``Score`` and ``Type`` values of the base entry are
    renamed to ``<key>-1`` and those of each further duplicate are added as
    ``<key>-2``, ``<key>-3`` and so on. Records without duplicates keep
    their keys unchanged.

    The input dicts are not modified; a new list of new dicts is returned.
    """
    merged = []
    for record in records:
        match = get_match(merged, record)
        if match is None:
            # Work on a copy so the caller's records are left untouched.
            merged.append({**record, N: 0})
            continue
        if match[N] == 0:
            # First duplicate: move the base entry's own values to "-1".
            for k in CA, SC, TY:
                match[f'{k}-1'] = match.pop(k)
            match[N] = 1
        # Advance the counter so each duplicate gets its own suffix instead
        # of overwriting the "-1" keys.
        match[N] += 1
        for k in CA, SC, TY:
            match[f'{k}-{match[N]}'] = record[k]

    # Strip the bookkeeping key before handing the result back.
    for entry in merged:
        del entry[N]
    return merged


output = merge_by_offsets(data)

print(output)

Output:

[{'Text': 'First', 'Id': 1, 'BeginOffset': 60, 'EndOffset': 73, 'Category-1': 'Testing', 'Score-1': 0.5620501637458801, 'Type-1': 'Name', 'Category-2': 'Testing 2', 'Score-2': 0.959932804107666, 'Type-2': 'Name 2'}, {'Text': 'Third', 'Id': 2, 'BeginOffset': 85, 'EndOffset': 94, 'Score': 0.9013960361480713, 'Category': 'Testing 3', 'Type': 'Value'}]