Home > Net >  How to group pairs in a list based on share items and connections in python
How to group pairs in a list based on share items and connections in python

Time:07-10

Say I have a list, L1, of pairs such as:

L = [[9,3],[3,7],[7,5], [5,3], [9,5], [9,7], [2,5], [7,2], [2,8]]

I would like to cluster together values from pairs that are completely connected

for example [9,3] is connected to [3,7], [9,5], and [9,7].[3,7] is also connected to [7,5] which is connected to [9,5] making this an overlapping group of pairs.

likewise [7,5],[2,5], and [7,2] are also a group of overlapping pairs

however, [2,8] would not be a group.

Hence, as a final result, I would like to have a list such as

G = [[3, 5, 7, 9], [2, 5, 7], [2, 8]]

Does anyone have any idea on how to do this efficiently?

Thank you :)

CodePudding user response:

I'm not sure if I found the most elegant way, but I think it solves your task.

L1 = [ [1,2], [2,6], [5,7], [1,6], [3,6], [2,3], [7,9], [6,7] ]
l1 = list(map(tuple, L1))
groups=set()
used = set()
#find groups:
for combi in itertools.combinations(l1,3):
    t1, t2, t3 = combi
    union = set(t1) | set(t2) | set(t3)
    if len(union) == 3:
        groups.add(tuple(union))
        used.update([t1,t2,t3])
print(groups)
# {(1, 2, 6), (2, 3, 6)}
print(used)
# {(1, 2), (2, 3), (2, 6), (3, 6), (1, 6)}

#add tuples which are not in the groups:
result = list(groups)   [tup for tup in l1 if tup not in used]
print(result)

[(1, 2, 6), (2, 3, 6), (5, 7), (7, 9), (6, 7)]

If you are searching for bigger chain / unknown length chains then this one works aswell. As in the comments already discussed, if data gets big the performance is probably suboptimal.

import itertools
from collections import Counter

#changed the input a bit for debugging reasons
L1 = [ [1,2], [2,5], [5,7], [7,8], [8,1], [5,4], [4,3], [3,7], [1,6], [5,6], [1,9] ]

l1 = list(map(tuple, L1))
len_chain_min = 3 # choose shortest valid chain
len_chain_max = 8 # choose highest valid chain
groups=set()
used = set()

#find groups:
for chains in range(len_chain_min, len_chain_max 1):
    for combi in itertools.combinations(l1,chains):
        combi_sets = list(map(set, [*combi]))
        union = set.union(*combi_sets)
        each_number_occurences = Counter((num for tupl in combi for num in tupl))
        # chain is only valid if every number never occurs more than twice, no brnaches aloud
        if (len(union) == chains) and (each_number_occurences.most_common(1)[0][1] <= 2):
            groups.add(tuple(union))
            used.update([*combi])
            print(f"number of tuples: {chains}\nunion group: {tuple(union)}\nused tuples for this group: {combi_sets}\n")

# add tuples which are not in the groups:
result = sorted(list(groups),key=len)   [tupl for tupl in l1 if tupl not in used]
print(f"{result=}")

Output:

number of tuples: 4
union group: (1, 2, 5, 6)
used tuples for this group: [{1, 2}, {2, 5}, {1, 6}, {5, 6}]

number of tuples: 4
union group: (3, 4, 5, 7)
used tuples for this group: [{5, 7}, {4, 5}, {3, 4}, {3, 7}]

number of tuples: 5
union group: (1, 2, 5, 7, 8)
used tuples for this group: [{1, 2}, {2, 5}, {5, 7}, {8, 7}, {8, 1}]

number of tuples: 5
union group: (1, 5, 6, 7, 8)
used tuples for this group: [{5, 7}, {8, 7}, {8, 1}, {1, 6}, {5, 6}]

number of tuples: 7
union group: (1, 2, 3, 4, 5, 7, 8)
used tuples for this group: [{1, 2}, {2, 5}, {8, 7}, {8, 1}, {4, 5}, {3, 4}, {3, 7}]

number of tuples: 7
union group: (1, 3, 4, 5, 6, 7, 8)
used tuples for this group: [{8, 7}, {8, 1}, {4, 5}, {3, 4}, {3, 7}, {1, 6}, {5, 6}]

result=[(1, 2, 5, 6), (3, 4, 5, 7), (1, 5, 6, 7, 8), (1, 2, 5, 7, 8), (1, 2, 3, 4, 5, 7, 8), (1, 3, 4, 5, 6, 7, 8), (1, 9)]

CodePudding user response:

L = np.array([[9,3],[3,7],[7,5], [5,3], [9,5], [9,7], [2,5], [7,2], [2,8]]) #change list to array
G = []
for j in range(0, len(L)):
    
    f = np.where(L == L[j][0])[0]
    o = np.where(L == L[j][0])[1]
    o1 = o[f != j]
    f = f[f != j]
    used = []
    for g in range(0, len(f)):
        o2 = 1 - o1[g]
        if [L[f[g]][o2],L[j][1]] in L.tolist() or [L[j][1], L[f[g]][o2]] in L.tolist():
            coll = sorted([L[f[g]][o2],L[j][1], L[j][0]])
            if len(f) > 1:
                coll2 = coll.copy()
                coll2.remove(L[j][0])
                v = 0
                while v < len(f):
                    coll2 = coll.copy()
                    coll2.remove(L[j][0])
                    added = []
                    for b in coll2:
                        added.append([L[f[v]][ 1 - o1[v]],b] in L.tolist() or [b, L[f[v]][ 1 - o1[v]]] in L.tolist())
                    if all(added):   
                        coll.append(L[f[v]][ 1 - o1[v]])
                        coll = sorted(coll)
                        v  =1
                    else:
                        v  =1
                if coll in G:
                    used.append(0)
                else:
                    #print(j, g)
                    G.append(coll)
                    used.append(0)                          
            elif coll in G:
                used.append(0)
            else:
                G.append(coll)
                used.append(0)
        else:
          pass
    if 0 in used:
        pass
    else:
        G.append(list(L[j]))

print(G)

not the most efficient way but it works

  • Related