Say I have a list, L1, of pairs such as:
L = [[9,3],[3,7],[7,5], [5,3], [9,5], [9,7], [2,5], [7,2], [2,8]]
I would like to cluster together values from pairs that are completely connected
for example [9,3] is connected to [3,7], [9,5], and [9,7].[3,7] is also connected to [7,5] which is connected to [9,5] making this an overlapping group of pairs.
likewise [7,5],[2,5], and [7,2] are also a group of overlapping pairs
however, [2,8] would not be a group.
Hence, as a final result, I would like to have a list such as
G = [[3, 5, 7, 9], [2, 5, 7], [2, 8]]
Does anyone have any idea on how to do this efficiently?
Thank you :)
CodePudding user response:
I'm not sure if I found the most elegant way, but I think it solves your task.
L1 = [ [1,2], [2,6], [5,7], [1,6], [3,6], [2,3], [7,9], [6,7] ]
l1 = list(map(tuple, L1))
groups=set()
used = set()
#find groups:
for combi in itertools.combinations(l1,3):
t1, t2, t3 = combi
union = set(t1) | set(t2) | set(t3)
if len(union) == 3:
groups.add(tuple(union))
used.update([t1,t2,t3])
print(groups)
# {(1, 2, 6), (2, 3, 6)}
print(used)
# {(1, 2), (2, 3), (2, 6), (3, 6), (1, 6)}
#add tuples which are not in the groups:
result = list(groups) [tup for tup in l1 if tup not in used]
print(result)
[(1, 2, 6), (2, 3, 6), (5, 7), (7, 9), (6, 7)]
If you are searching for bigger chain / unknown length chains then this one works aswell. As in the comments already discussed, if data gets big the performance is probably suboptimal.
import itertools
from collections import Counter
#changed the input a bit for debugging reasons
L1 = [ [1,2], [2,5], [5,7], [7,8], [8,1], [5,4], [4,3], [3,7], [1,6], [5,6], [1,9] ]
l1 = list(map(tuple, L1))
len_chain_min = 3 # choose shortest valid chain
len_chain_max = 8 # choose highest valid chain
groups=set()
used = set()
#find groups:
for chains in range(len_chain_min, len_chain_max 1):
for combi in itertools.combinations(l1,chains):
combi_sets = list(map(set, [*combi]))
union = set.union(*combi_sets)
each_number_occurences = Counter((num for tupl in combi for num in tupl))
# chain is only valid if every number never occurs more than twice, no brnaches aloud
if (len(union) == chains) and (each_number_occurences.most_common(1)[0][1] <= 2):
groups.add(tuple(union))
used.update([*combi])
print(f"number of tuples: {chains}\nunion group: {tuple(union)}\nused tuples for this group: {combi_sets}\n")
# add tuples which are not in the groups:
result = sorted(list(groups),key=len) [tupl for tupl in l1 if tupl not in used]
print(f"{result=}")
Output:
number of tuples: 4
union group: (1, 2, 5, 6)
used tuples for this group: [{1, 2}, {2, 5}, {1, 6}, {5, 6}]
number of tuples: 4
union group: (3, 4, 5, 7)
used tuples for this group: [{5, 7}, {4, 5}, {3, 4}, {3, 7}]
number of tuples: 5
union group: (1, 2, 5, 7, 8)
used tuples for this group: [{1, 2}, {2, 5}, {5, 7}, {8, 7}, {8, 1}]
number of tuples: 5
union group: (1, 5, 6, 7, 8)
used tuples for this group: [{5, 7}, {8, 7}, {8, 1}, {1, 6}, {5, 6}]
number of tuples: 7
union group: (1, 2, 3, 4, 5, 7, 8)
used tuples for this group: [{1, 2}, {2, 5}, {8, 7}, {8, 1}, {4, 5}, {3, 4}, {3, 7}]
number of tuples: 7
union group: (1, 3, 4, 5, 6, 7, 8)
used tuples for this group: [{8, 7}, {8, 1}, {4, 5}, {3, 4}, {3, 7}, {1, 6}, {5, 6}]
result=[(1, 2, 5, 6), (3, 4, 5, 7), (1, 5, 6, 7, 8), (1, 2, 5, 7, 8), (1, 2, 3, 4, 5, 7, 8), (1, 3, 4, 5, 6, 7, 8), (1, 9)]
CodePudding user response:
L = np.array([[9,3],[3,7],[7,5], [5,3], [9,5], [9,7], [2,5], [7,2], [2,8]]) #change list to array
G = []
for j in range(0, len(L)):
f = np.where(L == L[j][0])[0]
o = np.where(L == L[j][0])[1]
o1 = o[f != j]
f = f[f != j]
used = []
for g in range(0, len(f)):
o2 = 1 - o1[g]
if [L[f[g]][o2],L[j][1]] in L.tolist() or [L[j][1], L[f[g]][o2]] in L.tolist():
coll = sorted([L[f[g]][o2],L[j][1], L[j][0]])
if len(f) > 1:
coll2 = coll.copy()
coll2.remove(L[j][0])
v = 0
while v < len(f):
coll2 = coll.copy()
coll2.remove(L[j][0])
added = []
for b in coll2:
added.append([L[f[v]][ 1 - o1[v]],b] in L.tolist() or [b, L[f[v]][ 1 - o1[v]]] in L.tolist())
if all(added):
coll.append(L[f[v]][ 1 - o1[v]])
coll = sorted(coll)
v =1
else:
v =1
if coll in G:
used.append(0)
else:
#print(j, g)
G.append(coll)
used.append(0)
elif coll in G:
used.append(0)
else:
G.append(coll)
used.append(0)
else:
pass
if 0 in used:
pass
else:
G.append(list(L[j]))
print(G)
not the most efficient way but it works