How to remove items from 2 lists based on duplicate items in one of the lists while maintaining order-CodePudding

I have 2 lists:

list_1 = [[1,1], [1,3], [1,1], [1,4]]
list_2 = ["string_1", "string_2", "string_3", "string_4"]

My goal is to remove the duplicate sublists from list_1 and also the strings in list_2 that have the same index as the sublists removed, while maintaining the order of the sublists.

I found in this SO answer that I can remove duplicate sublists while maintaining their order using:

from itertools import * 
def unique_everseen(iterable, key=None):
    """Yield each element of *iterable* the first time it is encountered.

    The order of first appearance is kept. When *key* is given, two
    elements count as the same if ``key(element)`` compares equal;
    otherwise the elements themselves are compared. Whatever is compared
    is stored in a set, so it must be hashable.
    """
    already_seen = set()
    for item in iterable:
        # Compare on the derived key when one is supplied, else on the item.
        marker = item if key is None else key(item)
        if marker in already_seen:
            continue
        already_seen.add(marker)
        yield item

# key=frozenset turns each (unhashable) sublist into a hashable value for the
# `seen` set. NOTE: a frozenset ignores order and repeated values, so
# [1, 2], [2, 1] and [1, 2, 2] would all be treated as duplicates.
list(unique_everseen(list_1, key=frozenset))

But it's not clear how to also remove the items of list_2 that sit at the same indices as the removed sublists.

Desired output:

new_list_1 = [[1,1], [1,3], [1,4]]
new_list_2 = ["string_1", "string_2", "string_4"]

CodePudding user response：

You can use zip:

list_1 = [[1, 1], [1, 3], [1, 1], [1, 4]]
list_2 = ["string_1", "string_2", "string_3", "string_4"]

# Tuples of the sublists already kept; lists themselves are not hashable.
seen = set()
output_1, output_2 = [], []

# Walk both lists in lockstep and keep a pair only the first time its
# sublist shows up.
for sublst, s in zip(list_1, list_2):
    tup = tuple(sublst)
    if tup not in seen:
        seen.add(tup)
        output_1.append(sublst)
        output_2.append(s)

print(output_1) # [[1, 1], [1, 3], [1, 4]]
print(output_2) # ['string_1', 'string_2', 'string_4']

Note that you need to store tuple(sublst) (not sublst itself) in the set seen, since a sublst like [1,1] is not hashable.

CodePudding user response：

# Variant 1: explicit loop.
# A sublist is kept only if no equal sublist occurs earlier in list_1; the
# string at the same index of list_2 is kept along with it. Membership is
# tested against a slice of the list, so the sublists need not be hashable
# (at the cost of quadratic time).
res1 = []
res2 = []

for i, item in enumerate(list_1):
    if item not in list_1[:i]:
        res1.append(item)
        res2.append(list_2[i])

# Variant 2: the same selection as a comprehension.
# The kept (sublist, string) pairs are collected first and then split into
# two lists. Splitting with comprehensions, rather than unpacking
# ``zip(*pairs)``, also works when nothing is kept (e.g. empty input), where
# the unpacking form raises ValueError.
kept = [
    (item, list_2[i])
    for i, item in enumerate(list_1)
    if item not in list_1[:i]
]
res1 = [item for item, _ in kept]
res2 = [s for _, s in kept]

CodePudding user response：

You just need to keep an additional list that marks which entries of list_1 are duplicates, and then use list comprehensions to filter both lists.

list_1 = [[1, 1], [1, 3], [1, 1], [1, 4]]
list_2 = ["string_1", "string_2", "string_3", "string_4"]

# selector[i] is True when list_1[i] has no equal sublist before it,
# i.e. when position i should be kept in both lists.
selector = [
    not any(earlier == current for earlier in list_1[:i])
    for i, current in enumerate(list_1)
]

# Apply the same keep/drop mask to both lists.
new_list_1 = [element for element, keep in zip(list_1, selector) if keep]
new_list_2 = [element for element, keep in zip(list_2, selector) if keep]

CodePudding user response：

Solution with set() and zip

def deduplicate(items):
    """Yield ``(list, other)`` pairs whose first member has not been seen yet.

    *items* is an iterable of 2-item pairs. The first member of each pair is
    converted to a tuple so it can be stored in a set; pairs whose tuple was
    already stored are skipped. The first member is yielded as a new list
    built from that tuple, the second member is yielded unchanged.
    """
    seen = set()
    for first, second in items:
        frozen = tuple(first)
        if frozen in seen:
            continue
        seen.add(frozen)
        yield list(frozen), second

list_1 = [[1, 1], [1, 3], [1, 1], [1, 4]]
list_2 = ["string_1", "string_2", "string_3", "string_4"]

# Pair the two lists up, drop the pairs whose sublist was already seen, then
# split the surviving pairs back into two lists for display.
tmp = list(deduplicate(zip(list_1, list_2)))
print([sublist for sublist, _ in tmp])
print([label for _, label in tmp])


# [[1, 1], [1, 3], [1, 4]]
# ['string_1', 'string_2', 'string_4']

CodePudding user response：

Another implementation based on your code, simply modifying your function to return the indexes of unique elements instead of the elements themselves:

from itertools import *

list_1 = [[1,1], [1,3], [1,1], [1,4]]
list_2 = ["string_1", "string_2", "string_3", "string_4"]
 
def unique_everseen(iterable):
    """Yield the position of each first-seen element of *iterable*, in order.

    Each element is reduced to ``frozenset(element)`` and compared on that
    value, so the elements must themselves be iterables of hashable items.
    Note that a frozenset ignores order and repeated values: ``[1, 2]``,
    ``[2, 1]`` and ``[1, 2, 2]`` are all considered the same element.

    Positions come from ``enumerate`` rather than ``iterable.index(...)``,
    which avoids rescanning the input for every yielded element and lets
    the function accept any iterable (including iterators and generators),
    not only sequences that provide ``.index``.
    """
    seen = set()
    for position, element in enumerate(iterable):
        k = frozenset(element)
        if k not in seen:
            seen.add(k)
            yield position

# Positions of the sublists to keep, in their original order.
unique_indexes = list(unique_everseen(list_1))
print(unique_indexes)

# Pick the same positions out of both lists so they stay aligned.
filtered_list_1 = [list_1[idx] for idx in unique_indexes]
filtered_list_2 = [list_2[idx] for idx in unique_indexes]
print(filtered_list_1)
print(filtered_list_2)