I have 2 lists:
list_1 = [[1,1], [1,3], [1,1], [1,4]]
list_2 = ["string_1", "string_2", "string_3", "string_4"]
My goal is to remove the duplicate sublists from list_1
and also the strings in list_2
that have the same index as the sublists removed, while maintaining the order of the sublists.
I found a Stack Overflow answer showing that I can remove duplicate sublists while maintaining their order using:
from itertools import *
def unique_everseen(iterable, key=None):
    """Yield the unique elements of *iterable*, preserving first-seen order.

    ``key``, when given, maps each element to the hashable value used
    for the duplicate check; otherwise the element itself is used.
    """
    seen = set()
    for element in iterable:
        marker = element if key is None else key(element)
        if marker not in seen:
            seen.add(marker)
            yield element
list(unique_everseen(list_1, key=frozenset))
But it's not clear how to also remove the entries of list_2 whose indices match the sublists removed from list_1.
Desired output:
new_list_1 = [[1,1], [1,3], [1,4]]
new_list_2 = ["string_1", "string_2", "string_4"]
CodePudding user response:
You can use `zip`:
list_1 = [[1,1], [1,3], [1,1], [1,4]]
list_2 = ["string_1", "string_2", "string_3", "string_4"]

# Walk both lists in lockstep so the kept/skipped decision made for a
# sublist automatically applies to its companion string.
output_1, output_2 = [], []
seen = set()
for sublst, s in zip(list_1, list_2):
    tup = tuple(sublst)  # lists are unhashable; tuples can go in a set
    if tup not in seen:
        seen.add(tup)
        output_1.append(sublst)
        output_2.append(s)

print(output_1) # [[1, 1], [1, 3], [1, 4]]
print(output_2) # ['string_1', 'string_2', 'string_4']
Note that you need to store `tuple(sublst)` (not `sublst` itself) in the set `seen`, since a `sublst` like `[1,1]` is a list and therefore not hashable.
CodePudding user response:
res1, res2 = [], []
# Keep a sublist (and its companion string) only if an equal sublist
# has not already appeared earlier in list_1.
for i, sub in enumerate(list_1):
    if sub not in list_1[:i]:
        res1.append(sub)
        res2.append(list_2[i])
or
res1, res2 = map(list, zip(*[(list_1[i], list_2[i]) for i in range(len(list_1)) if list_1[i] not in list_1[:i]]))
CodePudding user response:
You just need to keep an additional list to keep track of duplicates in list_1 and then use the lists of comprehension to shorten the resulting lists.
list_1 = [[1,1], [1,3], [1,1], [1,4]]
list_2 = ["string_1", "string_2", "string_3", "string_4"]

# selector[i] is True exactly when list_1[i] is the first occurrence of
# that sublist; the same mask then filters both lists.
selector = [sub not in list_1[:i] for i, sub in enumerate(list_1)]

new_list_1 = [element for element, keep in zip(list_1, selector) if keep]
new_list_2 = [element for element, keep in zip(list_2, selector) if keep]
CodePudding user response:
Solution with `set()` and `zip`:
def deduplicate(items):
    """Yield (sublist, companion) pairs, dropping pairs whose sublist
    duplicates one seen earlier.

    Args:
        items: iterable of (list, value) pairs, e.g. ``zip(list_1, list_2)``.

    Yields:
        A (list, value) pair for the first occurrence of each sublist,
        in input order.
    """
    seen = set()
    for a, b in items:
        a = tuple(a)  # lists are unhashable; a tuple can live in the set
        if a not in seen:  # idiomatic form of the original "not a in seen"
            seen.add(a)
            yield list(a), b
list_1 = [[1,1], [1,3], [1,1], [1,4]]
list_2 = ["string_1", "string_2", "string_3", "string_4"]

# Deduplicate the zipped pairs once, then split the result back into two lists.
tmp = list(deduplicate(zip(list_1, list_2)))
first_items = [pair[0] for pair in tmp]
second_items = [pair[1] for pair in tmp]
print(first_items)
print(second_items)
# [[1, 1], [1, 3], [1, 4]]
# ['string_1', 'string_2', 'string_4']
CodePudding user response:
Another implementation based on your code, simply modifying your function to return the indexes of unique elements instead of the elements themselves:
from itertools import *

list_1 = [[1,1], [1,3], [1,1], [1,4]]
list_2 = ["string_1", "string_2", "string_3", "string_4"]

def unique_everseen(iterable):
    """Yield the index of the first occurrence of each distinct element,
    preserving order.

    Elements are compared via ``frozenset(element)``, matching the
    question's ``key=frozenset`` usage (so order and multiplicity inside
    a sublist are ignored).
    """
    seen = set()
    # enumerate supplies the index directly; the original's
    # iterable.index(element) was an O(n) scan per yielded element
    # (quadratic overall) and required a sequence rather than any iterable.
    for index, element in enumerate(iterable):
        k = frozenset(element)
        if k not in seen:
            seen.add(k)
            yield index

unique_indexes = list(unique_everseen(list_1))
print(unique_indexes)

filtered_list_1 = [list_1[i] for i in unique_indexes]
filtered_list_2 = [list_2[i] for i in unique_indexes]
print(filtered_list_1)
print(filtered_list_2)