I have the example lists below, and I want to get the following expected outputs.
example1:
list_for_compare = ['1', '4', '5', '7', '8', '9', '10']
mainlist = ['4', '7', '9']
expected_output1 = [["4", "5"], ["7", "8"], ["9", "10"]]
example2:
list_for_compare = ['1.2.1', '1.2.3', '2.2.5', '3.2.4', '5.4.3', '5.5.1', '5.8.2']
mainlist = ['1.2.2', '2.2.4', '5.4.2']
expected_output2 = [["1.2.2", "1.2.3"], ["2.2.4", "2.2.5", "3.2.4"], ["5.4.2", "5.5.1", "5.8.2"]]
I tried the code below to find the in-between items.
def inbetweenitems_list(mainlist, inbetween):
    """Collect the items of ``inbetween`` that lie between adjacent markers.

    For every consecutive pair ``(lower, upper)`` taken from ``mainlist``,
    one entry ``[lower, items, upper]`` is produced, where ``items`` holds
    each element ``x`` of ``inbetween`` satisfying ``lower <= x <= upper``
    (both bounds inclusive), in their original order.

    Elements are compared with the plain ``<=`` operator, so string inputs
    are ordered lexicographically ('10' sorts before '4').

    Returns an empty list when ``mainlist`` has fewer than two elements.
    """
    withmidlist = []
    # Pair each marker with its successor: (m0, m1), (m1, m2), ...
    for lower, upper in zip(mainlist, mainlist[1:]):
        found = [x for x in inbetween if lower <= x <= upper]
        withmidlist.append([lower, found, upper])
    return withmidlist
The output is as below:
example1:
inbetweenitems_list(mainlist, list_for_compare)
output1 = [['4', ['4', '5', '7'], '7'], ['7', ['7', '8', '9'], '9']]
example2:
inbetweenitems_list(mainlist, list_for_compare)
output2 = [['1.2.2', ['1.2.3'], '2.2.4'], ['2.2.4', ['2.2.5', '3.2.4'], '5.4.2']]
I'm not finding many ways to filter and compare stringified numbers and get the expected outputs.
CodePudding user response:
This does what you ask. This also makes an entry for all the items prior to the first match; you can remove the top entry in the output list, if you want.
def process(list_for_compare, mainlist):
    """Split ``list_for_compare`` into groups at the markers in ``mainlist``.

    Items are visited in order. Whenever an item compares ``>=`` the marker
    currently being awaited, the group collected so far is closed, a new
    group is started with that item, and the next marker is awaited. At most
    one marker is consumed per item.

    The first group of the result holds every item seen before the first
    marker was reached (it may be empty); callers who do not want it can
    drop ``result[0]``.

    Elements are compared with the plain ``>=`` operator, so string inputs
    are ordered lexicographically.

    ``mainlist`` is only read, never modified. With an empty ``mainlist``
    all items end up in a single group.
    """
    # Walk the markers with an iterator so the caller's list is left intact.
    markers = iter(mainlist)
    exhausted = object()  # private "no more markers" sentinel
    nextup = next(markers, exhausted)

    output = []
    accum = []
    for item in list_for_compare:
        if nextup is not exhausted and item >= nextup:
            # Marker reached: close the current group and await the next one.
            output.append(accum)
            accum = []
            nextup = next(markers, exhausted)
        accum.append(item)
    output.append(accum)
    return output
# Run both examples from the question through process().
print(process(['1', '4', '5', '7', '8', '9', '10'], ['4', '7', '9']))
print(process(['1.2.1', '1.2.3', '2.2.5', '3.2.4', '5.4.3', '5.5.1', '5.8.2'], ['1.2.2', '2.2.4', '5.4.2']))
Output:
timr@Tims-NUC:~/src$ python x.py
[['1'], ['4', '5'], ['7', '8'], ['9', '10']]
[['1.2.1'], ['1.2.3'], ['2.2.5', '3.2.4'], ['5.4.3', '5.5.1', '5.8.2']]
CodePudding user response:
You can use a regular expression to extract all numbers from the strings and convert them to integers in a tuple. This will make them comparable using their numeric content.
import re
def sortedSplit(L, splits, key=None):
    """Group the values of ``L`` under the split value they follow.

    One group is created per entry of ``splits``, with the split value as
    its first element. Each value of ``L`` is appended to the group of the
    last split whose key is less than or equal to the value's key. Values
    whose key is smaller than every split key are dropped, and a value equal
    to the split that heads its group is not added a second time.

    ``key`` maps a value to something comparable. When omitted, every run of
    digits in the string is converted to ``int`` and the resulting tuple is
    used, so '1.10.2' compares as (1, 10, 2) and '10' sorts after '9'.

    ``splits`` is expected to be in ascending key order.

    Returns the list of groups, in the order of ``splits``.
    """
    if key is None:
        def numeric_key(v):
            # "\d+" grabs whole digit runs: '5.4.3' -> (5, 4, 3)
            return tuple(map(int, re.findall(r"\d+", v)))
        key = numeric_key

    result = [[s] for s in splits]          # split value is first of group
    split_keys = [key(s) for s in splits]   # computed once, not per comparison
    for v in L:
        v_key = key(v)
        # Scan from the largest split downwards; the first split that is
        # not larger than the value owns it.
        for group, split_key in zip(reversed(result), reversed(split_keys)):
            if v_key < split_key:
                continue                    # split is larger, keep looking
            if v != group[0]:
                group.append(v)
            break                           # group found, process next value
    return result
Output:
L = ['1', '4', '5', '7', '8', '9', '10']
S = ['4', '7', '9']
print(sortedSplit(L,S))
# [['4', '5'], ['7', '8'], ['9', '10']]
L = ['1.2.1', '1.2.3', '2.2.5', '3.2.4', '5.4.3', '5.5.1', '5.8.2']
S = ['1.2.2', '2.2.4', '5.4.2']
print(sortedSplit(L,S))
# [['1.2.2', '1.2.3'], ['2.2.4', '2.2.5', '3.2.4'],
#  ['5.4.2', '5.4.3', '5.5.1', '5.8.2']]
To make it run slightly faster, you can use a binary search to find the target group for each value. The bisect module has a bisect_right function that performs a binary search in an ordered list and returns an index.
import re
from bisect import bisect_right
def sortedSplit(L, splits, key=None):
    """Group the values of ``L`` under the split value they follow.

    One group is created per entry of ``splits``, with the split value as
    its first element. Each value of ``L`` is appended to the group of the
    last split whose key is less than or equal to the value's key, located
    by binary search. Values whose key is smaller than every split key are
    dropped, and a value equal to the split that heads its group is not
    added a second time.

    ``key`` maps a value to something comparable. When omitted, every run of
    digits in the string is converted to ``int`` and the resulting tuple is
    used, so '1.10.2' compares as (1, 10, 2) and '10' sorts after '9'.

    ``splits`` must be in ascending key order for the binary search to give
    meaningful results.

    Returns the list of groups, in the order of ``splits``.
    """
    if key is None:
        def numeric_key(v):
            # "\d+" grabs whole digit runs: '5.4.3' -> (5, 4, 3)
            return tuple(map(int, re.findall(r"\d+", v)))
        key = numeric_key

    result = [[s] for s in splits]          # split value is first of group
    split_keys = [key(s) for s in splits]   # ascending keys for binary search
    for v in L:
        # Index of the last split whose key is <= key(v); -1 if there is none.
        g = bisect_right(split_keys, key(v)) - 1
        if g >= 0 and splits[g] != v:       # skip values equal to the split
            result[g].append(v)
    return result