import numpy as np
# Group label of every element; equal labels form contiguous runs.
group = np.array([0,0,0,0,1,1,1,1,1,1,2,2,2,2])
# Input values that should be filled group by group.
array = np.array([1,2,3,0,0,2,0,3,4,0,0,0,0,1])
# Expected result for the two arrays above.
targt = np.array([1,1,1,0,0,2,2,2,2,0,0,0,0,1])
def func(group: np.ndarray, array: np.ndarray) -> np.ndarray:
    """Placeholder for the group-wise fill that the question asks for.

    Args:
        group: Group label of each element (unused by this stub).
        array: Values to process.

    Returns:
        ``array`` itself, unchanged; the stub performs no work.
    """
    return array
- Step 1: Find the indexes of the first & last non-zero elements for each group, i.e. (0, 2) for group 0, (5, 8) for group 1, (13, 13) for group 2.
- Step 2: Replace the slice between those indexes with the first non-zero value within each group, i.e. group 0 [1,2,3,0] -> [1,1,1,0], group 1 [0,2,0,3,4,0] -> [0,2,2,2,2,0], group 2 no change.
How could I do it without splitting arrays or iterating?
[Solutions]
def first_nonzero_index(arr: np.ndarray, axis: int, mode: str = None, invalid_value: float = -1):
    """Locate the first or last non-zero entry of ``arr`` along ``axis``.

    Args:
        arr: Array (or array-like) to search.
        axis: Axis along which to search.
        mode: ``None`` or ``"head"`` returns the index of the first non-zero
            entry; any other value returns the index of the last one.
        invalid_value: Value reported where the slice along ``axis`` holds no
            non-zero entry.

    Returns:
        The result of ``np.where``: an array with ``axis`` removed (0-d for
        1-D input) holding the found indices, or ``invalid_value`` where
        nothing was found.
    """
    arr = np.asarray(arr)
    mask = arr != 0
    if mode is None or mode == "head":
        position = mask.argmax(axis=axis)
    else:
        # argmax on the flipped mask finds the last True; map that position
        # back to an index into the unflipped array.
        position = arr.shape[axis] - np.flip(mask, axis=axis).argmax(axis=axis) - 1
    # argmax returns 0 for an all-False slice, so mask those results out.
    return np.where(mask.any(axis=axis), position, invalid_value)
def func1(group: np.ndarray, array: np.ndarray):  # my solution
    """Fill each group's first-to-last non-zero span with its first non-zero value.

    The array is split into one chunk per group and each chunk is processed
    in a Python loop.

    Args:
        group: Non-negative integer group labels, sorted ascending (the split
            points are derived from ``np.bincount``).
        array: Values, same length as ``group``.

    Returns:
        The concatenated chunks.  Filled chunks are built as ``float`` arrays,
        so the result is float whenever at least one group has a non-zero
        entry.
    """
    # Sizes of every group but the last; their running sum gives the split points.
    group_size = np.bincount(group)[:-1]
    group_idx_end = np.cumsum(group_size)
    filled_chunks = []
    for arr in np.split(array, group_idx_end):
        if arr.size == 0:
            # A skipped group label yields an empty chunk; argmax would raise on it.
            filled_chunks.append(arr)
            continue
        idx_start = first_nonzero_index(arr, axis=0, mode="head")
        if idx_start == -1:
            # No non-zero entry in this group: keep it as is.
            filled_chunks.append(arr)
            continue
        # +1 turns the inclusive last index into an exclusive slice bound.
        idx_end = first_nonzero_index(arr, axis=0, mode="tail") + 1
        arr_ffill_first_nonzero = np.zeros_like(arr, dtype=float)
        arr_ffill_first_nonzero[idx_start:idx_end] = arr[idx_start]
        filled_chunks.append(arr_ffill_first_nonzero)
    return np.hstack(filled_chunks)
def fill(val):  # contributor @d.b
    """Overwrite the first-to-last non-zero span of ``val`` with its first non-zero value.

    Args:
        val: 1-D NumPy array holding one group's values.  Modified in place.

    Returns:
        ``val`` itself.  It is returned untouched when it has no non-zero
        entry.
    """
    inds = np.flatnonzero(val != 0)
    if inds.size == 0:
        # Nothing to fill; min()/max() of an empty sequence would raise.
        return val
    # flatnonzero returns ascending indices, so the ends are the extremes.
    start, end = inds[0], inds[-1]
    val[start:end + 1] = val[start]
    return val
def split(val, grp):  # contributor @d.b
    """Split ``val`` into one array per run of equal consecutive labels in ``grp``.

    Args:
        val: 1-D sequence of values.
        grp: Labels, same length as ``val``.  A new chunk starts wherever the
            label differs from the previous one.

    Returns:
        A list of new arrays (copies, so modifying them leaves ``val``
        untouched).  An empty ``val`` yields an empty list.
    """
    val = np.asarray(val)
    grp = np.asarray(grp)
    if val.size == 0:
        return []
    # Positions where the label changes are the first index of the next chunk.
    boundaries = np.flatnonzero(grp[1:] != grp[:-1]) + 1
    # np.split hands back views; copy so callers may modify chunks freely.
    return [chunk.copy() for chunk in np.split(val, boundaries)]
def func2(group, array):  # contributor @d.b
    """Fill each group's first-to-last non-zero span with its first non-zero value.

    Splits ``array`` into per-group chunks with ``split``, processes every
    chunk with ``fill`` and joins the results.

    Args:
        group: Group label of each element.
        array: Values, same length as ``group``.

    Returns:
        The concatenation of the filled chunks.
    """
    return np.concatenate([fill(chunk) for chunk in split(array, group)])
def func3(group: np.ndarray, array: np.ndarray):  # my solution
    """Fill each group's first-to-last non-zero span with its first non-zero value.

    Args:
        group: Non-negative integer group labels, sorted ascending (the split
            points are derived from ``np.bincount``).
        array: Values, same length as ``group``.  Not modified.

    Returns:
        A new array with the same dtype as ``array``.  Groups without any
        non-zero entry are left unchanged.
    """
    # Sizes of every group but the last; their running sum gives the split points.
    group_size = np.bincount(group)[:-1]
    group_idx_end = np.cumsum(group_size)
    # Split a copy: the chunks are views, so writing into them would
    # otherwise overwrite the caller's array.
    chunks = np.split(np.array(array), group_idx_end)
    for arr in chunks:
        idx = np.flatnonzero(arr)
        if idx.size == 0:
            # All-zero (or empty) group: nothing to fill.
            continue
        # flatnonzero returns ascending indices, so the ends are the extremes.
        start, end = idx[0], idx[-1]
        arr[start:end + 1] = arr[start]
    return np.hstack(chunks)
def func4(group, array):  # contributor @d.b
    """Fill each group's first-to-last non-zero span with its first non-zero value.

    Loop-free: a "difference" array is built that holds ``+value`` at the
    start of every span and ``-value`` just past its end, so that its
    cumulative sum reproduces the filled result.

    Args:
        group: Group labels, sorted ascending so that the first occurrences
            reported by ``np.unique`` are increasing (``reduceat`` needs that).
        array: Values, same length as ``group``.  Not modified.

    Returns:
        ``np.cumsum`` of the difference array; all zeros when ``array`` has no
        non-zero entry.
    """
    group = np.asarray(group)
    array = np.asarray(array)
    #STEP 1
    nonzero = (array != 0)
    if not nonzero.any():
        # No span exists; go through cumsum so the result type matches the normal path.
        return np.cumsum(np.zeros_like(array))
    # marker_idx: position, within the nonzero-only selection, where each group begins.
    _, marker_idx = np.unique(group[nonzero], return_index=True)
    nonzero_idx = np.arange(len(array))[nonzero]
    #STEP 2
    starts = np.minimum.reduceat(nonzero_idx, marker_idx)
    ends = np.maximum.reduceat(nonzero_idx, marker_idx)
    #STEP 3
    values = array[starts]
    out = np.zeros_like(array)
    out[starts] += values
    # Cancel each value just past its span.  Accumulate instead of assigning:
    # when the next span starts right there, a plain assignment would wipe
    # out that span's start value.
    stops = ends + 1
    # A span that reaches the end of the array has nothing to cancel.
    inside = stops < len(array)
    out[stops[inside]] -= values[inside]
    return np.cumsum(out)
Output: [1. 1. 1. 0. 0. 2. 2. 2. 2. 0. 0. 0. 0. 1.]
[Test]
n = 2000
# The 15-element pattern carries a 1 at positions 4 and 9; these mark where
# a new group begins once the cumulative sum below is taken.
group = np.array([0,0,0,0,1,0,0,0,0,1,0,0,0,0,0]*n) # 30000
array = np.array([1,2,3,0,0,2,0,3,0,0,0,0,0,1,0]*n)
# Turn the group-start markers into ascending integer group labels.
group = np.cumsum(group)
Run 100 times:
func1 | func2 | func3 | func4 (Best solution) |
---|---|---|---|
9.70s | 2.54s | 1.99s | 0.03s |
CodePudding user response:
# Group label of every element; equal labels form contiguous runs.
group = np.array([0,0,0,0,1,1,1,1,1,1,2,2,2,2])
# Input values that should be filled group by group.
array = np.array([1,2,3,0,0,2,0,3,4,0,0,0,0,1])
# Expected result for the two arrays above.
targt = np.array([1,1,1,0,0,2,2,2,2,0,0,0,0,1])
You can do the following steps:
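STEP 1. Find the indices of the nonzero items of
array
and mark where each new group starts: nonzero_idx -> [*0,1,2,/,*/,5,/,7,8,/,*/,/,/,13] (slashes are crossed-out zero entries, asterisks mark group starts), i.e. nonzero_idx -> [0, 1, 2, 5, 7, 8, 13]. In the code, marker_idx holds the positions within nonzero_idx where each group begins -> [0, 3, 6] (the groups themselves start at array positions [0, 4, 10]).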
STEP 2. Find starting and ending indices for each group, use
np.ufunc.reduceat
starts -> [ 0, 5, 13] ends -> [ 2, 8, 13]
STEP 3. Think of an
out
array such that np.cumsum(out)
collapses into the target
array, like so: [1,0,0,-1,0,2,0,0,0,-2,0,0,0,1] -> [1,1,1,0,0,2,2,2,2,0,0,0,0,1]
Now, code:
#STEP 1
# Boolean mask of the nonzero entries of `array`.
nonzero = (array != 0)
# marker_idx: position, within the nonzero-only selection, where each group begins.
_, marker_idx = np.unique(group[nonzero], return_index=True)
nonzero_idx = np.arange(len(array))[nonzero]
#STEP 2
# First and last nonzero position of every group.
starts = np.minimum.reduceat(nonzero_idx, marker_idx)
ends = np.maximum.reduceat(nonzero_idx, marker_idx)
#STEP 3
values = array[starts]
out = np.zeros_like(array)
out[starts] += values
# Cancel each value just past its span.  Accumulate instead of assigning:
# when the next span starts right there, a plain assignment would wipe out
# that span's start value.
stops = ends + 1
# A span that reaches the end of the array has nothing to cancel.
inside = stops < len(array)
out[stops[inside]] -= values[inside]
>>> np.cumsum(out)
array([1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1], dtype=int32)
No loops needed!
CodePudding user response:
1
import numpy as np
import pandas as pd
def foo(s):
    """Flag every position between the first and last positive entry of ``s``.

    Args:
        s: 1-D sequence of numbers (NumPy array, pandas Series, ...).

    Returns:
        A list of ``bool``, one per element of ``s``: ``True`` from the first
        through the last entry that is ``> 0`` (inclusive), ``False``
        elsewhere.  All ``False`` when ``s`` has no positive entry.
    """
    positive = np.flatnonzero(np.asarray(s) > 0)
    span = np.zeros(len(s), dtype=bool)
    if positive.size:
        # flatnonzero returns ascending indices, so the ends are the extremes.
        span[positive[0]:positive[-1] + 1] = True
    return span.tolist()
# For every group, overwrite the positions flagged by foo with the group's
# first positive value, then convert the result back to a NumPy array.
pd.Series(array).groupby(group).transform(
lambda x: x.mask(foo(x), x[x > 0].iloc[0])).to_numpy()
# array([1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1])
2
def split(val, grp):
    """Split ``val`` into one chunk per run of equal consecutive labels in ``grp``.

    Args:
        val: 1-D sequence of values.
        grp: Labels, same length as ``val``.  A new chunk starts wherever the
            label differs from the previous one.

    Returns:
        The list produced by ``np.array_split``.
    """
    val = np.asarray(val)
    grp = np.asarray(grp)
    # True at every position whose label differs from its predecessor; the
    # leading False keeps position 0 from being reported as a split point.
    changed = np.append(False, grp[1:] != grp[:-1])
    inds = np.where(changed)[0]
    return np.array_split(val, inds)
def fill(val):
    """Overwrite the first-to-last positive span of ``val`` with its first positive value.

    Args:
        val: 1-D NumPy array holding one group's values.  Modified in place.

    Returns:
        ``val`` itself.  It is returned untouched when it has no entry
        ``> 0``.
    """
    inds = np.flatnonzero(val > 0)
    if inds.size == 0:
        # Nothing to fill; min()/max() of an empty sequence would raise.
        return val
    # flatnonzero returns ascending indices, so the ends are the extremes.
    start, end = inds[0], inds[-1]
    val[start:end + 1] = val[start]
    return val
# Fill every per-group chunk and join the results.
# NOTE: fill() writes into its argument; if the chunks from np.array_split
# are views of `array` (presumably they are - confirm), `array` is changed too.
np.concatenate([fill(x) for x in split(array, group)])
# array([1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1])