How to improve the efficiency of a code in Python-CodePudding

I have an array Pe of shape (5100, 5100). I am trying to collect the connected neighboring elements, starting from one cell, using the code below, but for an array of this shape the computation takes about 100 seconds. Is there a more time-efficient way to do it?

def get_neighbor_indices(position, dimensions):
    '''
    Return the in-bounds orthogonal neighbors of a 2-D grid position.

    position is an (i, j) index tuple
    dimensions is a shape of np.array (only the first two entries are used)

    The candidates are generated in the order (i+1, j), (i-1, j),
    (i, j+1), (i, j-1); any candidate that falls outside
    [0, dimensions[0]) x [0, dimensions[1]) is dropped.
    '''
    i, j = position
    n_rows, n_cols = dimensions[0], dimensions[1]
    candidates = [(i + 1, j), (i - 1, j), (i, j + 1), (i, j - 1)]
    return [
        (r, c) for r, c in candidates
        if 0 <= r < n_rows and 0 <= c < n_cols
    ]

def iterate_array(init_i, init_j, arr, condition_func):
    '''
    Collect the values reachable from (init_i, init_j) through orthogonal
    neighbors whose value satisfies condition_func (a stack-based flood fill).

    arr is an instance of np.array (indexed as arr[(i, j)]; arr.shape[0]
    and arr.shape[1] bound the walk)
    condition_func is a function (value) => boolean

    Returns (result, timestamps): result lists the accepted values in visit
    order, and timestamps[k] is the time in seconds between accepting the
    first value and accepting result[k] (so timestamps[0] is 0).
    '''
    # Local import so the function does not depend on a module-level
    # `import time` that the surrounding snippet never provides.
    import time

    n_rows, n_cols = arr.shape[0], arr.shape[1]
    indices_to_check = [(init_i, init_j)]
    checked_indices = set()
    result = []
    timestamps = []
    t0 = None
    while indices_to_check:
        pos = indices_to_check.pop()
        if pos in checked_indices:
            continue
        item = arr[pos]
        checked_indices.add(pos)
        if not condition_func(item):
            continue
        result.append(item)
        t1 = time.time()
        if t0 is None:
            t0 = t1
        timestamps.append(t1 - t0)
        i, j = pos
        for r, c in ((i + 1, j), (i - 1, j), (i, j + 1), (i, j - 1)):
            if not (0 <= r < n_rows and 0 <= c < n_cols):
                continue
            # Cells already checked would be discarded when popped anyway;
            # skipping them here keeps the stack small without changing
            # the visit order.
            if (r, c) not in checked_indices:
                indices_to_check.append((r, c))
    return result, timestamps


# Start at the top-left cell and collect the connected values below Pin0.
Visited_Elements, timestamps = iterate_array(
    0, 0, Pe, lambda x: x < Pin0
)

CodePudding user response：

With scipy and a slight change in the way the filter condition is described this can be made rather fast:

import numpy as np
from scipy.ndimage import label
import time

def collect_array(init_i, init_j, arr, condition_arr):
    """Return the values of `arr` in the connected region containing the start cell.

    Parameters
    ----------
    init_i, init_j : int
        Row and column of the starting cell.
    arr : array_like
        Array the values are taken from.
    condition_arr : array of bool
        Same shape as `arr`; True where a cell satisfies the filter condition.

    Returns
    -------
    (values, elapsed)
        `values` is a 1-D array holding the elements of `arr` that belong to
        the same labeled component of `condition_arr` as the start cell, in
        row-major order. If the start cell itself fails the condition,
        `values` is an empty list. `elapsed` is the wall-clock time in
        seconds spent in this call.
    """
    t0 = time.time()
    if not condition_arr[init_i, init_j]:
        return [], time.time() - t0

    # With its default structuring element, `label` joins cells that touch
    # along an edge, i.e. the same up/down/left/right neighborhood as an
    # explicit flood fill.
    islands = label(condition_arr)[0]
    in_start_island = islands == islands[init_i, init_j]
    # Index the `arr` argument (not a module-level array) so the function
    # works for whatever array the caller passes in.
    return np.asarray(arr)[in_start_island], time.time() - t0


# Demo: threshold a random 5100 x 5100 array at Pin0 and collect the
# region connected to the top-left cell.
Pin0 = 0.7
Pe = np.random.rand(5100, 5100)

Visited_Elements, timestamp = collect_array(0, 0, Pe, Pe < Pin0)

print(Visited_Elements, timestamp)

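The core of the code is the label function, together with the fact that the condition function is replaced by a precomputed boolean array (Pe < Pin0). label assigns one integer id to each group of True cells that touch along an edge, which is the same up/down/left/right connectivity as the neighbor search in the question, so selecting the cells that share the starting cell's id yields the same set of elements without a Python-level loop. Note that the values come back in row-major order rather than in visit order, and a single elapsed time is returned instead of one timestamp per element.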