I've implemented some sort of object stability calculator in pandas. But performance time is horrible. Can someone help me, please.
def calculate_stability(ind, df, sidx, max_k):
indexes = sidx[:, ind]
indexes = np.delete(indexes, np.where(indexes == ind))
d = 0
last_crtit_obj_count = 0
for j in range(max_k):
if df.at[ind, "Class"] == df.at[indexes[j], "Class"]:
d = d 1
if d / (j 1) > 1/2:
last_crtit_obj_count = (j 1)
print(f'\t Object {ind} = {last_crtit_obj_count / max_k}')
return last_crtit_obj_count / max_k
df.iloc was very slow. That's why I changed to df.at.
Code is here
Need to vectorized version of loop.
CodePudding user response:
Here is the version without the loop:
def calculate_stability(ind, df, sidx, max_k):
indexes = sidx[:, ind]
indexes = indexes[indexes != ind][:max_k]
# `d` contains all values from the first condition from the original loop:
d = (df["Class"][ind] == df["Class"][indexes]).cumsum()
# `j` contains all values from the original `range` 1:
j = np.arange(1, len(d) 1)
# select `last_crtit_obj_count` values:
crtit_objs = j[(d / j > 1 / 2)]
# calculate `last_crtit_obj_count / max_k`
result = crtit_objs[-1] / max_k if len(crtit_objs) else 0
print(f"\t Object {ind} = {result}")
return result