how to overcome the "'numpy.ndarray' object is not callable" error?-CodePudding

I looked into the anomaly detection using both PCA and Autoencoder using the codes from the following link: Machine learning for anomaly detection and condition monitoring and I try to run the code part for using PCA with Mahalanobis Distance, however, if I run the code I always get the exception message and it turns out the problem is with the covariance matrix function part where the error 'numpy.ndarray' object is not callable appears. I tried to create new variables, change the dataframe into NumPy but nothing worked what is causing this error?

Code:

def cov_matrix(data, verbose=False):
    # data = pd.DataFrame(data).to_numpy()
    print('calculating the covaraince matrix')
    covariance_matrix = np.cov(data, rowvar=False)
    print('Done the covaraince matrix')
    if is_pos_def(covariance_matrix):
        inv_covariance_matrix = np.linalg.inv(covariance_matrix)
        if is_pos_def(inv_covariance_matrix):
            return covariance_matrix, inv_covariance_matrix
        else:
            print("Error: Inverse of Covariance Matrix is not positive definite!")
    else:
        print("Error: Covariance Matrix is not positive definite!")
        
def MahalanobisDist(inv_cov_matrix, mean_distr, data, verbose=False):
    inv_covariance_matrix = inv_cov_matrix
    vars_mean = mean_distr
    diff = data - vars_mean
    md = []
    for i in range(len(diff)):
        md.append(np.sqrt(diff[i].dot(inv_covariance_matrix).dot(diff[i])))
    return md

def MD_detectOutliers(dist, extreme=False, verbose=False):
    k = 3. if extreme else 2.
    threshold = np.mean(dist) * k
    outliers = []
    for i in range(len(dist)):
        if dist[i] >= threshold:
            outliers.append(i)  # index of the outlier
    return np.array(outliers)

def MD_threshold(dist, extreme=False, verbose=False):
    k = 3. if extreme else 2.
    threshold = np.mean(dist) * k
    return threshold

    #### Main code:
    # Inputting the training and test dataframes:
    data_train = np.array(principalDf_C0.values)
    data_test_C1 = np.array(principalDf_C1.values)
    data_test_C2 = np.array(principalDf_C2.values)
    data_test_C3 = np.array(principalDf_C4.values)
    data_test_C4 = np.array(principalDf_C5.values)
    
    print('Training Dataframe: ', data_train[:,])
    print('Test1 Dataframe: ', data_test_C1)
    print('Test2 Dataframe: ', data_test_C2)
    print('Test3 Dataframe: ', data_test_C3)
    print('Test4 Dataframe: ', data_test_C4)
    
    data_train_df = pd.DataFrame(principalDf_C0.values)
    data_test_df_C1 =  pd.DataFrame(principalDf_C1.values)
    data_test_df_C2 =  pd.DataFrame(principalDf_C2.values)
    data_test_df_C3 =  pd.DataFrame(principalDf_C4.values)
    data_test_df_C4 =  pd.DataFrame(principalDf_C5.values)
    
    # Calculating the covariance matrix:
    cov_matrix, inv_cov_matrix = cov_matrix(data=data_train)
    
    # Calculating the mean value for the input variables:
    mean_distr = data_train_df.mean(axis=0)
    
    # Calculating the Mahalanobis distance and threshold value to flag datapoints as an anomaly:
    dist_test_C1 = MahalanobisDist(inv_cov_matrix, mean_distr, data_test_df_C1, verbose=True)
    dist_test_C2 = MahalanobisDist(inv_cov_matrix, mean_distr, data_test_df_C2, verbose=True)
    dist_test_C3 = MahalanobisDist(inv_cov_matrix, mean_distr, data_test_df_C3, verbose=True)
    dist_test_C4 = MahalanobisDist(inv_cov_matrix, mean_distr, data_test_df_C4, verbose=True)
    dist_train = MahalanobisDist(inv_cov_matrix, mean_distr, data_train_df, verbose=True)
    threshold = MD_threshold(dist_train, extreme = True)

    # Distribution of Threshold value for flagging an anomaly:
    plt.figure()
    sns.distplot(np.square(dist_train),bins = 10, kde= False)
    # plt.xlim([0.0,15])
    plt.show()
    
    plt.figure()
    sns.distplot(dist_train, bins = 10, kde= True, color = 'green');
    # plt.xlim([0.0,5])
    plt.xlabel('Mahalanobis dist')
    plt.show()
    
    anomaly_train = pd.DataFrame(index=data_train_df.index)
    anomaly_train['Mob dist']= dist_train
    anomaly_train['Thresh'] = threshold
    # If Mob dist above threshold: Flag as anomaly
    anomaly_train['Anomaly'] = anomaly_train['Mob dist'] > anomaly_train['Thresh']
    anomaly_train.index = X_train_PCA.index
    
    anomaly_C1 = pd.DataFrame(index=data_test_df_C1.index)
    anomaly_C1['Mob dist']= dist_test_C1
    anomaly_C1['Thresh'] = threshold
    # If Mob dist above threshold: Flag as anomaly
    anomaly_C1['Anomaly'] = anomaly_C1['Mob dist'] > anomaly_C1['Thresh']
    anomaly_C1.index = data_test_df_C1.index
    anomaly_C1.head()
    
    anomaly_C2 = pd.DataFrame(index=data_test_df_C2.index)
    anomaly_C2['Mob dist']= dist_test_C2
    anomaly_C2['Thresh'] = threshold
    # If Mob dist above threshold: Flag as anomaly
    anomaly_C2['Anomaly'] = anomaly_C2['Mob dist'] > anomaly_C2['Thresh']
    anomaly_C2.index = data_test_df_C2.index
    anomaly_C2.head()
    
    anomaly_C3 = pd.DataFrame(index=data_test_df_C3.index)
    anomaly_C3['Mob dist']= dist_test_C3
    anomaly_C3['Thresh'] = threshold
    # If Mob dist above threshold: Flag as anomaly
    anomaly_C3['Anomaly'] = anomaly_C3['Mob dist'] > anomaly_C3['Thresh']
    anomaly_C3.index = data_test_df_C3.index
    anomaly_C3.head()
    
    anomaly_C4 = pd.DataFrame(index=data_test_df_C4.index)
    anomaly_C4['Mob dist']= dist_test_C4
    anomaly_C4['Thresh'] = threshold
    # If Mob dist above threshold: Flag as anomaly
    anomaly_C4['Anomaly'] = anomaly_C4['Mob dist'] > anomaly_C4['Thresh']
    anomaly_C4.index = data_test_df_C4.index
    anomaly_C4.head()

    final_scored = pd.concat([anomaly_train, anomaly_C1, anomaly_C2, anomaly_C3, anomaly_C4])
    print(final_scored)
except Exception:
    print('Cannot implement Anomaly detection using Mahalanobis distance metric')
    pass

CodePudding user response：

Per your comment, you have a namespace collision between a var cov_matrix and a function cov_matrix()

Change that line to e.g.

matrix, inv_matrix = cov_matrix(data=data_train)

And update your code accordingly, or rename cov_matrix(). A good convention is that functions which return things should have verbs in their name, e.g. generate_cov_matrix() or calculate_cov_matrix().*

(Yes, as written the code should run once, since AFAICS you don't call cov_matrix() again after that, but I'm guessing you're using a persistent interpreter session and evaluating the code again once cov_matrix() has been overwritten.)

*This convention assumes that functions are there to have side effects, and return things exceptionally. Of course if you are writing functionally, and having side effects is the exception not the rule, you would likely want to invert it, or follow another convention entirely.

CodePudding user response：

My guess is that you are running into an issue where you have a variable named cov_matrix and a function named cov_matrix. At some point I think you overwrote the function with the variable, which is a numpy.ndarray. Later you try calling the function cov_matrix(), but the object is actually the variable, i.e. the numpy array.