import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler,normalize
from sklearn.metrics import silhouette_score
newdf = pd.read_csv("D:\DATASETS/CC_GENERAL.csv")
x = newdf.drop('CUST_ID',axis = 1)
x.fillna(method = 'ffill',inplace = True)
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)
x_normalized = normalize(x_scaled)
#CONVERTING THE NUMPY ARRAY INTO A PANDAS DATAFRAME
x_normalized = pd.DataFrame(x_normalized)
#REDUCING THE DIMENTIONALITY OF THE DATA!
pca = PCA(n_components= 2)
x_principal = pca.fit_transform(x_normalized)
x_principal = pd.DataFrame(x_normalized)
x_principal = ['P1','P2']
ac2 = AgglomerativeClustering(n_clusters = 2)
plt.figure(figsize = (6,6))
plt.scatter(x_principal['P1'],x_principal['P2'])
c= ac2.fit_predict((x_principal),cmap = 'rainbow')
plt.show()
and this is my error:
TypeError Traceback (most recent call last)
<ipython-input-61-56f631c43c3e> in <module>
3 #visualizing the cluster
4 plt.figure(figsize = (6,6))
----> 5 plt.scatter(x_principal['P1'],x_principal['P2'])
6 c= ac2.fit_predict((x_principal),cmap = 'rainbow')
7 plt.show()
TypeError: list indices must be integers or slices, not str
CodePudding user response:
If you are trying to update the columns names for x_principal, which is more likely, you should be using x_principal.columns = ['P1, 'P2']
, right now you are assigning those values, which is overwriting the data
CodePudding user response:
x_principal
is a list containing two strings P1 and P2. So x_principal['P1']
is wrong. You can not index list elements with the element itself.