Home > Enterprise >  How to visualize cluster boundaries
How to visualize cluster boundaries

Time:12-10

I generated several datasets and used classifiers to predict which cluster each point belongs to. I need to draw the boundaries between the clusters on the chart, either as lines or as filled areas - it does not matter which. Please let me know if there is any way to do this.

My code:



import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import make_moons, make_circles
from sklearn.model_selection import train_test_split


# Number of points requested from each dataset generator below.
n_sample = 2000

def make_square(n_sample):
    """Generate a 2-D checkerboard-style dataset on the unit square.

    Points are drawn uniformly from ``[0, 1) x [0, 1)`` with
    ``np.random.sample``. A point is labelled 1 when both coordinates lie
    strictly on the same side of 0.5 (both above or both below) and 0
    otherwise, which includes points with a coordinate exactly equal to 0.5.

    Parameters
    ----------
    n_sample : int
        Number of points to generate.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is a float array of shape ``(n_sample, 2)``
        and ``y`` is an integer array of shape ``(n_sample,)``. The result
        can be indexed as ``data[0]`` / ``data[1]``.
    """
    points = np.random.sample((n_sample, 2))

    # Row-wise test: are both coordinates above 0.5, or both below 0.5?
    both_above = (points > 0.5).all(axis=1)
    both_below = (points < 0.5).all(axis=1)
    labels = (both_above | both_below).astype(int)

    # Return a plain tuple rather than packing a 2-D array and a list into one
    # ndarray: that ragged construction is rejected by recent NumPy releases.
    return points, labels



# Synthetic datasets to classify. Each entry is indexed below as
# data[0] (point coordinates) and data[1] (class labels).
datasets = [
    # sklearn generator, called with noise=0.09 and factor=0.5.
    make_circles(n_samples=n_sample, noise=0.09, factor=0.5),
    # Locally defined generator (see make_square above).
    make_square(n_sample),
    # sklearn generator, called with noise=0.12.
    make_moons(n_samples=n_sample, noise=0.12),
    
]

# For every dataset: score k-NN for k = 1..7 on a fixed 70/30 split, plot the
# accuracy-vs-k curve, and record the best k in ``ks`` (same order as
# ``datasets``).
ks = []
k_values = range(1, 8)
for data in datasets:
    X, y = data[0], data[1]
    # Fixed random_state keeps the split reproducible between runs and loops.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=33
    )

    # Test-set accuracy for each candidate k.
    accs = []
    for k in k_values:
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        accs.append(knn.score(X_test, y_test))

    plt.figure(figsize=(12, 6))
    plt.plot(k_values, accs, color='red', linestyle='dashed', marker='o',
             markerfacecolor='blue', markersize=10)
    plt.title('accs Score K Value')
    plt.xlabel('K Value')
    plt.ylabel('accs Score')

    # list.index returns the first maximum, so ties go to the smallest k.
    # "+ 1" maps the 0-based list position back to k, which starts at 1.
    best_score = max(accs)
    best_k = accs.index(best_score) + 1
    print("Max Score:", best_score, "k=", best_k)
    ks.append(best_k)

# Refit each dataset with its selected k and draw two scatter plots of the
# test points: one colored by true label, one colored by predicted label.
# Iterating with zip keeps this loop in step with ``datasets`` and ``ks``
# instead of relying on a hard-coded dataset count.
for data, k in zip(datasets, ks):
    X, y = data[0], data[1]
    # Same test_size and random_state as the k-selection loop.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=33
    )
    classifier = KNeighborsClassifier(n_neighbors=k)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # Ground-truth labels of the held-out points.
    plt.figure(figsize=(9, 9))
    plt.title("Test")
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test)

    # Labels predicted by the classifier for the same points.
    plt.figure(figsize=(9, 9))
    plt.title("Predict")
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred)
    

Example output:

Noisy moons dataset showing a two-class classification problem and a margin that roughly separates the purple points from the yellow points.

  • Related