How to modify the code using for loop in MATLAB-CodePudding

I have the following code, in which I first read the dataset and then apply k-means clustering to it. I have three clusters. I want to modify the code to use a for loop: I want to plot the clusters in a for loop and also calculate the mean of the first column of every cluster. I want all of this done in a single for loop instead of the manually written code. How can I do that? Can anybody help me with that?

Dataset

0.119349659383,2765187888.188327790000,-50.272277924288,0.000010124208
0.119639999551,2780553879.583636760000,-45.173332876699,0.000015075661
0.119899673836,2765356033.223678110000,-50.327888424563,0.000010123978
0.120209965074,2780981089.939126490000,-45.152589356947,0.000015059274
0.120449679454,2765635512.158593650000,-50.363949423158,0.000010131346

% Read the data set, cluster it with k-means, plot the clusters and print
% per-cluster statistics. The printed labels below treat column 1 as a time
% stamp, column 2 as frequency, column 3 as amplitude and column 4 as pulse
% width.
dataset = readmatrix('newdata.txt');

% Run k-means for k = 1..5 and draw one silhouette figure per candidate k.
clust = zeros(size(dataset,1),5);
for i = 1:5
    clust(:,i) = kmeans(dataset,i,'emptyaction','singleton',...
        'replicate',5);
    figure;
    [silh4,h] = silhouette(dataset,clust(:,i));
end

% Silhouette-based evaluation of the five candidate clusterings.
eva = evalclusters(dataset,clust,'silhouette');
K = eva.OptimalK;   % kept for reference; the clustering below is fixed at 3 clusters

% Final clustering with 3 clusters, shown over columns 1 and 2 together
% with the centroids.
[idx,C,sumdist] = kmeans(dataset,3,'Display','final','Replicates',5);
figure
gscatter(dataset(:,1),dataset(:,2),idx,'bgm')
hold on
plot(C(:,1),C(:,2),'kx')
legend('Cluster 1','Cluster 2','Cluster 3','Cluster Centroid')
%% This code to be using For Loop
% Copy the data and store the cluster label in column 5. (The former
% zeros(size(dataset,1)) pre-allocation built an N-by-N matrix that was
% immediately overwritten, so it has been dropped.)
dataset_idx = dataset(:,:);
dataset_idx(:,5) = idx;

% Split the labelled rows into one matrix per cluster.
cluster1 = dataset_idx(dataset_idx(:,5) == 1,:);
cluster2 = dataset_idx(dataset_idx(:,5) == 2,:);
cluster3 = dataset_idx(dataset_idx(:,5) == 3,:);

% One scatter figure (column 1 vs. column 2) per cluster.
figure;
scatter(cluster1(:,1),cluster1(:,2))

legend('Cluster 1')
title('Cluster 1')
figure;
scatter(cluster2(:,1),cluster2(:,2))

legend('Cluster 2')
title('Cluster 2')
figure;
scatter(cluster3(:,1),cluster3(:,2))

legend('Cluster 3')
title('Cluster 3')
%% This code to be using For Loop Instead of manually written so much lines
% Per-cluster statistics. Each format string needs a conversion specifier
% (%e) for the value; without one fprintf never prints the number.
% Note: diff of a single-row cluster is empty, so its mean DeltaT is NaN.
T=cluster1(:,1);
DeltaT = diff(T);
Mcluster1Timeseries = mean(DeltaT);
formatSpec = 'Mean DeltaT of Cluster 1 is %e ';
fprintf(formatSpec,Mcluster1Timeseries)
Mcluster1Frequncy = mean(cluster1(:,2));
formatSpec = 'Mean Frequncy of Cluster 1 is %e ';
fprintf(formatSpec,Mcluster1Frequncy)
Mcluster1Amplitude = max(cluster1(:,3));
formatSpec = 'Max Amplitude of Cluster 1 is %4.4f ';
fprintf(formatSpec,Mcluster1Amplitude)
Mcluster1PW = mean(cluster1(:,4));
formatSpec = 'Mean Pulse Width of Cluster 1 is %e ';
fprintf(formatSpec,Mcluster1PW)
T2=cluster2(:,1);
DeltaT2 = diff(T2);
Mcluster2Timeseries = mean(DeltaT2);
formatSpec = 'Mean DeltaT of Cluster 2 is %e ';
fprintf(formatSpec,Mcluster2Timeseries)
Mcluster2Frequncy = mean(cluster2(:,2));
formatSpec = 'Mean Frequncy of Cluster 2 is %e ';
fprintf(formatSpec,Mcluster2Frequncy)
Mcluster2Amplitude = max(cluster2(:,3));
formatSpec = 'Max Amplitude of Cluster 2 is %4.4f ';
fprintf(formatSpec,Mcluster2Amplitude)
Mcluster2PW = mean(cluster2(:,4));
formatSpec = 'Mean Pulse Width of Cluster 2 is %e ';
fprintf(formatSpec,Mcluster2PW)
T3=cluster3(:,1);
DeltaT3 = diff(T3);
Mcluster3Timeseries = mean(DeltaT3);
formatSpec = 'Mean DeltaT of Cluster 3 is %e ';
fprintf(formatSpec,Mcluster3Timeseries)
Mcluster3Frequncy = mean(cluster3(:,2));
formatSpec = 'Mean Frequncy of Cluster 3 is %e ';
fprintf(formatSpec,Mcluster3Frequncy)
Mcluster3Amplitude = max(cluster3(:,3));
formatSpec = 'Max Amplitude of Cluster 3 is %4.4f ';
fprintf(formatSpec,Mcluster3Amplitude)
Mcluster3PW = mean(cluster3(:,4));
formatSpec = 'Mean Pulse Width of Cluster 3 is %e ';
fprintf(formatSpec,Mcluster3PW)

CodePudding user response：

In the code below I've used a cell array, though in this case you could also use a 3-dimensional array if you're so inclined. In order to get the "Cluster 1/2/3" labels to match, I just used string formatting a bit more.

Here's what I came up with.

% Read the data set, cluster it with k-means, then plot each cluster and
% print its statistics from for loops instead of hand-copied code. The
% printed labels treat column 1 as a time stamp, column 2 as frequency,
% column 3 as amplitude and column 4 as pulse width.
dataset = readmatrix('newdata.txt');

% Run k-means for k = 1..5 and draw one silhouette figure per candidate k.
clust = zeros(size(dataset,1),5);
for i = 1:5
    clust(:,i) = kmeans(dataset,i,'emptyaction','singleton',...
        'replicate',5);
    figure;
    [silh4,h] = silhouette(dataset,clust(:,i));
end

% Silhouette-based evaluation of the five candidate clusterings.
eva = evalclusters(dataset,clust,'silhouette');
K = eva.OptimalK;   % kept for reference; the clustering below is fixed at 3 clusters

% Final clustering with 3 clusters, shown over columns 1 and 2 together
% with the centroids.
[idx,C,sumdist] = kmeans(dataset,3,'Display','final','Replicates',5);
figure
gscatter(dataset(:,1),dataset(:,2),idx,'bgm')
hold on
plot(C(:,1),C(:,2),'kx')
legend('Cluster 1','Cluster 2','Cluster 3','Cluster Centroid')

% Copy the data and store the cluster label in column 5. (The former
% zeros(size(dataset,1)) pre-allocation built an N-by-N matrix that was
% immediately overwritten, so it has been dropped.)
dataset_idx = dataset(:,:);
dataset_idx(:,5) = idx;

% clusters{i} holds the rows assigned to cluster i; the clusters can have
% different row counts, which a cell array accommodates.
clusters = cell(3,1);
for i = 1:3
    clusters{i} = dataset_idx(dataset_idx(:,5) == i,:);
    figure;
    scatter(clusters{i}(:,1),clusters{i}(:,2))
    legend(sprintf('Cluster %d',i))
    title(sprintf('Cluster %d',i))
end


% Per-cluster statistics. Every format string carries two specifiers: %d
% for the cluster number and %e (or %4.4f) for the value. With only %d
% present, fprintf would recycle the format and print the value as a second
% "cluster number".
for i = 1:3
    T = clusters{i}(:,1);
    DeltaT = diff(T);                    % empty for a single-row cluster
    MclusterTimeseries = mean(DeltaT);   % mean of empty is NaN
    formatSpec = 'Mean DeltaT of Cluster %d is %e ';
    fprintf(formatSpec,i,MclusterTimeseries)
    MclusterFrequncy = mean(clusters{i}(:,2));
    formatSpec = 'Mean Frequncy of Cluster %d is %e ';
    fprintf(formatSpec,i,MclusterFrequncy)
    MclusterAmplitude = max(clusters{i}(:,3));
    formatSpec = 'Max Amplitude of Cluster %d is %4.4f ';
    fprintf(formatSpec,i,MclusterAmplitude)
    MclusterPW = mean(clusters{i}(:,4));
    formatSpec = 'Mean Pulse Width of Cluster %d is %e ';
    fprintf(formatSpec,i,MclusterPW)
end

Here is a further modification so that the command window output is more readable.

% Read the data set, cluster it with k-means, then plot each cluster and
% print its statistics, one line per statistic under a per-cluster header.
% The printed labels treat column 1 as a time stamp, column 2 as frequency,
% column 3 as amplitude and column 4 as pulse width.
dataset = readmatrix('newdata.txt');

% Run k-means for k = 1..5 and draw one silhouette figure per candidate k.
clust = zeros(size(dataset,1),5);
for i = 1:5
    clust(:,i) = kmeans(dataset,i,'emptyaction','singleton',...
        'replicate',5);
    figure;
    [silh4,h] = silhouette(dataset,clust(:,i));
end

% Silhouette-based evaluation of the five candidate clusterings.
eva = evalclusters(dataset,clust,'silhouette');
K = eva.OptimalK;   % kept for reference; the clustering below is fixed at 3 clusters

% Final clustering with 3 clusters, shown over columns 1 and 2 together
% with the centroids.
[idx,C,sumdist] = kmeans(dataset,3,'Display','final','Replicates',5);
figure
gscatter(dataset(:,1),dataset(:,2),idx,'bgm')
hold on
plot(C(:,1),C(:,2),'kx')
legend('Cluster 1','Cluster 2','Cluster 3','Cluster Centroid')

% Copy the data and store the cluster label in column 5. (The former
% zeros(size(dataset,1)) pre-allocation built an N-by-N matrix that was
% immediately overwritten, so it has been dropped.)
dataset_idx = dataset(:,:);
dataset_idx(:,5) = idx;

% clusters{i} holds the rows assigned to cluster i; the clusters can have
% different row counts, which a cell array accommodates.
clusters = cell(3,1);
for i = 1:3
    clusters{i} = dataset_idx(dataset_idx(:,5) == i,:);
    figure;
    scatter(clusters{i}(:,1),clusters{i}(:,2))
    legend(sprintf('Cluster %d',i))
    title(sprintf('Cluster %d',i))
end


% Per-cluster statistics. Every format string carries two specifiers: %d
% for the cluster number and %e (or %4.4f) for the value. With only %d
% present, fprintf would recycle the format and print the value as a second
% "cluster number".
for i = 1:3
    T = clusters{i}(:,1);
    fprintf('\nCLUSTER %d:\n',i)
    DeltaT = diff(T);                    % empty for a single-row cluster
    MclusterTimeseries = mean(DeltaT);   % mean of empty is NaN
    formatSpec = 'Mean DeltaT of Cluster %d is %e\n';
    fprintf(formatSpec,i,MclusterTimeseries)
    MclusterFrequncy = mean(clusters{i}(:,2));
    formatSpec = 'Mean Frequncy of Cluster %d is %e\n';
    fprintf(formatSpec,i,MclusterFrequncy)
    MclusterAmplitude = max(clusters{i}(:,3));
    formatSpec = 'Max Amplitude of Cluster %d is %4.4f\n';
    fprintf(formatSpec,i,MclusterAmplitude)
    MclusterPW = mean(clusters{i}(:,4));
    formatSpec = 'Mean Pulse Width of Cluster %d is %e\n';
    fprintf(formatSpec,i,MclusterPW)
end

Output text for the second of these scripts:

Replicate 1, 1 iterations, total sum of distances = 1.05391e+11.
Replicate 2, 1 iterations, total sum of distances = 1.02249e+11.
Replicate 3, 1 iterations, total sum of distances = 1.05391e+11.
Replicate 4, 1 iterations, total sum of distances = 1.02249e+11.
Replicate 5, 1 iterations, total sum of distances = 1.30309e+11.
Best total sum of distances = 1.02249e+11

CLUSTER 1:
Mean DeltaT of Cluster 1 is 5.500100e-04
Mean Frequncy of Cluster 1 is 2.765393e+09
Max Amplitude of Cluster 1 is -50.2723
Mean Pulse Width of Cluster 1 is 1.012651e-05

CLUSTER 2:
Mean DeltaT of Cluster 2 is  NaN
Mean Frequncy of Cluster 2 is 2.780981e+09
Max Amplitude of Cluster 2 is -45.1526
Mean Pulse Width of Cluster 2 is 1.505927e-05

CLUSTER 3:
Mean DeltaT of Cluster 3 is  NaN
Mean Frequncy of Cluster 3 is 2.780554e+09
Max Amplitude of Cluster 3 is -45.1733
Mean Pulse Width of Cluster 3 is 1.507566e-05