I'm doing some test work and I'm running into the following error:
Error in knn(train = Data_train, test = Data_test, cl = Data_train_label, :
'train' and 'class' have different lengths
Reproducible code:
# Load the auto-mpg data set straight from GitHub
library(data.table)
Data <- fread(
  "https://raw.githubusercontent.com/justmarkham/DAT5/master/data/auto_mpg.txt",
  stringsAsFactors = FALSE
)
Data
str(Data)

# k-NN preparation: recode the cylinder count as a labelled factor (the class)
Data$cylinders <- factor(
  Data$cylinders,
  levels = c("3", "4", "5", "6", "8"),
  labels = c("3_cylinders", "4_cylinders", "5_cylinders", "6_cylinders", "8_cylinders")
)

# Share of each class, in percent
round(prop.table(table(Data$cylinders)) * 100, digits = 1)

# Put the class column first, followed by the remaining columns
Data <- Data[, c("cylinders", "mpg", "horsepower", "displacement", "weight", "acceleration", "model_year", "origin", "car_name")]
head(Data)
summary(Data[, c("mpg", "horsepower")])
# Min-max scale a numeric vector onto the interval [0, 1].
#
# x: numeric vector.
# Returns a numeric vector of the same length, where the smallest value of x
# maps to 0 and the largest to 1.
#
# The numerator and the denominator each need their own parentheses; without
# them, operator precedence evaluates x - (min(x) / max(x)) - min(x), which is
# not a rescaling at all.
# A constant vector has zero range and therefore yields NaN (0 / 0).
normalize <- function(x) {
  lo <- min(x)
  hi <- max(x)
  (x - lo) / (hi - lo)
}
# Apply normalize() to columns 2 to 8 and collect the result in a data frame
Data_n <- as.data.frame(lapply(Data[, 2:8], normalize))

# 80/20 train/test split on row indices; the seed makes the draw repeatable
smp_size <- floor(0.8 * nrow(Data))
set.seed(123)
train_ind <- sample(seq_len(nrow(Data)), size = smp_size)

Data_train <- Data_n[train_ind, ]
Data_test <- Data_n[-train_ind, ]
dim(Data_train)
dim(Data_test)

# Class labels: column 1 (cylinders) for the same row split
Data_train_label <- Data[train_ind, 1]
Data_test_label <- Data[-train_ind, 1]
length(Data_train_label)
length(Data_test_label)

# install.packages("class")
library(class)
Data_test_pred <- knn(train = Data_train, test = Data_test, cl = Data_train_label, k = 19)

# install.packages("gmodels")
library(gmodels)
CrossTable(x = Data_test_label$cylinders, y = Data_test_pred, prop.chisq = FALSE)
The problem shows up when I check the lengths of the label objects; I get the following:
> length(Data_train_label)
[1] 1
> length(Data_test_label)
[1] 1
but I expected to get:
> length(Data_train_label)
[1] 313
> length(Data_test_label)
[1] 79
This is rather strange. I looked at other questions on this topic, but I didn't find anything that helped. Maybe Data_train_label
needs to be converted into a vector?
CodePudding user response:
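The `cl` argument (your class labels) expects a vector or factor rather than a data frame.
Because `Data` is a data.table, `Data[train_ind, 1]` returns a one-column table, and `length()` of a table is its number of columns (1), which is why `knn()` reports different lengths.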
The following code should work:
# Flatten the one-column label table into a plain vector before handing it to knn()
Data_test_pred <- knn(
  train = Data_train,
  test = Data_test,
  cl = as.vector(as.matrix(Data_train_label)),
  k = 19
)
CodePudding user response:
This works for me:
# Keep the labels as one-column tables, as in the question
Data_train_label <- Data[train_ind, 1]
Data_test_label <- Data[-train_ind, 1]

# Measure the cylinders column itself rather than the table that holds it
length(Data_train_label$cylinders)
length(Data_test_label$cylinders)

# install.packages("class")
library(class)

# Pass the cylinders column as the class labels
Data_test_pred <- knn(train = Data_train, test = Data_test, cl = Data_train_label$cylinders, k = 19)