I made a PCA plot with autoplot(), but I wanted to have ellipses around only 2 of the groups instead of all 3. Therefore I switched to ggplot. However, it seems that my axes are different between autoplot and ggplot methods. Look at the difference between p1 and p2:
library(ggplot2)
library(ggfortify)
library(tidyr)
# Run a PCA on the four numeric iris measurements.
x <- iris[1:4]
pc <- prcomp(x)

# Scores on the first two components plus the species as an integer-coded
# factor (levels "1", "2", "3", following the level order of iris[, 5]).
df <- data.frame(pc$x[, 1:2], V3 = factor(as.integer(iris[, 5])))

# ggplot method
# Layers have to be chained with `+`; without it `p1` holds only the empty
# base plot and the geom/stat calls are evaluated and thrown away.
p1 <- ggplot(df, aes(PC1, PC2, colour = V3)) +
  geom_point(size = 3, aes(shape = V3)) +
  # Ellipses for groups 1 and 2 only. The fill reuses the outline colour with
  # alpha 0, so the polygon interior stays transparent.
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # outline width -- confirm against the installed version before switching.
  stat_ellipse(
    geom = "polygon",
    aes(fill = after_scale(alpha(colour, 0))),
    data = df[df$V3 %in% c("1", "2"), ],
    size = 1
  )
p1
# autoplot method
y <- prcomp(x)

# Measurements plus the species factor. The column keeps the literal name
# "iris[, 5]" because the colour/shape arguments below refer to it by name.
x2 <- x
x2[["iris[, 5]"]] <- iris[, 5]

# Spell out FALSE: the shorthand `F` is an ordinary variable that can be
# reassigned.
p2 <- autoplot(
  y,
  data = x2,
  colour = "iris[, 5]",
  label = FALSE,
  shape = "iris[, 5]",
  size = 2
)
p2
Created on 2022-02-22 by the reprex package (v2.0.1)
Why do I get different axes?
CodePudding user response:
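In the autoplot method, the principal component scores are rescaled before plotting: each component's scores are divided by that component's standard deviation multiplied by the square root of the number of observations. To get the same axes with ggplot, apply the same scaling yourself: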
# Run a PCA on the four numeric iris measurements.
x <- iris[1:4]
pc <- prcomp(x)

# Per-component scaling factor: standard deviation times sqrt(number of rows).
lam <- pc$sdev[1:2] * sqrt(nrow(x))

# Divide each of the first two score columns by its factor, then attach the
# species as an integer-coded factor (levels "1", "2", "3").
df <- data.frame(
  sweep(pc$x[, 1:2], 2, lam, "/"),
  V3 = factor(as.integer(iris[, 5]))
)

# ggplot method
# Layers have to be chained with `+`; without it `p1` holds only the empty
# base plot and the geom/stat calls are evaluated and thrown away.
p1 <- ggplot(df, aes(PC1, PC2, colour = V3)) +
  geom_point(size = 3, aes(shape = V3)) +
  # Ellipses for groups 1 and 2 only. The fill reuses the outline colour with
  # alpha 0, so the polygon interior stays transparent.
  stat_ellipse(
    geom = "polygon",
    aes(fill = after_scale(alpha(colour, 0))),
    data = df[df$V3 %in% c("1", "2"), ],
    size = 1
  )
p1