I am trying to add labels that sit above box plots. In the example below, instead of NA, I want the label above A to say "total number of var3 = 11" and the label above B to say "total number of var3 = 34". With my real data, numbers are produced, but they bear no relation to the original data set (I cannot work out how they could possibly be calculated from the original data, so I must be doing something wrong!).
# Example data: one two-level grouping column and two numeric columns.
var1 <- c("A", "B", "A", "B", "B", "B", "A", "B", "B")
var2 <- as.numeric(4:12)
var3 <- as.numeric(seq_len(9))
df <- data.frame(var1 = var1, var2 = var2, var3 = var3)
# Summary function for stat_summary(): builds the text label for one box.
#
# y           - numeric vector of y values for one group.
# upper_limit - reference height; the label is placed at 95% of this value.
#
# Returns a one-row data.frame with columns `y` (label height) and `label`.
stat_box_data <- function(y, upper_limit = max(df$var2) * 1.15) {
  n_values <- length(y)
  # NOTE: the values held in y are used as positions into df$var3 here, so any
  # value larger than nrow(df) selects NA and the whole sum becomes NA.
  var3_total <- sum(df$var3[y])
  label_text <- paste(
    'number of var1 =', n_values, '\n',
    'total number of var3 =', var3_total
  )
  data.frame(y = 0.95 * upper_limit, label = label_text)
}
# Box plot of var2 for each level of var1, with a text label per box produced
# by stat_box_data. The layers have to be chained with `+`; as separate
# statements only the empty base plot is built and the layers are discarded.
ggplot(df, aes(var1, var2)) +
  geom_boxplot() +
  stat_summary(
    fun.data = stat_box_data,
    geom = "text",
    hjust = 0.5,
    vjust = 0.9
  )
# Total of var3 within each level of var1.
df %>%
  group_by(var1) %>%
  summarise(sum = sum(var3))
You can automate this a little with the following:
# Keep the rows of each var1 level in their own data frame.
group1 <- filter(df, var1 == "A")
group2 <- filter(df, var1 == "B")
# Label builder for stat_summary(): one row of label data per box.
#
# y           - numeric vector of y values for one group.
# upper_limit - reference height; the label is placed at 95% of this value.
# y2          - not used in the body; kept so the signature stays the same.
#
# The var3 total is picked by group size: when the group holds more values
# than df has "A" rows, group2's total is reported, otherwise group1's.
#
# Returns a one-row data.frame with columns `y` (label height) and `label`.
stat_box_data <- function(y,
                          upper_limit = max(df$var2) * 1.15,
                          y2 = df[c(1, 3)]) {
  n_values <- length(y)
  more_than_a_rows <- sum(df$var1 == "A") < n_values
  var3_total <- ifelse(more_than_a_rows, sum(group2$var3), sum(group1$var3))
  data.frame(
    y = 0.95 * upper_limit,
    label = paste(
      'number of var1 =', n_values, '\n',
      'total number of var3 =', var3_total, '\n'
    )
  )
}
# Box plot of var2 for each level of var1 (grouped explicitly by var1), with a
# text label per box produced by stat_box_data. The layers have to be chained
# with `+`; as separate statements only the empty base plot is built.
ggplot(df, aes(var1, var2, group = var1)) +
  geom_boxplot() +
  stat_summary(
    fun.data = stat_box_data,
    geom = "text",
    hjust = 0.5,
    vjust = 0.9
  )
CodePudding user response:
You could get the result you want using this rather convoluted method.
library(dplyr)
library(ggplot2)
# Example data: grouping column var1 plus numeric columns var2 and var3.
var1 <- c("A", "B", "A", "B", "B", "B", "A", "B", "B")
var2 <- as.numeric(seq(4, 12))
var3 <- as.numeric(seq(1, 9))
df <- data.frame(var1 = var1, var2 = var2, var3 = var3)
# Summary function for stat_summary(): builds the label shown above one box.
#
# y           - numeric vector of y values (var2) belonging to one group.
# upper_limit - reference height; the label is placed at 95% of this value.
#
# Returns a one-row data.frame with columns `y` (label height) and `label`.
stat_box_data <- function(y, upper_limit = max(df$var2) * 1.15) {
  # Map each y value back to its row in df so the matching var3 entries can be
  # summed. match() returns only the first hit, so this relies on the var2
  # values being unique.
  rows <- match(y, df$var2)
  data.frame(
    y = 0.95 * upper_limit,
    label = paste(
      'count =', length(y), '\n',
      # The value is a sum of var3, so it is labelled as a total, not a mean.
      'total number of var3 =', sum(df$var3[rows]), '\n'
    )
  )
}
# Per-group totals of var3, pulled out of the summary table as a plain vector.
d <- df %>%
  group_by(var1) %>%
  summarise(sum = sum(var3)) %>%
  pull(sum)
# Box plot of var2 for each level of var1, with a text label per box produced
# by stat_box_data. The layers have to be chained with `+`; as separate
# statements only the empty base plot is built and the layers are discarded.
ggplot(df, aes(var1, var2)) +
  geom_boxplot() +
  stat_summary(
    fun.data = stat_box_data,
    geom = "text",
    hjust = 0.5,
    vjust = 0.9
  )