I am wondering how I can order the clusters on the y-axis by decreasing count of kiwi.
library(ggplot2)

# Simulate 200 observations: each row is one fruit assigned to one of 20
# clusters. No seed is set, so the data differ from run to run.
n_obs <- 200
df <- data.frame(
  cluster = sample(20, size = n_obs, replace = TRUE),
  fruit = sample(
    c("banana", "apple", "orange", "kiwi", "plum"),
    size = n_obs,
    replace = TRUE
  ),
  stringsAsFactors = FALSE
)

# Horizontal stacked bar chart: one bar per cluster, filled by fruit.
# The layers must be chained with `+`; without it only the bare ggplot() call
# is stored in `p` and the geom, theme, and label calls are evaluated on their
# own and never added to the plot.
p <- ggplot(df, aes(x = as.factor(cluster), fill = as.factor(fruit))) +
  geom_bar(stat = "count") +
  theme_classic() +
  coord_flip() +
  theme(
    axis.text.y = element_text(size = 20),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text = element_text(size = 20)
  ) +
  theme(legend.text = element_text(size = 20)) +
  xlab("Cluster") +
  ylab("Fruit count") +
  labs(fill = "")
p
CodePudding user response:
I would probably do this as a data manipulation before plotting. Note I have moved kiwi to the first position in the stacking order so we can see the bars getting smaller as we move down the y axis.
library(tidyverse)

df %>%
  mutate(
    # Order the cluster levels by ascending kiwi count; after coord_flip() the
    # first level is drawn at the bottom, so counts decrease from top to
    # bottom. tapply() yields a 0 for clusters without kiwi and still works
    # when no row (or every row) is kiwi, unlike indexing row 2 of a table().
    cluster = factor(
      cluster,
      levels = names(sort(tapply(fruit == "kiwi", cluster, sum)))
    ),
    # Put kiwi first in the stacking order so its segment starts at the axis.
    fruit = factor(
      fruit,
      levels = c("kiwi", "apple", "banana", "orange", "plum")
    )
  ) %>%
  ggplot(aes(x = cluster, fill = fruit)) +
  # reverse = TRUE stacks the segments in factor-level order (kiwi first).
  geom_bar(position = position_stack(reverse = TRUE)) +
  theme_classic() +
  coord_flip() +
  theme(
    axis.text.y = element_text(size = 20),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text = element_text(size = 20)
  ) +
  theme(legend.text = element_text(size = 20)) +
  # Colours are matched to the fruit levels in the order given above.
  scale_fill_manual(
    values = c("olivedrab", "yellowgreen", "yellow2", "orange2", "plum4")
  ) +
  xlab("Cluster") +
  ylab("Fruit count") +
  labs(fill = "")
CodePudding user response:
No need to modify the data: just use `x = reorder(cluster, fruit == 'kiwi', sum)` in `aes()` (instead of `as.factor(cluster)`).
library(ggplot2)

# reorder() converts `cluster` to a factor whose levels are sorted by the sum
# of `fruit == "kiwi"` within each cluster, i.e. by the kiwi count.
# The layers are chained with `+` so they are actually added to the plot.
ggplot(
  df,
  aes(
    x = reorder(cluster, fruit == "kiwi", sum),
    fill = as.factor(fruit)
  )
) +
  geom_bar(stat = "count") +
  theme_classic() +
  coord_flip() +
  theme(
    axis.text.y = element_text(size = 20),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text = element_text(size = 20)
  ) +
  theme(legend.text = element_text(size = 20)) +
  xlab("Cluster") +
  ylab("Fruit count") +
  labs(fill = "")
CodePudding user response:
There may be a more efficient way to do this, but one possibility is to count the number of times kiwi occurs in each cluster, then order the cluster variable by that count. Note that in this example the kiwi count can be NA for clusters that contain no kiwi, so we set those instances to 0.
library(dplyr)
library(forcats)

# Count how many times kiwi occurs per cluster (one row per cluster that has
# at least one kiwi). Named `kiwi_counts` rather than `order` so it does not
# shadow base::order().
kiwi_counts <- df %>%
  filter(fruit == "kiwi") %>%
  count(cluster)

df <- df %>%
  # Join the counts to the original df; the key is given explicitly so the
  # join does not depend on whichever column names happen to be shared.
  left_join(kiwi_counts, by = "cluster") %>%
  mutate(
    # Clusters without kiwi have no match and get NA; make those zero
    # (otherwise the NAs appear at the top of the plot).
    n = coalesce(n, 0L),
    # Order the cluster levels by their kiwi count.
    cluster = fct_reorder(as.factor(cluster), n)
  )
Then your plotting code should give the desired output.
CodePudding user response:
Compute the total number of kiwis per cluster, then convert cluster to a factor ordered by this per-cluster kiwi total, using dplyr and `forcats::fct_reorder()`:
# No random call follows in this snippet, so the seed only has an effect if
# `df` is (re)generated after this line.
set.seed(13)
library(dplyr)
library(forcats)
library(ggplot2)

df <- df %>%
  group_by(cluster) %>%
  # Number of kiwi rows in each cluster, repeated on every row of the cluster.
  mutate(n_kiwi = sum(fruit == "kiwi")) %>%
  ungroup() %>%
  # Order the cluster levels by that per-cluster kiwi total.
  mutate(cluster = fct_reorder(factor(cluster), n_kiwi))

# The layers are chained with `+`; without it `p` would hold only the bare
# ggplot() call and none of the layers, themes, or labels.
p <- ggplot(df, aes(x = cluster, fill = fruit)) +
  geom_bar(stat = "count") +
  theme_classic() +
  coord_flip() +
  theme(
    axis.text.y = element_text(size = 20),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text = element_text(size = 20)
  ) +
  theme(legend.text = element_text(size = 20)) +
  xlab("Cluster") +
  ylab("Fruit count") +
  labs(fill = "")
p