Home > Back-end >  Shortening codes for aggregation and plot
Shortening codes for aggregation and plot

Time:10-01

I have data that looks like this:

# Example data: a year key (a) and two value columns (b, c), bound
# column-wise into a numeric matrix.
a <- c(2015, 2010, 2010, 2010, 2015)
b <- c(100, 20, 50, 40, 170)
c <- c(20, 40, 10, 50, 60)
d <- cbind(a, b, c)

I want a function, as short as possible, that produces the table below (where "count" is the sum of each column grouped by "a") and draws a ggplot bar chart with column "a" on the x-axis, two bars per year ("b" and "c" for 2010, "b" and "c" for 2015), and "count" on the y-axis:

a count col
2010 110 b
2010 100 c
2015 270 b
2015 80 c

CodePudding user response:

More experienced R users could probably make this shorter, but this is how I would do it:

library(ggplot2)
library(tidyr)

# Example data: one row per observation, keyed by year in `a`.
a <- c(2015, 2010, 2010, 2010, 2015)
b <- c(100, 20, 50, 40, 170)
c <- c(20, 40, 10, 50, 60)
d <- data.frame(a, b, c)

# Sum b and c within each year. Naming the grouping list gives the key column
# a real name ("year") instead of the default "Group.1".
joined <- aggregate(d[, c("b", "c")], by = list(year = d$a), FUN = sum)

# Reshape to one row per (year, source column) pair. pivot_longer() replaces
# the superseded gather() and names the output columns directly, so no
# positional colnames() fix-up is needed.
data_long <- pivot_longer(
  joined,
  cols = c("b", "c"),
  names_to = "col",
  values_to = "count"
)

data_long <- data_long[order(data_long$year, data_long$col), ]
# Factors make ggplot treat year as discrete and fix the bar order within it.
data_long$year <- factor(data_long$year)
data_long$col <- factor(data_long$col, levels = c("b", "c"))

# The `+` is required: without it the geom_bar() layer is a separate
# statement and never becomes part of the plot.
ggplot(data_long, aes(x = year, y = count, fill = col)) +
  geom_bar(stat = "identity", position = "dodge")
  

CodePudding user response:

A possible solution:

library(dplyr)
library(ggplot2)

# Example data, bound into a matrix and then converted to a data frame
# with columns a, b and c.
a <- c(2015, 2010, 2010, 2010, 2015)
b <- c(100, 20, 50, 40, 170)
c <- c(20, 40, 10, 50, 60)
d <- as.data.frame(cbind(a, b, c))

#' Plot per-group totals of columns `b` and `c` as dodged bars
#'
#' @param d A data frame with a numeric grouping column `a` and numeric
#'   columns `b` and `c`.
#' @return A ggplot object with `a` on the x-axis, the summed `count` on the
#'   y-axis, and one bar per source column (`col`) within each value of `a`.
myplot <- function(d) {
  d1 <- d %>%
    group_by(a) %>%
    summarise(count = sum(b)) %>%
    mutate(col = "b")

  d2 <- d %>%
    group_by(a) %>%
    summarise(count = sum(c)) %>%
    mutate(col = "c")

  # This is your table
  z <- bind_rows(d1, d2) %>%
    arrange(a)

  # The pieces must be chained with `+`; as separate statements only the last
  # one (the scale) would be returned and no plot would be built.
  ggplot() +
    geom_bar(
      data = z, aes(x = a, y = count, fill = col),
      position = "dodge", stat = "identity"
    ) +
    # Take the breaks from the summarised table rather than a global `a`,
    # so the function depends only on its argument.
    scale_x_continuous(breaks = unique(z$a))
}

# Run the function on the example data frame built above.
myplot(d)

EDITED:

An even shorter solution:

library(dplyr)
library(ggplot2)
library(purrr)

# Example data, bound into a matrix and then converted to a data frame
# with columns a, b and c.
a <- c(2015, 2010, 2010, 2010, 2015)
b <- c(100, 20, 50, 40, 170)
c <- c(20, 40, 10, 50, 60)
d <- as.data.frame(cbind(a, b, c))

#' Plot per-group totals of columns `b` and `c` as dodged bars
#'
#' @param d A data frame with a numeric grouping column `a` and numeric
#'   columns `b` and `c`.
#' @return A ggplot object with `a` on the x-axis, the summed `count` on the
#'   y-axis, and one bar per source column (`col`) within each value of `a`.
myplot <- function(d) {
  # Build one summarised table per value column and stack them row-wise.
  # `.data[[.x]]` looks the column up by its string name.
  z <- map_df(
    c("b", "c"),
    ~ d %>%
      group_by(a) %>%
      summarise(count = sum(.data[[.x]])) %>%
      mutate(col = .x)
  )

  # The pieces must be chained with `+`; as separate statements only the last
  # one (the scale) would be returned and no plot would be built.
  ggplot() +
    geom_bar(
      data = z, aes(x = a, y = count, fill = col),
      position = "dodge", stat = "identity"
    ) +
    # Take the breaks from the summarised table rather than a global `a`,
    # so the function depends only on its argument.
    scale_x_continuous(breaks = unique(z$a))
}

# Run the function on the example data frame built above.
myplot(d)
  • Related