I have the following dataset:
df <- structure(list(var = c("a", "a", "a", "a", "a", "a", "a", "a",
"a", "a", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b"),
beta_2 = c(-0.0441739987111475, -0.237256549142376, -0.167105040977351,
-0.140660549127359, -0.0623609020878716, -0.279740636040755,
-0.0211523654970921, 0.135368375550385, -0.0612770247281429,
-0.13183964102725, 0.363736380163624, -0.0134490092107583,
-0.0179957210095045, -0.00897746346470879, -0.0588242539401108,
-0.0571976057977875, -0.0290052449275881, 0.263181562031473,
0.00398338217426211, 0.0945495450635497), beta_3 = c(8.54560737016843e-05,
-0.0375859675101865, -0.0334219898732454, 0.0332275634691021,
6.41499442849741e-05, -0.0200724300602369, 8.046644459034e-05,
0.0626880671346749, 0.066218613897726, 0.0101268565262127,
0.44671567722757, 0.180543425234781, 0.526177616390516, 0.281245231195401,
-0.0362628519010746, 0.0609803646123324, 0.104137160504616,
0.804375133555955, 0.211218123083386, 0.824756942938928),
beta_4 = c(-8.50289708803184e-06, 0.0376601781861706, 0.104418586040791,
-0.0949557776511923, 2.11896613386966e-05, 0.0969765824620132,
4.95280289930771e-06, -0.0967836292162074, -0.132623370126544,
0.0579395551175153, -0.140392004360494, 0.00950912868877355,
-0.388317615535003, -0.0282634228070272, 0.0547116932731301,
0.0119441792873249, -0.0413015877795695, -0.720387490330028,
-0.0321860166581817, -0.627489324697221)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -20L))
df
# # A tibble: 20 × 4
# var beta_2 beta_3 beta_4
# <chr> <dbl> <dbl> <dbl>
# 1 a -0.0442 0.0000855 -0.00000850
# 2 a -0.237 -0.0376 0.0377
# 3 a -0.167 -0.0334 0.104
# 4 a -0.141 0.0332 -0.0950
# 5 a -0.0624 0.0000641 0.0000212
# ...
I would like to summarise each beta_
column grouped by var
so that I have the mean of beta_2
, beta_3
and beta_4
in a single cell.
I can do it with the following code:
df %>%
pivot_longer(!var) %>%
group_by(var, name) %>%
summarise(mean_beta = mean(value) %>% round(2), .groups = "drop") %>%
aggregate(mean_beta ~ var, ., function(x) paste0(x, collapse = ", ")) %>%
as_tibble()
# # A tibble: 2 × 2
# var mean_beta
# <chr> <chr>
# 1 a -0.1, 0.01, 0
# 2 b 0.05, 0.34, -0.19
I'm looking for a more straightforward, tidyverse
-only solution. I have tried using map
inside summarise
but couldn't get what I wanted. Any idea?
CodePudding user response:
You may do the following -
library(dplyr)
df %>%
group_by(var) %>%
summarise(mean_beta = cur_data() %>%
summarise(across(.fns =
~.x %>% mean(na.rm = TRUE) %>% round(2))) %>%
unlist() %>% toString())
# var mean_beta
# <chr> <chr>
#1 a -0.1, 0.01, 0
#2 b 0.05, 0.34, -0.19
cur_data()
provides the sub-data within each group as dataframe that can be summarised for each column and concatenated together.
CodePudding user response:
Another possible solution:
library(tidyverse)
df %>%
group_by(var) %>%
summarise(across(everything(), mean)) %>%
{bind_cols(var=.$var, mean_betas=apply(., 1, \(x) str_c(x[-1], collapse = ", ")))}
#> # A tibble: 2 × 2
#> var mean_betas
#> <chr> <chr>
#> 1 a "-0.10101983, 0.008141079, -0.002735024"
#> 2 b " 0.05400016, 0.340388682, -0.190217246"