my data
# Example data: five integer columns (col1..col5) plus a GROUP column; all
# nine rows carry GROUP == 1.
data <- data.frame(
  col1 = 1:9,
  col2 = 10:18,
  col3 = 16:24,
  col4 = 67:75,
  col5 = rep(19L, 9),
  GROUP = rep(1L, 9)
)
Func:
# Column-index pairs that wilcox.fun() compares by default.
combination <- list(c(1, 2), c(1, 3), c(1, 4), c(2, 3), c(2, 4), c(2, 5),
                    c(3, 4), c(3, 5))

# Paired Wilcoxon tests for a set of column pairs.
#
# dat:   data frame whose columns are looked up with `[[` using the pair
#        entries.
# pairs: list of length-2 vectors naming the two columns to compare. Defaults
#        to the global `combination`, so existing one-argument calls behave
#        exactly as before; passing it explicitly removes the hidden
#        dependency on that global.
#
# Returns a data frame with one row per pair: `Test` ("<i> by <j>"), `med`
# (the two medians separated by a space) and `p` (the test's p-value).
wilcox.fun <- function(dat, pairs = combination) {
  rows <- lapply(pairs, function(x) {
    first <- dat[[x[1]]]
    second <- dat[[x[2]]]
    test <- wilcox.test(first, second, paired = TRUE)
    # The W statistic is available as test$statistic but is not reported.
    data.frame(Test = sprintf('%s by %s', x[1], x[2]),
               med = paste(median(first), median(second)),
               p = test$p.value)
  })
  do.call(rbind, rows)
}
# Apply wilcox.fun to each GROUP subset and stack the per-group results; the
# group label ends up in the 'Group' column.
result <- purrr::map_df(
  .x = split(data, f = data$GROUP),
  .f = wilcox.fun,
  .id = 'Group'
)
I have a function that takes pairwise combinations of columns and computes `med` and `p` for each pair.
I am not satisfied with the format in which it outputs the result. This is what I want to get:
# Desired output format: one row per column index, holding a median value and
# the pairwise p-values collapsed into a single newline-separated string.
resulte <- data.frame(
  Group = rep(1L, 5),
  Test = 1:5,
  med = c(5L, 14L, 20L, 71L, 19L),
  p = c(
    "1 by 2: 0,00335343645494632\n1 by 3: 0,00335343645494632\n1 by 4: 0,00335343645494632",
    "2 by 3: 0,00335343645494632\n2 by 4: 0,00335343645494632\n2 by 5: 0,00390625\n",
    "3 by 4: 0,00335343645494632\n3 by 5: 0,325204163250902",
    NA,
    NA
  ),
  stringsAsFactors = FALSE
)
CodePudding user response:
Maybe you are looking for `map_dfr`? I also edited the function a bit:
library(tidyverse)
# Example data: five integer columns (col1..col5) plus a GROUP column; all
# nine rows carry GROUP == 1.
data <- data.frame(
  col1 = 1:9,
  col2 = 10:18,
  col3 = 16:24,
  col4 = 67:75,
  col5 = rep(19L, 9),
  GROUP = rep(1L, 9)
)
# Append a copy of the data relabelled as group 2, purely as an example of a
# second group.
data2 <- bind_rows(data, mutate(data, GROUP = 2))
# Paired Wilcoxon tests for every pair of the columns col1..col5.
#
# dat: data frame (or tibble) containing columns named col1 through col5.
#
# Returns a data frame with one row per column pair: `Test` (a label of the
# form "<first> by <second>") and `p` (the test's p-value).
wilcox.fun <- function(dat){
  t(combn(paste0("col", 1:5), 2)) |>
    as.data.frame() |>
    mutate(test = map2(V1, V2,
                       # `[[` always extracts a plain vector; `dat[, x]` would
                       # hand wilcox.test() a one-column tibble when `dat` is
                       # a tibble.
                       \(x, y) wilcox.test(dat[[x]], dat[[y]], paired = TRUE)),
           p = map_dbl(test, \(x) x$p.value),
           Test = glue::glue("{V1} by {V2}")) |>
    select(Test, p)
}
# Run wilcox.fun on each GROUP subset of data2 and row-bind the results; the
# group label is stored in the 'Group' column.
map_dfr(
  .x = split(data2, f = data2$GROUP),
  .f = wilcox.fun,
  .id = 'Group'
)
#> Group Test p
#> 1 1 col1 by col2 0.003353436
#> 2 1 col1 by col3 0.003353436
#> 3 1 col1 by col4 0.003353436
#> 4 1 col1 by col5 0.003906250
#> 5 1 col2 by col3 0.003353436
#> 6 1 col2 by col4 0.003353436
#> 7 1 col2 by col5 0.003906250
#> 8 1 col3 by col4 0.003353436
#> 9 1 col3 by col5 0.325204163
#> 10 1 col4 by col5 0.003906250
#> 11 2 col1 by col2 0.003353436
#> 12 2 col1 by col3 0.003353436
#> 13 2 col1 by col4 0.003353436
#> 14 2 col1 by col5 0.003906250
#> 15 2 col2 by col3 0.003353436
#> 16 2 col2 by col4 0.003353436
#> 17 2 col2 by col5 0.003906250
#> 18 2 col3 by col4 0.003353436
#> 19 2 col3 by col5 0.325204163
#> 20 2 col4 by col5 0.003906250
EDIT
Here is some code to get the requested format:
library(tidyverse)
# Summarise each column per group: its median plus the p-values of paired
# Wilcoxon tests against every higher-numbered column, formatted as one
# "<i> by <j>: <p>" entry per line.
#
# expand_grid() builds every Group x Test combination, so this also works when
# `data` holds more than one group (tibble() can only recycle a length-1 Group).
result <- expand_grid(
  Group = unique(data$GROUP),
  # seq_along() rather than 1:length(): no rows if no column matches "col".
  Test = seq_along(grep("col", colnames(data)))
) |>
  mutate(
    # Test doubles as the column position, so the col* columns must be the
    # leading columns of `data`.
    med = map2_dbl(Test, Group, \(x, y) median(data[data$GROUP == y, x])),
    p = map2_chr(Test, Group, \(x, y) {
      my_dat <- data[data$GROUP == y, ]
      as.data.frame(t(combn(paste0("col", 1:5), 2))) |>
        filter(V1 == paste0("col", x)) |>
        mutate(
          test = map2(V1, V2,
                      \(q, r) wilcox.test(my_dat[, q], my_dat[, r],
                                          paired = TRUE)),
          p = map_dbl(test, \(j) j$p.value),
          # Partner columns run from x + 1 to 5; the last column has no
          # partners, so its filtered table is empty and the string is "".
          out = paste0(x, " by ", {min(c(x + 1, 5))}:5, ": ", p, "\n")
        ) |>
        pull(out) |>
        paste(collapse = "")
    })
  )