Home > Back-end >  Put all results from pairwise.t.test in a data.frame with cross_df
Put all results from pairwise.t.test in a data.frame with cross_df

Time:07-24

I'm trying to put the results obtained with pairwise.t.test into a table so that I can use them later in a Shiny app for my analysis.

I have this data:

# Example data: 20 observations with three numeric measurements and two
# grouping factors. `group` and `group1` hold the same labels per row but
# differ in level order ("2", "1", "3" versus "1", "2", "3").
seguro1 <- data.frame(
  id = as.numeric(1:20),
  tmp_habilit = c(
    20, 21, 25, 25, 18, 23,
    9, 12, 15, 14, 15, 10, 8,
    7, 11, 10, 7, 9, 1, 3
  ),
  estciv = c(
    3, 3, 3, 2, 2, 1,
    3, 2, 1, 2, 1, 3, 2,
    2, 1, 2, 2, 1, 3, 1
  ),
  nmultas = c(
    1, 0, 2, 3, 2, 2,
    6, 4, 3, 2, 5, 5, 4,
    13, 15, 9, 6, 10, 8, 5
  ),
  group = factor(
    rep(c("1", "2", "3"), times = c(6, 7, 7)),
    levels = c("2", "1", "3")
  ),
  group1 = factor(
    rep(c("1", "2", "3"), times = c(6, 7, 7)),
    levels = c("1", "2", "3")
  ),
  stringsAsFactors = FALSE
)

And this code:

# Names of the response columns to test (columns 2 to 4 of seguro1).
varNames <- names(seguro1)[2:4]

# Run one Bonferroni-adjusted pairwise t test per response variable, always
# grouping by seguro1$group. The result is a list named after the variables.
# The argument is spelled out in full (p.adjust.method) instead of relying on
# partial matching of `p.adj`, and lapply() over a named vector replaces
# sapply(..., simplify = FALSE) so the return type is always a list.
res <- lapply(
  setNames(varNames, varNames),
  function(x) {
    pairwise.t.test(
      seguro1[, x],
      seguro1$group,
      p.adjust.method = "bonferroni"
    )
  }
)

And output:

# Print the pairwise t-test result for the first variable in varNames.
res[[1]]

#Pairwise comparisons using t tests with pooled SD
#
#data:  seguro1[, x] and seguro1$group
#
#  2       1
#1 4.8e-05 -
#3 0.0091  4.3e-07
#
#P value adjustment method: bonferroni

But, I would like to put these results in a table, like this:

# A tibble: 18 x 5
#   variaveis   grupos group1 group2     p.value
#   <chr>       <chr>  <chr>  <chr>        <dbl>
# 1 tmp_habilit group  1      2      0.0000727  
# 2 tmp_habilit group  3      2      0.0273     
# 3 tmp_habilit group  3      1      0.000000428
# 4 estciv      group  1      2      1          
# 5 estciv      group  3      2      1          
# 6 estciv      group  3      1      0.540      
# 7 nmultas     group  1      2      0.226      
# 8 nmultas     group  3      2      0.00178    
# 9 nmultas     group  3      1      0.0000488  
#10 tmp_habilit group1 2      1      0.0000727  
#11 tmp_habilit group1 3      1      0.000000428
#12 tmp_habilit group1 3      2      0.0273     
#13 estciv      group1 2      1      1          
#14 estciv      group1 3      1      0.540      
#15 estciv      group1 3      2      1          
#16 nmultas     group1 2      1      0.226      
#17 nmultas     group1 3      1      0.0000488  
#18 nmultas     group1 3      2      0.00178   

I tried this:

library(purrr)
library(dplyr)
library(tidyr)

# Build every (variaveis, grupos) combination, run a pairwise t test with
# "bonferroni" adjustment for each combination, tidy each result with broom,
# and unnest the tidied results into the outer table.
# NOTE: `variaveis` and `grupos` are not defined before this call; they are
# expected to be character vectors holding column names of seguro1, because
# they are used as seguro1[[.x]] / seguro1[[.y]] below.
cross_df(list(variaveis = variaveis, grupos = grupos)) %>%
  mutate(ttest = map2(
    variaveis, grupos, ~pairwise.t.test(seguro1[[.x]], seguro1[[.y]], "bonferroni"))
  ) %>%
  mutate(ttesttidy = map(ttest, broom::tidy)) %>%
  unnest(ttesttidy)

I also tried passing the columns themselves instead of their names, like this:

# Second attempt: same pipeline, but the list passed to cross_df() contains
# the data columns themselves (seguro1[c(2:4)] and seguro1[c(5)]) rather than
# their names. The combinations are therefore built from column values, and
# seguro1[[.x]] / seguro1[[.y]] end up being indexed by data values instead
# of column names, which is why this version fails.
list(variaveis = seguro1[c(2:4)], grupos = seguro1[c(5)]) %>%
cross_df() %>%
  mutate(ttest = map2(
    variaveis, grupos, ~pairwise.t.test(seguro1[[.x]], 
seguro1[[.y]], "bonferroni"))
  ) %>%
  mutate(ttesttidy = map(ttest, broom::tidy)) %>%
  unnest(ttesttidy)

But neither attempt works. I would like to get a table like the one shown above, to improve the output of my analysis.

CodePudding user response:

Assuming grupos and variaveis are

# Grouping columns: every column of seguro1 whose name contains "group".
grupos <- names(seguro1)[grepl("group", names(seguro1))]
# Response variables: reuse the column names selected earlier in varNames.
variaveis <- varNames

Then the code works fine once cross_df is replaced with tidyr::crossing:

library(dplyr)
library(purrr)
library(tidyr)
library(broom)
# Pair every response variable with every grouping column, run a
# Bonferroni-adjusted pairwise t test for each pair, and expand the tidied
# results so that each group comparison becomes one row. The raw test objects
# are kept in the `ttest` list column.
crossing(variaveis = variaveis, grupos = grupos) %>%
  mutate(
    ttest = map2(
      variaveis,
      grupos,
      function(.x, .y) {
        pairwise.t.test(
          seguro1[[.x]],
          seguro1[[.y]],
          p.adjust.method = "bonferroni"
        )
      }
    ),
    ttesttidy = map(ttest, broom::tidy)
  ) %>%
  unnest(cols = ttesttidy)

-output

# A tibble: 18 × 6
   variaveis   grupos ttest      group1 group2     p.value
   <chr>       <chr>  <list>     <chr>  <chr>        <dbl>
 1 estciv      group  <prws.hts> 1      2      1          
 2 estciv      group  <prws.hts> 3      2      1          
 3 estciv      group  <prws.hts> 3      1      0.540      
 4 estciv      group1 <prws.hts> 2      1      1          
 5 estciv      group1 <prws.hts> 3      1      0.540      
 6 estciv      group1 <prws.hts> 3      2      1          
 7 nmultas     group  <prws.hts> 1      2      0.226      
 8 nmultas     group  <prws.hts> 3      2      0.00178    
 9 nmultas     group  <prws.hts> 3      1      0.0000488  
10 nmultas     group1 <prws.hts> 2      1      0.226      
11 nmultas     group1 <prws.hts> 3      1      0.0000488  
12 nmultas     group1 <prws.hts> 3      2      0.00178    
13 tmp_habilit group  <prws.hts> 1      2      0.0000727  
14 tmp_habilit group  <prws.hts> 3      2      0.0273     
15 tmp_habilit group  <prws.hts> 3      1      0.000000428
16 tmp_habilit group1 <prws.hts> 2      1      0.0000727  
17 tmp_habilit group1 <prws.hts> 3      1      0.000000428
18 tmp_habilit group1 <prws.hts> 3      2      0.0273     

In the OP's updated code with cross_df, the combinations are built from the column values instead of the column names. When those values are then used to extract from seguro1 with [[, the result is a "subscript out of bounds" error. The list being passed looks like this:

> list(variaveis = seguro1[c(2:4)], grupos = seguro1[c(5)]) 
$variaveis
   tmp_habilit estciv nmultas
1           20      3       1
2           21      3       0
3           25      3       2
4           25      2       3
5           18      2       2
6           23      1       2
7            9      3       6
8           12      2       4
9           15      1       3
10          14      2       2
11          15      1       5
12          10      3       5
13           8      2       4
14           7      2      13
15          11      1      15
16          10      2       9
17           7      2       6
18           9      1      10
19           1      3       8
20           3      1       5

$grupos
   group
1      1
2      1
3      1
4      1
5      1
6      1
7      2
8      2
9      2
10     2
11     2
12     2
13     2
14     3
15     3
16     3
17     3
18     3
19     3
20     3
  • Related