Home > Enterprise >  How to compute the between-group mean difference?
How to compute the between-group mean difference?

Time:11-30

Starting from this code, I would like to compute the between-group difference at time point 7:

# Summarise the score columns for every (tests0, GROUP) combination:
# per-column means and standard errors, plus the estimate and p-value of a
# paired t-test between the score columns.
# NOTE(review): std.error() is not base R — presumably plotrix::std.error;
# confirm the package is attached.
data <- out %>%
  group_by(tests0, GROUP) %>%
  summarise(
    # across() passes the group's score columns into the braces as `.`; the
    # resulting tibble is wrapped in list() so it can be unnested afterwards.
    all = list(across(starts_with("score")) %>%
                 {
                   tibble(
                     # "\\D+" replaces the non-digit prefix, so score0 -> mean0 and score7 -> mean7.
                     means   = data.frame(map(., ~ mean(.x, na.rm = TRUE)) %>% set_names(., str_replace(names(.), "\\D+", "mean"))),
                     stderrs = data.frame(map(., ~ std.error(.x, na.rm = TRUE)) %>% set_names(., str_replace(names(.), "\\D+", "stederr"))),
                     # reduce() hands the score columns to t.test() as x and y;
                     # possibly() yields NA instead of aborting if the test errors.
                     ttest1   = data.frame(possibly(~ reduce(., ~ t.test(.x, .y, paired = TRUE))["estimate"], NA)(.)),
                     ttest2   = data.frame(possibly(~ reduce(., ~ t.test(.x, .y, paired = TRUE))["p.value"], NA)(.))
                   )
                 })
  ) %>%
  unnest(all)

# Groups:   tests0 [6]
   tests0     GROUP    means$mean0 $mean7 stderrs$stederr0 $stederr7 ttest1$estimate ttest2$p.value
   <fct>      <fct>          <dbl>  <dbl>            <dbl>     <dbl>           <dbl>          <dbl>
 1 ADAS_CogT0 CONTROL         12.6   13.6            0.525     0.662          -1.15         0.00182
 2 ADAS_CogT0 TRAINING        14.0   12.6            0.613     0.570           1.40         0.00295
 3 PVF_T0     CONTROL         32.1   31.3            1.22      1.45            0.498        0.636  
 4 PVF_T0     TRAINING        31.6   34.3            1.37      1.51           -2.48         0.0102 
 5 ROCF_CT0   CONTROL         29.6   30.3            0.893     0.821          -0.180        0.835  
 6 ROCF_CT0   TRAINING        30.1   29.5            0.906     0.929           0.489        0.615  
 7 ROCF_IT0   CONTROL         12.8   12.2            0.563     0.683           0.580        0.356  
 8 ROCF_IT0   TRAINING        10.9   12.3            0.735     0.768          -1.44         0.0238 
 9 ROCF_RT0   CONTROL         12.1   12.5            0.725     0.797          -0.370        0.598  
10 ROCF_RT0   TRAINING        10.5   10.9            0.746     0.742          -0.534        0.370  
11 SVF_T0     CONTROL         35.5   34              1.05      1.15            1.42         0.107  
12 SVF_T0     TRAINING        34.1   32.9            1.04      1.16            0.962        0.231 

Or, better, I would like to add a column reporting the difference in the mean7 column between the groups (CONTROL and TRAINING). Do you know what I should add to the code above?

Here is the dataset, if someone would like to try to figure out a possible solution.

Thanks in advance

> dput(head(out, 100))
structure(list(ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 
93, 94, 95, 96, 97, 98, 99, 100), GROUP = structure(c(2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), .Label = c("CONTROL", "TRAINING"), class = "factor"), 
    Gender = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 
    2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 
    2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
    2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 
    2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 
    1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
    2L), .Label = c("M", "F"), class = "factor"), Age = c(74, 
    76, 81, 74, 69, 72, 75, 83, 78, 72, 82, 68, 72, 72, 73, 80, 
    69, 72, 70, 80, 75, 80, 78, 74, 82, 74, 80, 82, 78, 81, 66, 
    71, 70, 79, 78, 73, 72, 77, 77, 71, 83, 74, 70, 71, 77, 69, 
    67, 64, 79, 71, 77, 77, 73, 67, 68, 79, 81, 67, 84, 75, 80, 
    73, 68, 74, 77, 79, 79, 72, 73, 78, 76, 78, 77, 74, 78, 77, 
    77, 82, 77, 70, 77, 81, 79, 75, 74, 78, 69, 77, 73, 77, 70, 
    79, 70, 72, 77, 72, 71, 71, 73, 81), Education = c(18, 4, 
    8, 5, 8, 11, 5, 5, 4, 8, 8, 12, 5, 18, 13, 5, 13, 13, 5, 
    5, 13, 5, 3, 8, 17, 5, 8, 5, 5, 8, 17, 8, 18, 18, 13, 13, 
    13, 13, 15, 17, 8, 5, 5, 13, 8, 5, 11, 13, 8, 8, 8, 5, 13, 
    8, 5, 17, 8, 12, 13, 5, 8, 8, 8, 5, 3, 8, 18, 5, 8, 13, 8, 
    5, 17, 8, 5, 17, 5, 8, 11, 8, 8, 5, 12, 3, 8, 8, 8, 13, 5, 
    5, 8, 8, 13, 5, 5, 8, 13, 5, 8, 12), tests0 = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("ADAS_CogT0", 
    "PVF_T0", "ROCF_CT0", "ROCF_IT0", "ROCF_RT0", "SVF_T0"), class = "factor"), 
    score0 = c(14.66, 15.33, 17.33, 19, 7.66, 12.6, 18.67, 14.99, 
    17.99, 17.33, 13.66, 16.99, 10.66, 9.66, 14.99, 15.66, 13.33, 
    4.33, 14.33, 15.99, 16.33, 10.66, 14.66, 10.66, 19.33, 17.66, 
    15.99, 20.66, 20.6, 17, 10.33, 6.33, 6.66, 19.99, 13.33, 
    24.33, 12.33, 10.33, 12.33, 9.66, 10.99, 13.99, 23, 6.32, 
    11.32, 13.99, 14.66, 8.99, 14.33, 9.99, 7.33, 15.66, 14, 
    7.99, 23.32, 14.66, 9.99, 5.66, 6.99, 11.66, 10.33, 6.99, 
    19.32, NA, 10, 17.66, 13.66, 10.32, NA, NA, 8.66, 9, 6.99, 
    14.99, 9.66, 13.66, 15.32, 12, 14, 13.66, 11.99, 15.66, 16, 
    15, 16.99, 20, 11, 7.99, 8.33, 8.32, 14.99, 18.66, 10.33, 
    11.99, 9.32, 17, 14.33, 14.66, 16.6, 9.99), tests7 = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("ADAS_CogT7", 
    "PVF_T7", "ROCF_CT7", "ROCF_IT7", "ROCF_RT7", "SVF_T7"), class = "factor"), 
    score7 = c(16, 9.32, 21.33, 17, 8.32, 11, 14.99, 10.99, 17, 
    18.33, 13.32, 14.34, 8.99, 7, 11.99, 15.33, 6.99, 5.33, 12.32, 
    13, 21.32, 7.99, 13.33, 11.99, 17.32, 16.32, 16.33, 14.66, 
    18.99, 17.33, 7.99, 9.33, 10.99, NA, 12.99, 16.33, 21.66, 
    9, 9.34, 8.66, 8.33, 13.66, 15.66, 6.66, 10.99, 13.33, 13.33, 
    7.99, 11.99, 11.32, 7.33, 9.66, 6.99, NA, 15.99, 15.66, 14.66, 
    6.32, 7, 11, 14, 10.33, 24.66, NA, 14.99, NA, 15.99, 9.32, 
    NA, NA, 9.99, 9.33, 7.66, 17.33, 10.32, 16, 17, 12.99, 15, 
    14.33, 10, 14.99, 19, 13.99, 19.33, NA, 10, 6.99, 11.66, 
    6.66, 14.33, 16, 8.66, 10, NA, 20, 14.99, 19.66, 26.66, 8.99
    )), row.names = c(NA, -100L), class = c("tbl_df", "tbl", 
"data.frame"))
> 

CodePudding user response:

# Sort the rows by GROUP first, then, within each test, take the difference
# between consecutive `$mean7` values (later GROUP row minus earlier one).
what_you_have %>%
  arrange(GROUP) %>%
  group_by(tests0) %>%
  mutate(mean7_ctrl_train_diff = diff(.data[["$mean7"]]))

Using this data:

# Rebuild the printed summary table from text. The header line has one field
# fewer than the data rows, so read.table() uses the leading row number as row
# names; check.names = FALSE keeps the `$` characters in the column names.
what_you_have <- read.table(text = '  tests0     GROUP    means$mean0 $mean7 stderrs$stederr0 $stederr7 ttest1$estimate ttest2$p.value
 1 ADAS_CogT0 CONTROL         12.6   13.6            0.525     0.662          -1.15         0.00182
 2 ADAS_CogT0 TRAINING        14.0   12.6            0.613     0.570           1.40         0.00295
 3 PVF_T0     CONTROL         32.1   31.3            1.22      1.45            0.498        0.636  
 4 PVF_T0     TRAINING        31.6   34.3            1.37      1.51           -2.48         0.0102 
 5 ROCF_CT0   CONTROL         29.6   30.3            0.893     0.821          -0.180        0.835  
 6 ROCF_CT0   TRAINING        30.1   29.5            0.906     0.929           0.489        0.615  
 7 ROCF_IT0   CONTROL         12.8   12.2            0.563     0.683           0.580        0.356  
 8 ROCF_IT0   TRAINING        10.9   12.3            0.735     0.768          -1.44         0.0238 
 9 ROCF_RT0   CONTROL         12.1   12.5            0.725     0.797          -0.370        0.598  
10 ROCF_RT0   TRAINING        10.5   10.9            0.746     0.742          -0.534        0.370  
11 SVF_T0     CONTROL         35.5   34              1.05      1.15            1.42         0.107  
12 SVF_T0     TRAINING        34.1   32.9            1.04      1.16            0.962        0.231', header = TRUE, check.names = FALSE)

CodePudding user response:

The `data` object is created with data.frame columns (`means`, `stderrs`, `ttest1`, `ttest2`), as `str()` shows:

> str(data)
grouped_df [2 × 6] (S3: grouped_df/tbl_df/tbl/data.frame)
 $ tests0 : Factor w/ 6 levels "ADAS_CogT0","PVF_T0",..: 1 1
 $ GROUP  : Factor w/ 2 levels "CONTROL","TRAINING": 1 2
 $ means  :'data.frame':    2 obs. of  2 variables:
  ..$ mean0: num [1:2] 12.5 14
  ..$ mean7: num [1:2] 13.5 12.6
 $ stderrs:'data.frame':    2 obs. of  2 variables:
  ..$ stederr0: num [1:2] 0.574 0.613
  ..$ stederr7: num [1:2] 0.77 0.57
 $ ttest1 :'data.frame':    2 obs. of  1 variable:
  ..$ estimate: num [1:2] -1.24 1.4
 $ ttest2 :'data.frame':    2 obs. of  1 variable:
  ..$ p.value: num [1:2] 0.00471 0.00295
 - attr(*, "groups")= tibble [1 × 2] (S3: tbl_df/tbl/data.frame)
  ..$ tests0: Factor w/ 6 levels "ADAS_CogT0","PVF_T0",..: 1
  ..$ .rows : list<int> [1:1] 
  .. ..$ : int [1:2] 1 2
  .. ..@ ptype: int(0) 
  ..- attr(*, ".drop")= logi TRUE

So, we may need to unpack those data.frame columns before doing the diff

library(dplyr)
library(tidyr)
# Flatten the data.frame columns into ordinary columns, then add the
# difference between consecutive mean7 values; each group's first row has no
# predecessor and therefore gets NA.
data %>%
  unpack(cols = where(is.data.frame)) %>%
  mutate(mean7diff = c(NA, diff(mean7))) %>%
  ungroup()

-output

# A tibble: 2 × 9
  tests0     GROUP    mean0 mean7 stederr0 stederr7 estimate p.value mean7diff
  <fct>      <fct>    <dbl> <dbl>    <dbl>    <dbl>    <dbl>   <dbl>     <dbl>
1 ADAS_CogT0 CONTROL   12.5  13.5    0.574    0.770    -1.24 0.00471    NA    
2 ADAS_CogT0 TRAINING  14.0  12.6    0.613    0.570     1.40 0.00295    -0.878

CodePudding user response:

Not sure if this is what you are looking for?

# Between-group difference (TRAINING minus CONTROL) of the mean scores per test.
# drop_na() runs before select(), so a row with an NA in any column, including
# ID/Gender/Age/Education, is removed before the means are computed.
out %>%
    drop_na() %>%
    select(-c(ID, Gender, Age, Education)) %>%
    # The original grouped by tests0 and ungrouped straight away; a single
    # ungroup() gives the same ungrouped table.
    ungroup() %>%
    # One column per score/GROUP pair (e.g. score7_TRAINING), averaged by values_fn.
    pivot_wider(names_from = GROUP, values_from = contains("score"), values_fn = mean) %>%
    transmute(
        tests0 = tests0,
        diff0 = score0_TRAINING - score0_CONTROL,
        diff7 = score7_TRAINING - score7_CONTROL
    )
# A tibble: 1 x 3
  tests0     diff0  diff7
  <fct>      <dbl>  <dbl>
1 ADAS_CogT0  1.76 -0.878
  • Related