Home > database >  Get coefficients from a list of tibbles
Get coefficients from a list of tibbles

Time:10-06

I have a list of tibbles (too many to handle manually) containing results that I need to tabulate. I want to extract the p-values, but more generally, let's say I want to pull multiple columns and multiple rows from something like this:

sample$ppara
# A tibble: 9 × 8
effect   group    term            estimate std.error    df statistic   p.value
  <chr>    <chr>    <chr>              <dbl>     <dbl> <dbl>     <dbl>     <dbl>
1 fixed    NA       (Intercept)      8.23      0.572     129   14.4     1.27e-28
2 fixed    NA       edad0           -0.00300   0.00771   129   -0.389   6.98e- 1
3 fixed    NA       time            -0.00579   0.0980     93   -0.0591  9.53e- 1
4 fixed    NA       grup_int         0.104     0.116     129    0.895   3.73e- 1
5 fixed    NA       time:grup_int   -0.0453    0.0457     93   -0.990   3.25e- 1
6 ran_pars id       sd_(Intercept)   0.819    NA          NA   NA      NA       
7 ran_pars id       cor_time.(Inte… -0.827    NA          NA   NA      NA       
8 ran_pars id       sd_time          0.309    NA          NA   NA      NA       
9 ran_pars Residual sd_Observation   0.356    NA          NA   NA      NA       

Specifically, I want to retain the name of each list element (e.g. ppara) when tabulating the results.

I am looking for two approaches: one using base R syntax (e.g. the apply family) and one using the purrr::map functions.

#1
# Attempt 1: iterate over the tibbles (.x) and their names (.y) in parallel
# and pull the p.value column from each.
# NOTE(review): inside a purrr formula lambda `.` is the first argument (the
# same thing as `.x`), i.e. the tibble itself rather than its name, so
# `sample[[.]]` indexes the list with a tibble. The element name is `.y`.
out <- map2_df(.x = sample,
               .y = names(sample),
               .f = ~ {
                 temp <- sample[[.]]$p.value
                 })

#2
# Attempt 2: same idea, but indexing the list with `.x`.
# NOTE(review): `.x` is the current tibble, not its name, so `sample[[.x]]`
# again indexes the list with a tibble; `.x$p.value` would reach the column
# directly, and the element name is available as `.y`.
out <- map2_df(.x = sample,
               .y = names(sample),
               .f = ~ {
                 temp <- sample[[.x]]$p.value
                 })



# Base R attempt: from every tibble take the cell at row 5, column 8 by
# position. In the printed `ppara` tibble above that cell is the p.value of
# the `time:grup_int` term; the term label itself is not carried along.
sapply(sample, function(x) x[5,8])

Desired output

Var     term         p.value
ppara   time          0.048
ppara   grup_int       0.7
pparg   time          0.03
pparg   grup_int       0.9
lrp1     time         0.048
lrp1   grup_int       0.7

The database

# Reproducible example data (dput output): a named list of three tibbles
# (ppara, ppard, pparg). Each tibble has 9 rows and the same 8 columns:
# effect, group, term, estimate, std.error, df, statistic, p.value.
# The last four rows of each tibble (effect == "ran_pars") have NA for
# std.error, df, statistic and p.value.
sample <- 
list(ppara = structure(list(effect = c("fixed", "fixed", "fixed", 
"fixed", "fixed", "ran_pars", "ran_pars", "ran_pars", "ran_pars"
), group = c(NA, NA, NA, NA, NA, "id", "id", "id", "Residual"
), term = c("(Intercept)", "edad0", "time", "grup_int", "time:grup_int", 
"sd_(Intercept)", "cor_time.(Intercept)", "sd_time", "sd_Observation"
), estimate = c(8.23144341495959, -0.00299788328827405, -0.00578981152447268, 
0.103780244730389, -0.0452613750584519, 0.818909993116146, -0.827336729479646, 
0.30909890538813, 0.355517251103403), std.error = c(0.571643342465992, 
0.00770825512293783, 0.0979783085964459, 0.116001719438183, 0.0457364054219781, 
NA, NA, NA, NA), df = c(129, 129, 93, 129, 93, NA, NA, NA, NA
), statistic = c(14.399613891155, -0.388918534799542, -0.0590927890817121, 
0.894644021080163, -0.989613736384761, NA, NA, NA, NA), p.value = c(1.27025495304043e-28, 
0.697977615162918, 0.95300499469296, 0.372643612464504, 0.324930741925854, 
NA, NA, NA, NA)), row.names = c(NA, -9L), class = c("tbl_df", 
"tbl", "data.frame")), ppard = structure(list(effect = c("fixed", 
"fixed", "fixed", "fixed", "fixed", "ran_pars", "ran_pars", "ran_pars", 
"ran_pars"), group = c(NA, NA, NA, NA, NA, "id", "id", "id", 
"Residual"), term = c("(Intercept)", "edad0", "time", "grup_int", 
"time:grup_int", "sd_(Intercept)", "cor_time.(Intercept)", "sd_time", 
"sd_Observation"), estimate = c(5.18359198577722, 0.00228679471444601, 
-0.135473971604774, 0.0593901933594388, -0.00917122634564429, 
0.615877528429973, -0.768824349334309, 0.260993158259707, 0.312749339804677
), std.error = c(0.43022445547674, 0.00587133152604662, 0.0733591723862023, 
0.0798423671078272, 0.0340855128294131, NA, NA, NA, NA), df = c(148, 
148, 138, 148, 138, NA, NA, NA, NA), statistic = c(12.0485758533489, 
0.389484856084391, -1.84672164636163, 0.743843093720308, -0.269065229898206, 
NA, NA, NA, NA), p.value = c(9.96588328228921e-24, 0.697477383279218, 
0.066929504853735, 0.45815070193322, 0.788281346016441, NA, NA, 
NA, NA)), row.names = c(NA, -9L), class = c("tbl_df", "tbl", 
"data.frame")), pparg = structure(list(effect = c("fixed", "fixed", 
"fixed", "fixed", "fixed", "ran_pars", "ran_pars", "ran_pars", 
"ran_pars"), group = c(NA, NA, NA, NA, NA, "id", "id", "id", 
"Residual"), term = c("(Intercept)", "edad0", "time", "grup_int", 
"time:grup_int", "sd_(Intercept)", "cor_time.(Intercept)", "sd_time", 
"sd_Observation"), estimate = c(11.7849185400239, 0.00500482392946355, 
-0.0287950497053358, 0.0531237373210473, -0.0307355823546849, 
0.850075520195338, -0.547821112109215, 0.258949971203219, 0.376723078311319
), std.error = c(0.694859635848458, 0.00979221254717485, 0.0803747844308631, 
0.10509839899698, 0.0374101925206616, NA, NA, NA, NA), df = c(148, 
148, 136, 148, 136, NA, NA, NA, NA), statistic = c(16.9601426418069, 
0.511102460792428, -0.358259744137849, 0.505466665791681, -0.821583111012051, 
NA, NA, NA, NA), p.value = c(1.61500691299714e-36, 0.610040924262626, 
0.720704418237478, 0.613983052245881, 0.41275215624961, NA, NA, 
NA, NA)), row.names = c(NA, -9L), class = c("tbl_df", "tbl", 
"data.frame")))

CodePudding user response:

Probably the easiest way (if all your tables have the same columns) is to first join all the tibbles and then select the columns you want to keep. If you use dplyr::bind_rows() it automatically generates a column with your list names. So this:

# Stack every tibble in the list into a single tibble. `.id = "var"` adds a
# column holding the list name (ppara, ppard, ...) each row came from.
# The result is stored under a new name so the original list `sample` is not
# overwritten and can still be iterated over afterwards.
combined <- dplyr::bind_rows(sample, .id = "var")

# Keep only the rows for the terms of interest (the desired output lists just
# `time` and `grup_int`), then keep the three columns to report.
subsample <- dplyr::filter(combined, term %in% c("time", "grup_int"))
subsample <- dplyr::select(subsample, var, term, p.value)

Should give you the desired results

CodePudding user response:

Here is another simple approach:

# Reduce one model tibble `d` to the rows whose `term` is listed in `cols`,
# returning only the `term` and `p.value` columns.
f <- function(d, cols = c("time", "grup_int")) {
  wanted_rows <- filter(d, term %in% cols)
  select(wanted_rows, term, p.value)
}

# Apply `f` to every tibble in the list, then stack the pieces into one
# tibble; `.id = "Var"` records the list name of each piece in a `Var` column.
bind_rows(map(sample, f), .id = "Var")

Output:

  Var   term     p.value
  <chr> <chr>      <dbl>
1 ppara time      0.953 
2 ppara grup_int  0.373 
3 ppard time      0.0669
4 ppard grup_int  0.458 
5 pparg time      0.721 
6 pparg grup_int  0.614
  • Related