I have a list of tibbles (too many to handle manually) containing data I need to tabulate. I want to extract the p-values, but more generally, let's say I want to pull multiple columns and multiple rows from something like this:
sample$ppara
# A tibble: 9 × 8
effect group term estimate std.error df statistic p.value
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 fixed NA (Intercept) 8.23 0.572 129 14.4 1.27e-28
2 fixed NA edad0 -0.00300 0.00771 129 -0.389 6.98e- 1
3 fixed NA time -0.00579 0.0980 93 -0.0591 9.53e- 1
4 fixed NA grup_int 0.104 0.116 129 0.895 3.73e- 1
5 fixed NA time:grup_int -0.0453 0.0457 93 -0.990 3.25e- 1
6 ran_pars id sd_(Intercept) 0.819 NA NA NA NA
7 ran_pars id cor_time.(Inte… -0.827 NA NA NA NA
8 ran_pars id sd_time 0.309 NA NA NA NA
9 ran_pars Residual sd_Observation 0.356 NA NA NA NA
Specifically, I want to retain the name of the variable (e.g. ppara) when tabulating the results.
I am looking for two approaches: one using base R syntax and/or the apply family, and one using the purrr::map functions.
#1
# Attempt with purrr::map2_df: iterate over the tibbles (.x) and their names (.y).
# NOTE(review): inside a purrr formula lambda, `.` stands for the first
# argument (the same value as `.x`), i.e. the tibble itself. `sample[[.]]`
# therefore indexes the list with a whole tibble instead of a name or a
# position -- presumably why this attempt fails; confirm by running it.
# `.y` (the element name) is never used in the body, so the variable name is
# not carried into the result, and no rows or terms are selected.
out <- map2_df(.x = sample,
.y = names(sample),
.f = ~ {
temp <- sample[[.]]$p.value
})
#2
# Attempt with purrr::map2_df: iterate over the tibbles (.x) and their names (.y).
# NOTE(review): `.x` is already the current tibble, so `sample[[.x]]` indexes
# the list with a whole tibble instead of a name or a position -- presumably
# why this attempt fails; confirm by running it.
# `.y` (the element name) is never used in the body, so the variable name is
# not carried into the result, and no rows or terms are selected.
out <- map2_df(.x = sample,
.y = names(sample),
.f = ~ {
temp <- sample[[.x]]$p.value
})
# Base attempt: pull row 5, column 8 out of every tibble in the list. In the
# tibble printed above that cell is the p.value of `time:grup_int`.
# The indices are positional and hard-coded, so only one cell per tibble is
# returned and the `term` label is not kept alongside the value.
sapply(sample, function(x) x[5,8])
Desired output
Var term p.value
ppara time 0.048
ppara grup_int 0.7
pparg time 0.03
pparg grup_int 0.9
lrp1 time 0.048
lrp1 grup_int 0.7
The database
# Reproducible example data (dput-style literal): a named list holding three
# tibbles -- ppara, ppard and pparg. Each tibble has 9 rows and the columns
# effect, group, term, estimate, std.error, df, statistic and p.value.
# The five "fixed" rows carry test statistics and p-values; the four
# "ran_pars" rows have NA in std.error, df, statistic and p.value.
sample <-
list(ppara = structure(list(effect = c("fixed", "fixed", "fixed",
"fixed", "fixed", "ran_pars", "ran_pars", "ran_pars", "ran_pars"
), group = c(NA, NA, NA, NA, NA, "id", "id", "id", "Residual"
), term = c("(Intercept)", "edad0", "time", "grup_int", "time:grup_int",
"sd_(Intercept)", "cor_time.(Intercept)", "sd_time", "sd_Observation"
), estimate = c(8.23144341495959, -0.00299788328827405, -0.00578981152447268,
0.103780244730389, -0.0452613750584519, 0.818909993116146, -0.827336729479646,
0.30909890538813, 0.355517251103403), std.error = c(0.571643342465992,
0.00770825512293783, 0.0979783085964459, 0.116001719438183, 0.0457364054219781,
NA, NA, NA, NA), df = c(129, 129, 93, 129, 93, NA, NA, NA, NA
), statistic = c(14.399613891155, -0.388918534799542, -0.0590927890817121,
0.894644021080163, -0.989613736384761, NA, NA, NA, NA), p.value = c(1.27025495304043e-28,
0.697977615162918, 0.95300499469296, 0.372643612464504, 0.324930741925854,
NA, NA, NA, NA)), row.names = c(NA, -9L), class = c("tbl_df",
"tbl", "data.frame")), ppard = structure(list(effect = c("fixed",
"fixed", "fixed", "fixed", "fixed", "ran_pars", "ran_pars", "ran_pars",
"ran_pars"), group = c(NA, NA, NA, NA, NA, "id", "id", "id",
"Residual"), term = c("(Intercept)", "edad0", "time", "grup_int",
"time:grup_int", "sd_(Intercept)", "cor_time.(Intercept)", "sd_time",
"sd_Observation"), estimate = c(5.18359198577722, 0.00228679471444601,
-0.135473971604774, 0.0593901933594388, -0.00917122634564429,
0.615877528429973, -0.768824349334309, 0.260993158259707, 0.312749339804677
), std.error = c(0.43022445547674, 0.00587133152604662, 0.0733591723862023,
0.0798423671078272, 0.0340855128294131, NA, NA, NA, NA), df = c(148,
148, 138, 148, 138, NA, NA, NA, NA), statistic = c(12.0485758533489,
0.389484856084391, -1.84672164636163, 0.743843093720308, -0.269065229898206,
NA, NA, NA, NA), p.value = c(9.96588328228921e-24, 0.697477383279218,
0.066929504853735, 0.45815070193322, 0.788281346016441, NA, NA,
NA, NA)), row.names = c(NA, -9L), class = c("tbl_df", "tbl",
"data.frame")), pparg = structure(list(effect = c("fixed", "fixed",
"fixed", "fixed", "fixed", "ran_pars", "ran_pars", "ran_pars",
"ran_pars"), group = c(NA, NA, NA, NA, NA, "id", "id", "id",
"Residual"), term = c("(Intercept)", "edad0", "time", "grup_int",
"time:grup_int", "sd_(Intercept)", "cor_time.(Intercept)", "sd_time",
"sd_Observation"), estimate = c(11.7849185400239, 0.00500482392946355,
-0.0287950497053358, 0.0531237373210473, -0.0307355823546849,
0.850075520195338, -0.547821112109215, 0.258949971203219, 0.376723078311319
), std.error = c(0.694859635848458, 0.00979221254717485, 0.0803747844308631,
0.10509839899698, 0.0374101925206616, NA, NA, NA, NA), df = c(148,
148, 136, 148, 136, NA, NA, NA, NA), statistic = c(16.9601426418069,
0.511102460792428, -0.358259744137849, 0.505466665791681, -0.821583111012051,
NA, NA, NA, NA), p.value = c(1.61500691299714e-36, 0.610040924262626,
0.720704418237478, 0.613983052245881, 0.41275215624961, NA, NA,
NA, NA)), row.names = c(NA, -9L), class = c("tbl_df", "tbl",
"data.frame")))
CodePudding user response:
Probably the easiest way (if all your tables have the same columns) is to first stack all the tibbles into one and then select the columns you want to keep. If you call dplyr::bind_rows() with the `.id` argument,
it adds a column holding the names of your list elements. So this:
# Stack every tibble in the list into a single data frame. `.id = "var"` adds
# a column holding the name of the list element each row came from.
# The stacked result is stored under its own name so that the original list
# `sample` is not overwritten and remains usable by later code.
combined <- dplyr::bind_rows(sample, .id = "var")
# Keep only the list-name identifier, the term label and its p-value.
subsample <- dplyr::select(combined, var, term, p.value)
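This should give you the desired columns for every term; if you only need specific rows, additionally filter on `term` (e.g. `term %in% c("time", "grup_int")`).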
CodePudding user response:
Here is another simple approach:
# Reduce one tibble to the rows whose `term` is listed in `cols`, keeping
# only the `term` and `p.value` columns.
#   d    : a tibble with (at least) the columns `term` and `p.value`
#   cols : character vector of term labels to keep
f <- function(d, cols = c("time", "grup_int")) {
  wanted_rows <- filter(d, term %in% cols)
  select(wanted_rows, term, p.value)
}

# Apply `f` to every tibble in the list, then stack the pieces; `.id` writes
# each list element's name into a `Var` column.
bind_rows(map(sample, f), .id = "Var")
Output:
Var term p.value
<chr> <chr> <dbl>
1 ppara time 0.953
2 ppara grup_int 0.373
3 ppard time 0.0669
4 ppard grup_int 0.458
5 pparg time 0.721
6 pparg grup_int 0.614