let's say I have the following data:
A <- rep(c("A1", "A2", "A3"), 10)
B <- sample(1:5, size=30, replace=TRUE)
C <- sample(c("yes", "no"), size=30, replace=TRUE)
I want to create a table of all three vars, where "A" defines the colums and B and C define the rows.
In rbase, I would do it like this:
data.frame(rbind(table(B, A),
table(C, A)))
This would give the following output:
A1 A2 A3
1 0 3 3
2 2 1 0
3 2 1 2
4 2 4 3
5 4 1 2
no 5 7 3
yes 5 3 7
Variable "A" defines the columns, and variables "B" and "C" define the rows.
My questions are:
How would I do this in tidyverse with dplyr, group_by, summarize?
Is it possible to add the proportions to every cell, so that it would look like this:
A1 A2 A3
1 0 (0%) 3 (1%) 3 (10%)
2 2 (1%) 1 (0,5) 0 (0%)
3 2 (1%) 1 (0,5) 2 (5%)
4 2 (2%) 4 (2%) 3 (3%)
5 4 (6%) 1 (1%) 2 (2%)
no 5 (7%) 7 (4%) 3 (3%)
yes 5 (3%) 3 (2%) 7 (10%)
Thx in advance....
CodePudding user response:
A <- rep(c("A1", "A2", "A3"), 10)
B <- sample(1:5, size=30, replace=TRUE)
C <- sample(c("yes", "no"), size=30, replace=TRUE)
total_n <- 30
library(tidyverse)
tibble(A,B = as.character(B),C) %>%
pivot_longer(c(B,C), names_to = "original_col", values_to = "label") %>%
count(A, label, name = "n") %>%
complete(A, label, fill = list(n=0)) %>%
transmute(A, label, val = sprintf("%s (%s%%)", n, formatC(100* n/total_n, digits = 2))) %>%
pivot_wider(names_from = A, values_from = val)
#> # A tibble: 7 × 4
#> label A1 A2 A3
#> <chr> <chr> <chr> <chr>
#> 1 1 0 ( 0%) 2 (6.7%) 2 (6.7%)
#> 2 2 3 ( 10%) 2 (6.7%) 2 (6.7%)
#> 3 3 4 ( 13%) 1 (3.3%) 2 (6.7%)
#> 4 4 3 ( 10%) 1 (3.3%) 4 ( 13%)
#> 5 5 0 ( 0%) 4 ( 13%) 0 ( 0%)
#> 6 no 2 (6.7%) 4 ( 13%) 5 ( 17%)
#> 7 yes 8 ( 27%) 6 ( 20%) 5 ( 17%)
Created on 2022-09-15 by the reprex package (v2.0.1)
CodePudding user response:
You can use dplyr::across()
and loop over each column:
set.seed(123)
A <- rep(c("A1", "A2", "A3"), 10)
B <- sample(1:5, size=30, replace=TRUE)
C <- sample(c("yes", "no"), size=30, replace=TRUE)
dat <- data.frame(rbind(table(B, A),
table(C, A)))
library(dplyr)
dat %>%
transmute(across(starts_with("A"),
~ paste0(.x, " (", round(.x / sum(dat), 2) * 100, "%)")))
#> A1 A2 A3
#> 1 3 (5%) 3 (5%) 0 (0%)
#> 2 1 (2%) 2 (3%) 3 (5%)
#> 3 4 (7%) 4 (7%) 1 (2%)
#> 4 2 (3%) 0 (0%) 2 (3%)
#> 5 0 (0%) 1 (2%) 4 (7%)
#> no 5 (8%) 0 (0%) 4 (7%)
#> yes 5 (8%) 10 (17%) 6 (10%)
Created on 2022-09-15 by the reprex package (v2.0.1)
CodePudding user response:
using janitor
set.seed(123)
library(magrittr)
library(janitor)
A <- rep(c("A1", "A2", "A3"), 10)
B <- sample(1:5, size=30, replace=TRUE)
C <- sample(c("yes", "no"), size=30, replace=TRUE)
dat <- data.frame(rbind(table(B, A),
table(C, A)))
dat %>%
adorn_totals("col") %>%
adorn_percentages("row") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns(position = "front")
#> A1 A2 A3 Total
#> 3 3 (100.0%) 0 (0.0%) 3 (100.0%)
#> 1 2 (40.0%) 3 (60.0%) 5 (100.0%)
#> 4 4 (80.0%) 1 (20.0%) 5 (100.0%)
#> 2 0 (0.0%) 2 (100.0%) 2 (100.0%)
#> 0 1 (20.0%) 4 (80.0%) 5 (100.0%)
#> 5 0 (0.0%) 4 (100.0%) 4 (100.0%)
#> 5 10 (62.5%) 6 (37.5%) 16 (100.0%)
Created on 2022-09-15 with reprex v2.0.2