I am trying to build some contingency tables, but I don't know how to get the column names to show up as the table labels when I use the mapply function.
This is an example of the data:
# Fix the RNG seed so the sampled columns are identical on every run.
set.seed(123)
n <- 10

# Toy data: one id-like factor plus six sampled categorical columns.
# The sample() calls are kept in this order because each one advances the RNG.
dat <- data.frame(
  balance = factor(paste("DM", seq_len(n))),
  credit_history = sample(c("repaid", "critical"), n, replace = TRUE),
  purpose = sample(c("yes", "no"), n, replace = TRUE),
  employment_rate = sample(c("0-1 yrs", "1-4 yrs", ">4 yrs"), n, replace = TRUE),
  personal_status = sample(c("married", "single"), n, replace = TRUE),
  other_debtors = sample(c("guarantor", "none"), n, replace = TRUE),
  default = sample(c("yes", "no"), n, replace = TRUE)
)

# First six columns are tabulated against the "default" column.
dt1 <- dat[, 1:6]
dt2 <- dat[["default"]]

# One table per column of dt1, each crossed with dt2. The argument names x and
# y are what end up as the dimension labels of every table.
mapply(function(x, y) table(x, y), dt1, MoreArgs = list(y = dt2))
The code above just shows "x" and "y" as the labels, and I would like to display e.g. "default" and "balance" instead. Can someone give me some advice?
CodePudding user response:
1) Use the dnn argument of table:
# Cross-tabulate the supplied vectors, always labelling the two dimensions
# "balance" and "default" through table()'s dnn argument.
f <- function(...) {
  dim_labels <- c("balance", "default")
  table(..., dnn = dim_labels)
}
# Call f once per column of dt1; dt2 is handed to every call unchanged.
mapply(
  f,
  dt1,
  MoreArgs = list(dt2)
)
giving:
$balance
default
balance no yes
DM 1 0 1
DM 10 1 0
DM 2 0 1
DM 3 0 1
DM 4 1 0
DM 5 1 0
DM 6 0 1
DM 7 1 0
DM 8 0 1
DM 9 0 1
$credit_history
default
balance no yes
critical 2 2
repaid 2 4
$purpose
default
balance no yes
no 2 3
yes 2 3
$employment_rate
default
balance no yes
>4 yrs 2 0
0-1 yrs 1 4
1-4 yrs 1 2
$personal_status
default
balance no yes
married 3 3
single 1 3
$other_debtors
default
balance no yes
guarantor 4 3
none 0 3
2) or maybe you wanted this:
# Cross-tabulate x against z, labelling the first dimension with the string y
# and the second dimension "default".
f <- function(x, y, z) {
  dim_labels <- c(y, "default")
  table(x, z, dnn = dim_labels)
}
# Pair each column of dt1 (x) with its own name (y); dt2 goes to every call as z.
mapply(f, x = dt1, y = names(dt1), MoreArgs = list(z = dt2))
giving:
$balance
default
balance no yes
DM 1 0 1
DM 10 1 0
DM 2 0 1
DM 3 0 1
DM 4 1 0
DM 5 1 0
DM 6 0 1
DM 7 1 0
DM 8 0 1
DM 9 0 1
$credit_history
default
credit_history no yes
critical 2 2
repaid 2 4
$purpose
default
purpose no yes
no 2 3
yes 2 3
$employment_rate
default
employment_rate no yes
>4 yrs 2 0
0-1 yrs 1 4
1-4 yrs 1 2
$personal_status
default
personal_status no yes
married 3 3
single 1 3
$other_debtors
default
other_debtors no yes
guarantor 4 3
none 0 3
3) This kind of output often appears as Table 1 in papers, and there are many R packages for producing such tables. A long list of such packages can be found here: https://github.com/kaz-yos/tableone#similar-or-complementary-projects . We show the use of one of them, the tableone package, below, but try any of the others listed if desired.
library(tableone)

# Copy dat and rebuild balance with its levels in the order "DM 1" ... "DM 10".
dat2 <- dat
dat2$balance <- factor(
  as.character(dat$balance),
  levels = paste("DM", 1:10)
)

# Summarise the columns of dat2 stratified by "default" and print all levels.
tab1 <- CreateTableOne(strata = "default", data = dat2)
print(tab1, showAllLevels = TRUE)
giving:
Stratified by default
level no yes p test
n 4 6
balance (%) DM 1 0 ( 0.0) 1 (16.7) 0.350
DM 2 0 ( 0.0) 1 (16.7)
DM 3 0 ( 0.0) 1 (16.7)
DM 4 1 ( 25.0) 0 ( 0.0)
DM 5 1 ( 25.0) 0 ( 0.0)
DM 6 0 ( 0.0) 1 (16.7)
DM 7 1 ( 25.0) 0 ( 0.0)
DM 8 0 ( 0.0) 1 (16.7)
DM 9 0 ( 0.0) 1 (16.7)
DM 10 1 ( 25.0) 0 ( 0.0)
credit_history (%) critical 2 ( 50.0) 2 (33.3) 1.000
repaid 2 ( 50.0) 4 (66.7)
purpose (%) no 2 ( 50.0) 3 (50.0) 1.000
yes 2 ( 50.0) 3 (50.0)
employment_rate (%) >4 yrs 2 ( 50.0) 0 ( 0.0) 0.143
0-1 yrs 1 ( 25.0) 4 (66.7)
1-4 yrs 1 ( 25.0) 2 (33.3)
personal_status (%) married 3 ( 75.0) 3 (50.0) 0.895
single 1 ( 25.0) 3 (50.0)
other_debtors (%) guarantor 4 (100.0) 3 (50.0) 0.324
none 0 ( 0.0) 3 (50.0)
CodePudding user response:
This should work:
# Fix the RNG seed so the sampled columns are identical on every run.
set.seed(123)
n <- 10

# Toy data: one id-like factor plus six sampled categorical columns.
# The sample() calls are kept in this order because each one advances the RNG.
dat <- data.frame(
  balance = factor(paste("DM", seq_len(n))),
  credit_history = sample(c("repaid", "critical"), n, replace = TRUE),
  purpose = sample(c("yes", "no"), n, replace = TRUE),
  employment_rate = sample(c("0-1 yrs", "1-4 yrs", ">4 yrs"), n, replace = TRUE),
  personal_status = sample(c("married", "single"), n, replace = TRUE),
  other_debtors = sample(c("guarantor", "none"), n, replace = TRUE),
  default = sample(c("yes", "no"), n, replace = TRUE)
)

# First six columns are tabulated against the "default" column.
dt1 <- dat[, 1:6]
dt2 <- dat[["default"]]

# Naming the arguments passed to table() sets the dimension labels directly.
# Only two arguments reach the function (a column of dt1 and dt2), so it takes
# exactly x and y; the previously declared third formal was never supplied.
mapply(
  function(x, y) table(balance = x, default = y),
  dt1,
  MoreArgs = list(dt2)
)
#> $balance
#> default
#> balance no yes
#> DM 1 0 1
#> DM 10 1 0
#> DM 2 0 1
#> DM 3 0 1
#> DM 4 1 0
#> DM 5 1 0
#> DM 6 0 1
#> DM 7 1 0
#> DM 8 0 1
#> DM 9 0 1
#>
#> $credit_history
#> default
#> balance no yes
#> critical 2 2
#> repaid 2 4
#>
#> $purpose
#> default
#> balance no yes
#> no 2 3
#> yes 2 3
#>
#> $employment_rate
#> default
#> balance no yes
#> >4 yrs 2 0
#> 0-1 yrs 1 4
#> 1-4 yrs 1 2
#>
#> $personal_status
#> default
#> balance no yes
#> married 3 3
#> single 1 3
#>
#> $other_debtors
#> default
#> balance no yes
#> guarantor 4 3
#> none 0 3
# Label the first dimension of each table with the name of the column being
# tabulated: z carries names(dt1) in parallel with the columns of dt1, while
# dt2 is passed to every call as y.
mapply(
  function(x, y, z) table(x, y, dnn = c(z, "default")),
  dt1,
  z = names(dt1),
  MoreArgs = list(y = dt2)
)
#> $balance
#> default
#> balance no yes
#> DM 1 0 1
#> DM 10 1 0
#> DM 2 0 1
#> DM 3 0 1
#> DM 4 1 0
#> DM 5 1 0
#> DM 6 0 1
#> DM 7 1 0
#> DM 8 0 1
#> DM 9 0 1
#>
#> $credit_history
#> default
#> credit_history no yes
#> critical 2 2
#> repaid 2 4
#>
#> $purpose
#> default
#> purpose no yes
#> no 2 3
#> yes 2 3
#>
#> $employment_rate
#> default
#> employment_rate no yes
#> >4 yrs 2 0
#> 0-1 yrs 1 4
#> 1-4 yrs 1 2
#>
#> $personal_status
#> default
#> personal_status no yes
#> married 3 3
#> single 1 3
#>
#> $other_debtors
#> default
#> other_debtors no yes
#> guarantor 4 3
#> none 0 3
Created on 2021-12-06 by the reprex package (v2.0.1)