Home > Software design >  R: Display all responses of the same type for mulitple variables in one data frame
R: Display all responses of the same type for mulitple variables in one data frame

Time:11-04

I have a datafile that looks something like this.

id <- c(1001, 1002, 1003, 1004, 1005, 1006)
var1 <- c(1, 0, 1, 0, 1, 1)
var2 <- c(1, 1, 1, 1, 1, 0)
var3 <- c(0, 0, 1, 1, 1, 0)
file <- data.frame (id, var1, var2, var3)
file

We have several different variables that are all scored in the same way, in this case 0 and 1. I want to count up all the responses of 1 and percentages for each variable and export that as a dataframe.

Here is what I want the final product to look like

variable response count percent
var1     1        4    66.67
var2     1        5    83.33
var3     1        3    50.00

I could generate table and probability tables for each individual variable and manually copy over the information by hand like this.

table (file$var1, exclude = FALSE)
table (file$var2, exclude = FALSE)
table (file$var3, exclude = FALSE)

Unfortunately, I have a lot of variables that I need to count up this way so this would not work. Is there a function or a way to set up a for loop.

CodePudding user response:

We could loop over the 'var' column names, get the table, and add the proportions, subset and rbind in base R

out <- do.call(rbind, lapply(grep("var", names(file), value = TRUE), 
  function(nm) cbind(variable = nm, 
     subset(transform(as.data.frame(table(file[[nm]])), 
      percent = proportions(Freq)), Var1 == 1))))
row.names(out) <- NULL
names(out)[2:3] <- c('response', 'count')

-output

> out
  variable response count   percent
1     var1        1     4 0.6666667
2     var2        1     5 0.8333333
3     var3        1     3 0.5000000

Or in tidyverse

library(dplyr)
library(tidyr)
file %>% 
  select(-id) %>%
  pivot_longer(everything(), names_to = 'variable',
   values_to = 'response') %>% 
 count(variable, response, name = 'count') %>%  
 group_by(variable) %>% 
 mutate(percent = proportions(count)) %>% 
 ungroup %>%
 filter(response == 1)

-output

# A tibble: 3 × 4
  variable response count percent
  <chr>       <dbl> <int>   <dbl>
1 var1            1     4   0.667
2 var2            1     5   0.833
3 var3            1     3   0.5  

CodePudding user response:

What you want is colSums and rowSums of your variables.

sapply(c(colSums, colMeans), \(f) f(file[-1]))
#      [,1]      [,2]
# var1    4 0.6666667
# var2    5 0.8333333
# var3    3 0.5000000

Or slightly refined

v <- c('var1', 'var2', 'var3')
data.frame(variable=v, 
           mapply(\(f, z) f(file[v]*z), list(count=colSums, percent=colMeans), c(1, 100)),
           row.names=NULL)
#   variable count  percent
# 1     var1     4 66.66667
# 2     var2     5 83.33333
# 3     var3     3 50.00000

Data:

file <- structure(list(id = c(1001, 1002, 1003, 1004, 1005, 1006), var1 = c(1, 
0, 1, 0, 1, 1), var2 = c(1, 1, 1, 1, 1, 0), var3 = c(0, 0, 1, 
1, 1, 0)), class = "data.frame", row.names = c(NA, -6L))

CodePudding user response:

Another tidyverse solution:

library(tidyverse)

summarise(file, across(contains("var"), sum), response = 1) |>
  pivot_longer(-response, names_to = "variable", values_to = "count") |>
  mutate(percent = (count/nrow(file))*100) 
#> # A tibble: 3 x 4
#>   response variable count percent
#>      <dbl> <chr>    <dbl>   <dbl>
#> 1        1 var1         4    66.7
#> 2        1 var2         5    83.3
#> 3        1 var3         3    50
  •  Tags:  
  • r
  • Related