I am using a user-defined function to generate the frequencies for each type of "select all that apply" (multiple-response) question.
This is my user function:
# adapted from: https://stackoverflow.com/questions/9265003/analysis-of-multiple-response
#' Frequency tables for multiple-response questions
#'
#' For every pattern in `question.prefix`, selects the columns of `data` whose
#' names match the pattern and tabulates how often each one is non-zero.
#'
#' @param data A data frame or tibble. The matched columns are compared
#'   against 0 and summed, so they should be numeric (e.g. 0/1 indicators).
#'   `NA` values are ignored when counting responses.
#' @param question.prefix Character vector of regular expressions, one per
#'   question group, matched against `names(data)` with `grep()`.
#' @return A list with one data frame per pattern, named by the pattern. Each
#'   data frame has one row per matched column plus a final "Total" row, and
#'   the columns `question` (column name with the pattern match removed),
#'   `freq`, `percent` (relative to all responses), `percentofcases`
#'   (relative to respondents) and `denom` (number of respondents).
multfreqtable <- function(data, question.prefix) {
  tables <- vector("list", length(question.prefix))
  # seq_along() gives an empty loop for zero patterns; 1:0 would run twice.
  for (i in seq_along(question.prefix)) {
    pattern <- question.prefix[i]
    # Find the columns with the questions
    cols <- grep(pattern, names(data))
    # drop = FALSE keeps a single matched column two-dimensional, so that
    # colSums()/rowSums() also work on plain data frames.
    answers <- data[, cols, drop = FALSE]
    chosen <- answers != 0
    # Total number of responses in this group
    n_responses <- sum(chosen, na.rm = TRUE)
    # Number of responses for each question
    per_question <- colSums(chosen, na.rm = TRUE)
    # Number of respondents: rows with a non-zero sum (rows whose sum is NA
    # are not counted)
    n_respondents <- sum(rowSums(answers) != 0, na.rm = TRUE)
    # Per-question counts followed by the total: the overall frequency column
    freq <- as.numeric(c(per_question, n_responses))
    tables[[i]] <- data.frame(
      question = c(sub(pattern, "", names(per_question)), "Total"),
      freq = freq,
      percent = (freq / n_responses) * 100,
      percentofcases = (freq / n_respondents) * 100,
      denom = n_respondents
    )
    names(tables)[i] <- pattern
  }
  tables
}
Here is a subset of my data (my actual data contains many more questions):
library(readr)
library(dplyr)
library(magrittr)
# Build the example data: parse the inline whitespace-delimited text, convert
# every column whose name contains "q" to numeric, and return a tibble.
data <- local({
  survey_text <- 'q3_1_both q3_2_both q3_3_both q3_4_c4 q3_5_c4 q3_6_both q3_7_both q3_8_both q3_9_both q3_10_c4 q3_11_c4 q3_13_bothnew q3_14_c3 q3_15_c3
0 0 1 NA NA 0 0 1 1 NA NA 1 1 1
0 0 0 NA NA 0 0 1 1 NA NA 1 0 1
0 0 1 NA NA 1 1 1 0 NA NA 1 1 1
0 1 0 0 1 0 0 0 0 0 0 1 NA NA
0 0 1 1 1 0 0 0 0 1 0 1 NA NA
0 0 1 1 1 0 1 1 0 0 0 1 NA NA
1 1 1 0 1 0 1 1 0 0 1 1 NA NA
1 1 1 1 1 0 1 1 1 1 1 1 NA NA
0 0 1 1 1 1 0 0 0 1 0 1 NA NA
0 0 1 0 0 0 0 0 0 0 0 0 NA NA
1 1 0 1 1 1 1 1 0 0 0 1 NA NA
'
  parsed <- read_table(survey_text)
  as_tibble(mutate(parsed, across(matches('q'), as.numeric)))
})
Calling the user function on specific questions based on regex match:
# Show the structure of the example data.
str(data)
# Build one frequency table per regex; the result is a list named by pattern.
out_list <- multfreqtable(data, c("q3_[0-9]*_c3","q3_[0-9]*_c4","q3_[0-9]*_.*both$"))
# Print the list of per-pattern tables.
out_list
# Stack the per-pattern tables into a single data frame.
do.call(rbind.data.frame, out_list)
This works well, except that the column names do not show in the output:
I would like to see the actual question name in a new column. I tried playing around with the user function to add the name, but it kept throwing errors. Any suggestions?
This is what I want:
CodePudding user response:
In your function you could add another column containing the variable names:
#' Frequency tables for multiple-response questions, with source column names
#'
#' For every pattern in `question.prefix`, selects the columns of `data` whose
#' names match the pattern and tabulates how often each one is non-zero.
#'
#' @param data A data frame or tibble. The matched columns are compared
#'   against 0 and summed, so they should be numeric (e.g. 0/1 indicators).
#'   `NA` values are ignored when counting responses.
#' @param question.prefix Character vector of regular expressions, one per
#'   question group, matched against `names(data)` with `grep()`.
#' @return A list with one data frame per pattern, named by the pattern. Each
#'   data frame has one row per matched column plus a final "Total" row, and
#'   the columns `question` (column name with the pattern match removed),
#'   `col_name` (the full column name, empty for the "Total" row), `freq`,
#'   `percent` (relative to all responses), `percentofcases` (relative to
#'   respondents) and `denom` (number of respondents).
multfreqtable <- function(data, question.prefix) {
  tables <- vector("list", length(question.prefix))
  # seq_along() gives an empty loop for zero patterns; 1:0 would run twice.
  for (i in seq_along(question.prefix)) {
    pattern <- question.prefix[i]
    # Find the columns with the questions
    cols <- grep(pattern, names(data))
    # drop = FALSE keeps a single matched column two-dimensional, so that
    # colSums()/rowSums() also work on plain data frames.
    answers <- data[, cols, drop = FALSE]
    chosen <- answers != 0
    # Total number of responses in this group
    n_responses <- sum(chosen, na.rm = TRUE)
    # Number of responses for each question
    per_question <- colSums(chosen, na.rm = TRUE)
    # Number of respondents: rows with a non-zero sum (rows whose sum is NA
    # are not counted)
    n_respondents <- sum(rowSums(answers) != 0, na.rm = TRUE)
    # Per-question counts followed by the total: the overall frequency column
    freq <- as.numeric(c(per_question, n_responses))
    tables[[i]] <- data.frame(
      question = c(sub(pattern, "", names(per_question)), "Total"),
      # Full name of each matched column; the "Total" row has none
      col_name = c(names(data)[cols], ""),
      freq = freq,
      percent = (freq / n_responses) * 100,
      percentofcases = (freq / n_respondents) * 100,
      denom = n_respondents
    )
    names(tables)[i] <- pattern
  }
  tables
}
library(dplyr)
library(magrittr)
# Build one frequency table per regex; the result is a list named by pattern.
out_list <- multfreqtable(data, c("q3_[0-9]*_c3","q3_[0-9]*_c4","q3_[0-9]*_.*both$"))
# Stack the per-pattern tables into a single data frame.
do.call(rbind.data.frame, out_list)
#> question col_name freq percent percentofcases denom
#> q3_[0-9]*_c3.1 q3_14_c3 2 40.000000 66.66667 3
#> q3_[0-9]*_c3.2 q3_15_c3 3 60.000000 100.00000 3
#> q3_[0-9]*_c3.3 Total 5 100.000000 166.66667 3
#> q3_[0-9]*_c4.1 q3_4_c4 5 29.411765 71.42857 7
#> q3_[0-9]*_c4.2 q3_5_c4 7 41.176471 100.00000 7
#> q3_[0-9]*_c4.3 q3_10_c4 3 17.647059 42.85714 7
#> q3_[0-9]*_c4.4 q3_11_c4 2 11.764706 28.57143 7
#> q3_[0-9]*_c4.5 Total 17 100.000000 242.85714 7
#> q3_[0-9]*_.*both$.1 q3_1_both 3 9.090909 27.27273 11
#> q3_[0-9]*_.*both$.2 q3_2_both 4 12.121212 36.36364 11
#> q3_[0-9]*_.*both$.3 q3_3_both 8 24.242424 72.72727 11
#> q3_[0-9]*_.*both$.4 q3_6_both 3 9.090909 27.27273 11
#> q3_[0-9]*_.*both$.5 q3_7_both 5 15.151515 45.45455 11
#> q3_[0-9]*_.*both$.6 q3_8_both 7 21.212121 63.63636 11
#> q3_[0-9]*_.*both$.7 q3_9_both 3 9.090909 27.27273 11
#> q3_[0-9]*_.*both$.8 Total 33 100.000000 300.00000 11