Home > OS >  Adding column name to user function
Adding column name to user function

Time:02-23

I am using a user-defined function to generate the frequencies for each 'apply-all' (multiple-response) question.

This is my user function:

# adapted from: https://stackoverflow.com/questions/9265003/analysis-of-multiple-response
#' Build frequency tables for groups of multiple-response columns
#'
#' For each regular expression in `question.prefix`, selects the columns of
#' `data` whose names match and counts the non-zero answers in each column.
#'
#' @param data A data frame (or tibble) whose matched columns are numeric
#'   indicators; `NA` is treated as a missing answer.
#' @param question.prefix Character vector of regular expressions, one per
#'   question group.
#' @return A list named by `question.prefix`. Each element is a data frame
#'   with one row per matched column plus a final "Total" row, holding
#'   `question` (column name with the matched pattern removed), `freq`,
#'   `percent` (share of all non-zero answers), `percentofcases` (share of
#'   respondents) and `denom` (number of respondents).
multfreqtable <- function(data, question.prefix) {
  tables <- vector("list", length(question.prefix))

  # seq_along() performs zero iterations for an empty pattern vector,
  # whereas 1:0 would iterate over c(1, 0).
  for (i in seq_along(question.prefix)) {
    # Find the columns belonging to this question group.
    cols <- grep(question.prefix[i], names(data))
    # drop = FALSE keeps a single matched column two-dimensional, so
    # colSums()/rowSums() work on base data frames as well as tibbles.
    answers <- data[, cols, drop = FALSE]
    selected <- answers != 0

    total_responses <- sum(selected, na.rm = TRUE)
    per_question <- colSums(selected, na.rm = TRUE)
    # rowSums() is NA for any row containing a missing answer, so only rows
    # that are complete and not all zero count as respondents.
    respondents <- sum(rowSums(answers) != 0, na.rm = TRUE)
    # Per-question counts followed by the grand total: the overall frequency.
    freq <- as.numeric(c(per_question, total_responses))

    tables[[i]] <- data.frame(
      question = c(sub(question.prefix[i], "", names(per_question)), "Total"),
      freq = freq,
      percent = (freq / total_responses) * 100,
      percentofcases = (freq / respondents) * 100,
      denom = respondents
    )
    names(tables)[i] <- question.prefix[i]
  }
  tables
}

A subset of my data (my actual data contains MANY questions)

library(readr)
library(dplyr)
library(magrittr)
# Parse the inline whitespace-delimited sample, then coerce every column whose
# name matches "q" to numeric and return the result as a tibble.
data <- read_table(
'q3_1_both   q3_2_both   q3_3_both   q3_4_c4 q3_5_c4 q3_6_both   q3_7_both   q3_8_both   q3_9_both   q3_10_c4    q3_11_c4    q3_13_bothnew   q3_14_c3    q3_15_c3
0   0   1   NA  NA  0   0   1   1   NA  NA  1   1   1
0   0   0   NA  NA  0   0   1   1   NA  NA  1   0   1
0   0   1   NA  NA  1   1   1   0   NA  NA  1   1   1
0   1   0   0   1   0   0   0   0   0   0   1   NA  NA
0   0   1   1   1   0   0   0   0   1   0   1   NA  NA
0   0   1   1   1   0   1   1   0   0   0   1   NA  NA
1   1   1   0   1   0   1   1   0   0   1   1   NA  NA
1   1   1   1   1   0   1   1   1   1   1   1   NA  NA
0   0   1   1   1   1   0   0   0   1   0   1   NA  NA
0   0   1   0   0   0   0   0   0   0   0   0   NA  NA
1   1   0   1   1   1   1   1   0   0   0   1   NA  NA
'
) %>%
  mutate(across(matches('q'), as.numeric)) %>%
  as_tibble()

Calling the user function on specific questions based on regex match:

# Inspect the input, then tabulate three question groups: columns ending in
# "_c3", columns ending in "_c4", and columns ending in "both".
str(data)
out_list <- multfreqtable(
  data,
  c("q3_[0-9]*_c3", "q3_[0-9]*_c4", "q3_[0-9]*_.*both$")
)
out_list
# Stack the per-group tables into a single data frame.
do.call(what = rbind.data.frame, args = out_list)

This works well, except that the column names do not show in the output:

enter image description here

I would like to see the actual question name in a new column. I tried modifying the user function to add the name, but it kept throwing errors. Any suggestions?

This is what I want:

enter image description here

CodePudding user response:

In your function you could add another column containing the variable names:

#' Build frequency tables for groups of multiple-response columns
#'
#' For each regular expression in `question.prefix`, selects the columns of
#' `data` whose names match and counts the non-zero answers in each column.
#'
#' @param data A data frame (or tibble) whose matched columns are numeric
#'   indicators; `NA` is treated as a missing answer.
#' @param question.prefix Character vector of regular expressions, one per
#'   question group.
#' @return A list named by `question.prefix`. Each element is a data frame
#'   with one row per matched column plus a final "Total" row, holding
#'   `question` (column name with the matched pattern removed), `col_name`
#'   (the full column name, empty for the "Total" row), `freq`, `percent`
#'   (share of all non-zero answers), `percentofcases` (share of respondents)
#'   and `denom` (number of respondents).
multfreqtable <- function(data, question.prefix) {
  tables <- vector("list", length(question.prefix))

  # seq_along() performs zero iterations for an empty pattern vector,
  # whereas 1:0 would iterate over c(1, 0).
  for (i in seq_along(question.prefix)) {
    # Find the columns belonging to this question group.
    cols <- grep(question.prefix[i], names(data))
    # drop = FALSE keeps a single matched column two-dimensional, so
    # colSums()/rowSums() work on base data frames as well as tibbles.
    answers <- data[, cols, drop = FALSE]
    selected <- answers != 0

    total_responses <- sum(selected, na.rm = TRUE)
    per_question <- colSums(selected, na.rm = TRUE)
    # rowSums() is NA for any row containing a missing answer, so only rows
    # that are complete and not all zero count as respondents.
    respondents <- sum(rowSums(answers) != 0, na.rm = TRUE)
    # Per-question counts followed by the grand total: the overall frequency.
    freq <- as.numeric(c(per_question, total_responses))

    tables[[i]] <- data.frame(
      question = c(sub(question.prefix[i], "", names(per_question)), "Total"),
      # Full column names; the "Total" row has no source column.
      col_name = c(names(data)[cols], ""),
      freq = freq,
      percent = (freq / total_responses) * 100,
      percentofcases = (freq / respondents) * 100,
      denom = respondents
    )
    names(tables)[i] <- question.prefix[i]
  }
  tables
}

library(dplyr)
library(magrittr)

# Tabulate three question groups: columns ending in "_c3", columns ending in
# "_c4", and columns ending in "both".
out_list <- multfreqtable(
  data,
  c("q3_[0-9]*_c3", "q3_[0-9]*_c4", "q3_[0-9]*_.*both$")
)

# Stack the per-group tables into a single data frame.
do.call(what = rbind.data.frame, args = out_list)
#>                     question  col_name freq    percent percentofcases denom
#> q3_[0-9]*_c3.1                q3_14_c3    2  40.000000       66.66667     3
#> q3_[0-9]*_c3.2                q3_15_c3    3  60.000000      100.00000     3
#> q3_[0-9]*_c3.3         Total              5 100.000000      166.66667     3
#> q3_[0-9]*_c4.1                 q3_4_c4    5  29.411765       71.42857     7
#> q3_[0-9]*_c4.2                 q3_5_c4    7  41.176471      100.00000     7
#> q3_[0-9]*_c4.3                q3_10_c4    3  17.647059       42.85714     7
#> q3_[0-9]*_c4.4                q3_11_c4    2  11.764706       28.57143     7
#> q3_[0-9]*_c4.5         Total             17 100.000000      242.85714     7
#> q3_[0-9]*_.*both$.1          q3_1_both    3   9.090909       27.27273    11
#> q3_[0-9]*_.*both$.2          q3_2_both    4  12.121212       36.36364    11
#> q3_[0-9]*_.*both$.3          q3_3_both    8  24.242424       72.72727    11
#> q3_[0-9]*_.*both$.4          q3_6_both    3   9.090909       27.27273    11
#> q3_[0-9]*_.*both$.5          q3_7_both    5  15.151515       45.45455    11
#> q3_[0-9]*_.*both$.6          q3_8_both    7  21.212121       63.63636    11
#> q3_[0-9]*_.*both$.7          q3_9_both    3   9.090909       27.27273    11
#> q3_[0-9]*_.*both$.8    Total             33 100.000000      300.00000    11
  • Related