I want to hide values in the output table when the number of records for the respective variable is less than 4.
For example, if the number of records in column hp, mpg, or qsec is less than 4, then the mean or median should be masked with "--".
I am trying the code below, but it is not working.
In this case, if valid_n is less than 4, then the mean and median should be shown as "--".
library(expss)
# Example data: three rows, seven numeric columns (col2 has one missing value).
data <- data.frame(
  gender = c(1, 2, 1),
  sector = c(3, 3, 1),
  col1   = c(12, 15, 22),
  col2   = c(33, NA, 41),
  col3   = c(1, 1, 0),
  col4   = c(1, 0, 0),
  col5   = c(1, 2, 1)
)

# Recode the 0/1 indicator columns as single-level factors; any value other
# than 1 becomes NA because it is not among the declared levels.
data[["col3"]] <- factor(data[["col3"]], levels = 1, labels = "Management")
data[["col4"]] <- factor(data[["col4"]], levels = 1, labels = "HR")

# Banner (column) variables for the table, in display order: HR, Management.
lst <- lapply(c("col4", "col3"), function(nm) data[[nm]])

# Thresholds read by the masking helpers defined inside fun1().
mean_mask <- 3
median_mask <- 3
# Question's attempt: build a cross_fun() summary table for the columns in
# `var_list`, split by `banner1`, with Mean/Median meant to be masked by "--".
#
# dataset  : data frame holding the columns named in `var_list`
# var_list : character vector of column names to summarise
# banner1  : passed to cross_fun() as `col_vars`
#
# NOTE(review): the masking does not take effect here. mask_m()/mask_me() are
# called once, while the function list is being built, with the function
# objects `mean`/`median` as `x` and the literal 5 as `N`; they never receive
# the per-cell Valid N of the table.
fun1 <- function(dataset,var_list,banner1){
# Lower and upper quartile helpers (NA values removed, names dropped).
perc_25 <- function(x, ...){unname(quantile(x, .25, na.rm=TRUE))}
perc_75 <- function(x, ...){unname(quantile(x, .75, na.rm=TRUE))}
# Intended to swap `x` for "--" when N is below the global `mean_mask`.
# The result is only assigned to the local `x` (the function's value is that
# assignment's value).
mask_m<-function(x,N){
x= ifelse(N<mean_mask,"--",x)
}
# Same idea, using the global `median_mask`.
mask_me<-function(x,N){
x= ifelse(N<median_mask,"--",x)
}
# Keep only the requested columns.
dataset<-dataset[var_list] %>% as.data.frame()
first_col_param <- head(var_list,1)
second_col_param <- tail(var_list,1)
# NOTE(review): this applies `var_lab<-` to the last column *name* (a string),
# not to the column itself - confirm this is what was intended.
var_lab(colnames(dataset)[ncol(dataset)]) <- ""
# Build the expression `mrset(<first> %to% <last>)` as text and parse it; it is
# evaluated inside the cross_fun() call below.
mr <- parse(text=paste0("mrset(",
first_col_param ," %to% ",second_col_param,")"))
t1<- cross_fun(dataset,
eval(mr),
col_vars = banner1,
# With N = 5 and the thresholds defined at top level as 3, `N < threshold` is
# FALSE, so ifelse() is asked for its `no` argument (the original function),
# never for "--".
fun = combine_functions("Mean" = mask_m(mean,5),
"Median" = mask_me(median,5),
"Max"= max,
"Min"=min,
"25th Perc" = perc_25,
"75th Perc" = perc_75,
"Valid N" = valid_n
))
t1
}
# NOTE(review): `mask` does not appear to be read by the code shown here.
mask <- 5
# Summarise col1 against the HR / Management banner.
t1 <- fun1(dataset = data, var_list = "col1", banner1 = lst)
The required output should look like the one below.
CodePudding user response:
Perhaps this helps
#' Summary-statistics table with Mean/Median masked for small cells
#'
#' Builds an expss `cross_fun()` table of summary statistics for the columns
#' named in `var_list`, split by `banner1`. Afterwards, in every statistic
#' column, the "Mean" and "Median" cells of a row group are replaced by `mask`
#' when that group's "Valid N" is below `min_n`.
#'
#' @param dataset A data frame containing the columns named in `var_list`.
#' @param var_list Character vector of column names to summarise.
#' @param banner1 Passed to `cross_fun()` as `col_vars`.
#' @param min_n Smallest "Valid N" for which Mean/Median are still shown.
#'   Defaults to 4, the value that was previously hard-coded.
#' @param mask Replacement text for masked cells. Defaults to "--".
#' @return An expss `etable`. A statistic column that had at least one cell
#'   masked is character; columns without masked cells stay numeric.
fun1 <- function(dataset, var_list, banner1, min_n = 4, mask = "--") {
  perc_25 <- function(x, ...) {
    unname(quantile(x, 0.25, na.rm = TRUE))
  }
  perc_75 <- function(x, ...) {
    unname(quantile(x, 0.75, na.rm = TRUE))
  }

  # Mask Mean/Median cells of one statistic column `x`. `grp` and `stat_names`
  # are the row-group and statistic labels, aligned with `x`. The decision is
  # made for the whole column at once so the result has a single type even
  # when only some row groups fall below the threshold.
  mask_small_n <- function(x, grp, stat_names) {
    is_count <- stat_names %in% "Valid N"
    # Valid N of the group each row belongs to (NA when the group has none).
    group_n <- x[is_count][match(grp, grp[is_count])]
    hide <- stat_names %in% c("Mean", "Median") &
      !is.na(group_n) & group_n < min_n
    if (!any(hide)) {
      return(x)
    }
    replace(x, hide, mask)
  }

  dataset <- as.data.frame(dataset[var_list])
  first_col_param <- head(var_list, 1)
  second_col_param <- tail(var_list, 1)
  # NOTE(review): this applies `var_lab<-` to the last column *name* rather
  # than to the column; kept unchanged to preserve the existing output.
  var_lab(colnames(dataset)[ncol(dataset)]) <- ""
  # `mrset(<first> %to% <last>)` is built as text because the column names are
  # only known at run time; cross_fun() evaluates it against `dataset`.
  mr <- parse(text = paste0(
    "mrset(", first_col_param, " %to% ", second_col_param, ")"
  ))

  t1 <- cross_fun(
    dataset,
    eval(mr),
    col_vars = banner1,
    fun = combine_functions(
      "Mean" = mean,
      "Median" = median,
      "Max" = max,
      "Min" = min,
      "25th Perc" = perc_25,
      "75th Perc" = perc_75,
      "Valid N" = valid_n
    )
  )

  # Split "group|statistic" row labels so each cell can be matched with the
  # Valid N of its own row group, mask, then restore the original labels.
  tbl <- tidyr::separate(
    as.data.frame(t1),
    row_labels,
    into = c("grp", "func_name"),
    sep = "\\|"
  )
  stat_cols <- names(tbl)[vapply(tbl, is.numeric, logical(1))]
  tbl[stat_cols] <- lapply(
    tbl[stat_cols],
    mask_small_n,
    grp = tbl[["grp"]],
    stat_names = tbl[["func_name"]]
  )
  tbl <- tidyr::unite(tbl, row_labels, grp, func_name, sep = "|")

  as.etable(tbl)
}
Testing:
> fun1(dataset=data,"col1",banner1=lst)
| | | HR | Management |
| ---- | --------- | -- | ---------- |
| col1 | Mean | -- | -- |
| | Median | -- | -- |
| | Max | 12 | 15 |
| | Min | 12 | 12 |
| | 25th Perc | 12 | 12.75 |
| | 75th Perc | 12 | 14.25 |
| | Valid N | 1 | 2 |