Combine cells into one per group inside a loop - CodePudding

My data.frame

 # Example input: ten integer measurement columns plus an integer GROUP id
 # (three groups, two rows each).
 data <- data.frame(
   col1 = c(125L, 654L, 896L, 154L, 865L, 148L),
   col2 = c(489L, 657L, 198L, 269L, 789L, 456L),
   col3 = c(741L, 852L, 963L, 987L, 951L, 632L),
   col4 = c(124L, 785L, 874L, 965L, 563L, 145L),
   col5 = c(963L, 146L, 259L, 367L, 365L, 189L),
   col6 = c(741L, 777L, 100L, 200L, 956L, 452L),
   col7 = c(456L, 666L, 300L, 778L, 888L, 999L),
   col8 = c(254L, 732L, 400L, 500L, 600L, 700L),
   col9 = c(555L, 638L, 127L, 489L, 545L, 54L),
   col10 = c(921L, 549L, 111L, 222L, 354L, 355L),
   GROUP = rep(1:3, times = 2)
 )

Function:

# Pairs of column indices (into `data`) that are compared with each other.
combination <- list(c(1, 2), c(3, 4), c(5, 6))

#' Per-group medians and pairwise Wilcoxon location estimates
#'
#' For every id in `id_groups` the rows of `data` whose `GROUP` matches are
#' selected, the median of each column referenced in `combination` is
#' computed, and `wilcox.test(..., conf.int = TRUE)` is run for every pair.
#' The negated estimate, rounded to two decimals, is stored as the text
#' "i-j: value" in the `dif` column.
#'
#' @param data A data frame with a `GROUP` column and the compared columns.
#' @param id_groups Vector of group ids matched against `data$GROUP`.
#' @param combination List of length-2 vectors naming the columns of `data`
#'   (by index) to compare.
#' @return A data frame with columns `GROUP`, one column per compared
#'   variable (medians, as character) and `dif`. Each group contributes one
#'   row per pair: the first row holds the group id, the medians and the
#'   first pair's estimate; the following rows hold only the remaining
#'   estimates (all other cells are `NA`).
wilcox.fun <- function(data, id_groups, combination) {
  cols <- unique(unlist(combination))
  var_names <- names(data[cols])
  n_out <- length(cols) + 2
  result_list <- list()
  for (g in id_groups) {
    # Select the compared columns before coercing so unrelated columns
    # cannot change the matrix type; drop = FALSE keeps one-row groups 2-D.
    df <- as.matrix(data[data$GROUP %in% g, cols, drop = FALSE])
    med <- paste(apply(df, 2, median))
    result <- data.frame(matrix(NA, ncol = n_out, nrow = 1))
    result[1, ] <- c(g, med, NA)
    for (k in seq_along(combination)) {
      i <- combination[[k]][1]
      j <- combination[[k]][2]
      # `df` only holds the selected columns, so translate the original
      # column ids into positions inside `df`.
      test <- wilcox.test(
        df[, match(i, cols)],
        df[, match(j, cols)],
        conf.int = TRUE
      )
      diff.1 <- -round(test$estimate, 2)
      # Writing to row k > 1 appends a new row to `result`.
      result[k, n_out] <- paste0(i, "-", j, ": ", diff.1)
    }
    result_list[[g]] <- result
  }
  result_new <- do.call(rbind, result_list)
  if (is.null(result_new)) {
    # No groups requested: return an empty table with the usual columns.
    result_new <- data.frame(matrix(character(), nrow = 0, ncol = n_out))
  }

  names(result_new) <- c("GROUP", var_names, "dif")
  result_new
}

# Run the comparison for groups 1 and 2, then show the resulting table.
result <- wilcox.fun(
  data,
  id_groups = c("1", "2"),
  combination = combination
)
result

I want the values of the "dif" column to be combined into a single cell for each group:

what I would like to get:

Group	col1	col2	col3	col4	col5	col6	diff
1	139.5	379	864	544.5	665	470.5	1-2: 239.5, 3-4: -319.5, 5-6: -194.5
2	759.5	723	901.5	674	255.5	866.5	1-2: -36.5, 3-4: -227.5, 5-6: 611

CodePudding user response：

After you calculate the dif for each k, use paste() to combine everything together and then get rid of the unnecessary rows.

#' Per-group medians with all pairwise Wilcoxon estimates in one cell
#'
#' For every id in `id_groups` the rows of `data` whose `GROUP` matches are
#' selected, the median of each column referenced in `combination` is
#' computed, and `wilcox.test(..., conf.int = TRUE)` is run for every pair.
#' The negated estimates, rounded to two decimals, are formatted as
#' "i-j: value" and joined with ", " into a single `dif` cell per group.
#'
#' @param data A data frame with a `GROUP` column and the compared columns.
#' @param id_groups Vector of group ids matched against `data$GROUP`.
#' @param combination List of length-2 vectors naming the columns of `data`
#'   (by index) to compare.
#' @return A data frame with one row per group and the columns `GROUP`, one
#'   column per compared variable (medians, as character) and `dif`.
wilcox.fun <- function(data, id_groups, combination) {
  cols <- unique(unlist(combination))
  var_names <- names(data[cols])
  n_out <- length(cols) + 2
  result_list <- list()
  for (g in id_groups) {
    # Select the compared columns before coercing so unrelated columns
    # cannot change the matrix type; drop = FALSE keeps one-row groups 2-D.
    df <- as.matrix(data[data$GROUP %in% g, cols, drop = FALSE])
    med <- paste(apply(df, 2, median))
    dif <- vapply(
      combination,
      function(pair) {
        i <- pair[1]
        j <- pair[2]
        # `df` only holds the selected columns, so translate the original
        # column ids into positions inside `df`.
        test <- wilcox.test(
          df[, match(i, cols)],
          df[, match(j, cols)],
          conf.int = TRUE
        )
        paste0(i, "-", j, ": ", -round(test$estimate, 2))
      },
      character(1)
    )
    # One row per group: id, medians, and every pair's estimate merged
    # into a single cell.
    result <- data.frame(matrix(NA, ncol = n_out, nrow = 1))
    result[1, ] <- c(g, med, paste(dif, collapse = ", "))
    result_list[[g]] <- result
  }
  result_new <- do.call(rbind, result_list)
  if (is.null(result_new)) {
    # No groups requested: return an empty table with the usual columns.
    result_new <- data.frame(matrix(character(), nrow = 0, ncol = n_out))
  }

  names(result_new) <- c("GROUP", var_names, "dif")
  result_new
}

Output

> wilcox.fun(data, c("1", "2"),combination)
  GROUP  col1 col2  col3  col4  col5  col6                                  dif
1     1 139.5  379   864 544.5   665 470.5 1-2: 239.5, 3-4: -319.5, 5-6: -194.5
2     2 759.5  723 901.5   674 255.5 866.5    1-2: -36.5, 3-4: -227.5, 5-6: 611

CodePudding user response：

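The diff column seems to be simply the difference of the medians, so not sure why you're defining the complicated wilcox.fun. (This holds for your data because every group has only two rows; in general the estimate returned by wilcox.test is the median of all pairwise differences, which need not equal the difference of the medians.) You can get diff quicker in the following way: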

library(dplyr)

# One row per GROUP; diff holds the three median differences as one string.
data %>%
  group_by(GROUP) %>%
  summarise(
    diff = paste0(
      "1-2: ", median(col2) - median(col1),
      ", 3-4: ", median(col4) - median(col3),
      ", 5-6: ", median(col6) - median(col5)
    )
  )
## A tibble: 3 × 2
#  GROUP diff
#  <int> <chr>
#1     1 1-2: 239.5, 3-4: -319.5, 5-6: -194.5
#2     2 1-2: -36.5, 3-4: -227.5, 5-6: 611
#3     3 1-2: -195, 3-4: -288, 5-6: 52