Home > Enterprise >  RShiny working with data frame column names from selectInput
RShiny working with data frame column names from selectInput

Time:05-19

I am working with creating an interactive app in RShiny which can display grouped boxplots as well as tables showing the Kruskal-Wallis test results for sample groups which are based on the columns selected by a user when they upload a CSV file.

Ideally, when a user uploads their CSV, they can select which columns they wish to use as the X and Y variables, the class label, and the group label for the grouped box plots and statistical calculations.

The code needs to be as general as possible to allow for different input columns. However, I am having great difficulty with understanding the syntax required to access the data in the selected columns using the input$colName syntax.

For example:

When I try to get a table showing the kruskal-wallis result for each group in the column selected for the sample groups (input$groupCol) using the following dplyr syntax, the output is incorrect since it is somehow not using the correct column values:

KWtable <- copyCleanedDF  %>% group_by([copyCleanedDF[,input$groupCol]) %>% kruskal_test([copyCleanedDF[,input$numCol] ~ [copyCleanedDF[,input$varCol])

When I break the code up by first subsetting each group and calculating the KW for each group, it works correctly, however then it is hard-coded and will not work on any group variable column:

# subset team A and test KW
ASubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "A",])
krusyTeamA <- kruskal_test(ASubset[,input$numCol] ~ ASubset[,input$varCol], data = ASubset)
# add in a column showing it is tested in team A samples only
krusyTeamA$groupVariable = "Team A"

# subset team B and test KW
BSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "B",])
krusyTeamB <- kruskal_test(BSubset[,input$numCol] ~ BSubset[,input$varCol], data = BSubset)
# add in a column showing it is tested in team B samples only
krusyTeamB$groupVariable = "Team B"

# subset team C and test KW
CSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "C",])
krusyTeamC <- kruskal_test(CSubset[,input$numCol] ~ CSubset[,input$varCol], data = CSubset)
# add in a column showing it is tested in team C samples only
krusyTeamC$groupVariable = "Team C"

# put the three tables together

KWtable <- rbind(krusyTeamA,krusyTeamB,krusyTeamC)

The column names are also appearing strange, with the syntax showing instead of the actual column name (e.g. it appears as cleanData()[,input$varCol)] instead of "Genotype", so I would like to understand better how to handle this data.

I have included the full Rshiny code and example CSV file to be able to reproduce this code.

library(shiny)
library(datasets)
library(plotly)
library(dplyr)
library(reticulate)
library(DT)
library(ggplot2)
library(tidyverse)
library(rstatix)
library(dplyr)


ui <- shinyUI(fluidPage(
titlePanel("TargetID Median Levels"),
tabsetPanel(
  tabPanel("Upload File",
         titlePanel("Uploading Files"),
         sidebarLayout(
           sidebarPanel(
             fileInput('file1', 'Browse and select your CSV file',
                       accept=c('text/csv', 
                                'text/comma-separated-values,text/plain', 
                                '.csv')),
             
             # added interface for uploading data from
             # http://shiny.rstudio.com/gallery/file-upload.html
             tags$br(),
             checkboxInput('header', 'Column headers', TRUE),
             selectInput('varCol', 'X Variable', ""),
             selectInput('numCol', 'Select the Y Variable,...)', "", selected = ""),
             selectInput('classCol', 'Select the class label,...)', "", selected = ""),
             selectInput('groupCol', 'Select the group label,...)', "", selected = ""),
             selectInput("plot.type","Plot Type:",
                            list(boxplot = "boxplot")#, histogram = "histogram", density = "density")
             ),
             
             radioButtons('sep', 'Delimiter',
                          c(Semicolon=';',
                            Comma=',',
                            Tab='\t'),
                          ','),
             radioButtons('quote', 'Quote',
                          c(None='',
                            'Double Quote'='"',
                            'Single Quote'="'"),
                          '"')
             
           ),
           mainPanel(
             
             h3("Uploaded data"),
             
             dataTableOutput('table1'),
             
             h3(""),
             
             h3("Boxplot with Median Levels"),
             
             plotlyOutput('MyPlot'),
             
             h3("Kruskal-Wallis H Test"),
             
             dataTableOutput('table2')
             
             
           )
         )
 )
 )
 )
 )

server <- shinyServer(function(input, output, session) {
# added "session" because updateSelectInput requires it
options(warn=-1)
# options(encoding="UTF-8")

data <- reactive({ 
req(input$file1) ## ?req #  require that the input is available

# get the input file uploaded to the server side

inFile <- input$file1 

# read the input file as a data frame into R

inputDF <- read.csv(inFile$datapath, header = input$header, sep = input$sep,
                    quote = input$quote, stringsAsFactors = TRUE)


inputDF[inputDF=="NA"] <- NA # convert missing value strings to NAs recognised by R

# clean up the no-calls and in-phase genotypes from the variant genotypes columns

inputDF[inputDF == "./." | inputDF == ".|."] <- NA # convert the no-calls to missing (NA)
inputDF[inputDF == "0|0"] <- "0/0" # change in-phase wildtype
inputDF[inputDF == "0|1"] <- "0/1" # change in-phase heterzygous
inputDF[inputDF == "1|1"] <- "1/1" # change in-phase homo alt

# Update inputs (you could create an observer with both updateSel...)
# You can also constraint your choices. If you wanted select only numeric
# variables you could set "choices = sapply(df, is.numeric)"
# It depends on what do you want to do later on.

updateSelectInput(session, inputId = 'numCol', label = 'Numerical variable (e.g. LapTime...)',
                  choices = names(inputDF), selected = names(inputDF)[4])
updateSelectInput(session, inputId = 'varCol', label = 'Sample genotypes for a variant',
                  choices = names(inputDF), selected = names(inputDF)[1])
updateSelectInput(session, inputId = 'classCol', label = 'Class label (e.g. Sex)',
                  choices = names(inputDF), selected = names(inputDF)[3])
updateSelectInput(session, inputId = 'groupCol', label = 'Group label (e.g. Team)',
                  choices = names(inputDF), selected = names(inputDF)[2])

return(inputDF)
})


# display the first output table with the uploaded data
output$table1 <- renderDataTable({
req(input$file1)
datatable(
  data(),
  filter = "top",
  selection = "none", #this is to avoid select rows if you click on the rows
  rownames = FALSE,
  extensions = 'Buttons',
  
  options = list(
    scrollX = TRUE,
    autoWidth = TRUE,
    dom = 'Blrtip',
    buttons =
      list(I('colvis'), 'copy', 'print', list(
        extend = 'collection',
        buttons = list(list(extend = 'csv', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible")),
                       list(extend = 'excel', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible"))),
        text = 'Download'
      )),
    lengthMenu = list(c(10, 30, 50, -1),
                      c('10', '30', '50', 'All'))
  ),
  class = "display"
 )
 })

cleanData <- reactive({

req(input$file1)

# save the selected dataframe and subset to have only the selected columns

copyDF <- data.frame(data())

# remove any rows with nas in the 4 selected columns

cleanedDF <- copyDF %>% drop_na(c(input$varCol, input$numCol, input$classCol, input$groupCol))

return(cleanedDF)
})

kwData <- reactive({

req(input$file1)

copyCleanedDF <- data.frame(cleanData())

# get the freq count for each group to plot N
# using dplyr function to create a frequency table to match the grouped plt
# this will enable the freq counts to be added to the plot

myFreqs <- copyCleanedDF %>% group_by(copyCleanedDF[input$groupCol], copyCleanedDF[input$varCol],copyCleanedDF[input$classCol]) %>% summarize(Freq=n())  

groupVariable <- as.character(input$groupCol)

# subset team A and test KW
ASubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "A",])
krusyTeamA <- kruskal_test(ASubset[,input$numCol] ~ ASubset[,input$varCol], data = ASubset)
# add in a column showing it is tested in team A samples only
krusyTeamA$groupVariable = "Team A"

# subset team B and test KW
BSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "B",])
krusyTeamB <- kruskal_test(BSubset[,input$numCol] ~ BSubset[,input$varCol], data = BSubset)
# add in a column showing it is tested in team B samples only
krusyTeamB$groupVariable = "Team B"

# subset team C and test KW
CSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "C",])
krusyTeamC <- kruskal_test(CSubset[,input$numCol] ~ CSubset[,input$varCol], data = CSubset)
# add in a column showing it is tested in team C samples only
krusyTeamC$groupVariable = "Team C"

# put the three tables together

KWtable <- rbind(krusyTeamA,krusyTeamB,krusyTeamC)

### this part of the code doesn't work 

#KWtable <- copyCleanedDF  %>% group_by(copyCleanedDF[,input$groupCol]) %>% kruskal_test(copyCleanedDF[,input$numCol] ~ copyCleanedDF[,input$varCol])

# then paste the KW p-value to the team label in the main dataframe
# to include it in the plot image

#plotDF <- merge(copyCleanedDF,KWtable,by=("input$groupCol")) # doesn't work 

#plotDF$input$groupCol <- paste0(plotDF$input$groupCol, "\n", plotDF$method, " p=", plotDF$p)

return(KWtable)
})

freqData <- reactive({

req(input$file1)

copyCleanedDF <- data.frame(cleanData())

# get the freq count for each group to plot N
# using dplyr function to create a frequency table to match the grouped plt
# this will enable the freq counts to be added to the plot

myFreqs <- copyCleanedDF %>% group_by(copyCleanedDF[input$groupCol],   copyCleanedDF[input$varCol],copyCleanedDF[input$classCol]) %>% summarize(Freq=n())  

return(myFreqs)
})

# display grouped boxplots

output$MyPlot <- renderPlotly({

req(input$file1)

if(input$plot.type == "boxplot"){
  pl <-  ggplot(cleanData(), aes(x=cleanData()[,input$varCol], y=cleanData()[,input$numCol], fill=cleanData()[,input$classCol]))  
    stat_boxplot(geom ='errorbar')   # add error bars
    geom_boxplot()    
    facet_grid(~cleanData()[,input$groupCol],scale="free")

  pl <- pl   stat_summary(geom = 'text', label = paste("n=", freqData()$Freq), fun = max, vjust = -1, position = position_dodge(width=0.7))
  
  # This is to change the y-axis depending on the plot to allow for N to show on the plot
  pl <- pl   scale_y_continuous(limits = function(x){
    c(min(x), ceiling(max(x) * 1.1))
  })
  
  pl %>%
    ggplotly() %>%
    layout(boxmode = "group", autosize = TRUE, boxgroupgap=0.002, boxgap=0.01)
}

})

# display the second output table
output$table2 <- renderDataTable({
req(input$file1)
datatable(
  kwData(),
  filter = "top",
  selection = "none", #this is to avoid select rows if you click on the rows
  rownames = FALSE,
  extensions = 'Buttons',
  
  options = list(
    scrollX = TRUE,
    autoWidth = TRUE,
    dom = 'Blrtip',
    buttons =
      list(I('colvis'), 'copy', 'print', list(
        extend = 'collection',
        buttons = list(list(extend = 'csv', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible")),
                       list(extend = 'excel', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible"))),
        text = 'Download'
      )),
    lengthMenu = list(c(10, 30, 50, -1),
                      c('10', '30', '50', 'All'))
  ),
  class = "display"
)
})


})

shinyApp(ui, server)

Does anyone have an idea of what is happening and how I can go about improving my code?

Thanks very much in advance

Example CSV file

CodePudding user response:

Here's a small example that would use a character list like input to do the analysis:

library(rstatix)
library(dplyr)
input <- list(groupCol = 'supp', numCol = 'len', varCol = 'dose')
input
#> $groupCol
#> [1] "supp"
#> 
#> $numCol
#> [1] "len"
#> 
#> $varCol
#> [1] "dose"
ToothGrowth %>% 
  group_by(!!sym(input$groupCol)) %>% 
  kruskal_test(reformulate(input$varCol, response=input$numCol))
#> # A tibble: 2 × 7
#>   supp  .y.       n statistic    df          p method        
#> * <fct> <chr> <int>     <dbl> <int>      <dbl> <chr>         
#> 1 OJ    len      30      18.5     2 0.0000958  Kruskal-Wallis
#> 2 VC    len      30      25.1     2 0.00000359 Kruskal-Wallis

Created on 2022-05-18 by the reprex package (v2.0.1)

  • Related