Home > Enterprise >  Add group names based on several variables
Add group names based on several variables

Time:11-24

I would like to create a variable containing group names determined based on four other variables. Let me create a dummy table to put this into a better perspective:

classwork stream people others grouping1
High High High High Group1
High High High High Group1
High High High Low Group2
High High High Low Group2
High High Low Low Group3

Each group name is based on the combinations in the data, which I obtained using this code:

# Total `index` for every observed combination of the four level columns.
# NOTE(review): the sample data posted further down names its columns
# classwork/stream/people/others rather than *.level -- confirm which applies.
summarize(
  group_by(DATA.RH.TAM, C.level, S.level, P.level, O.level),
  index = sum(index)
)

Later, I would also like to create another variable (grouping2) based on only three of those variables.

# Total `index` for every observed combination of the first three level
# columns (O.level is left out of the grouping).
# NOTE(review): the sample data posted further down names its columns
# classwork/stream/people rather than *.level -- confirm which applies.
summarize(
  group_by(DATA.RH.TAM, C.level, S.level, P.level),
  index = sum(index)
)

Your help is much appreciated.

The following is my dataset:

# Sample data as a structure() literal: a 156 x 5 character matrix (see .Dim),
# filled column by column, whose columns are named classwork, stream, people,
# others and index (see .Dimnames). Every cell is a string, including the "1"
# entries of the index column, and the object is a matrix rather than a data
# frame.
DATA.RH.TAM <- structure(c("High", "High", "High", "Low", "High", "High", "Low", 
            "High", "High", "High", "Low", "High", "High", "High", "High", 
            "High", "High", "High", "High", "High", "High", "High", "High", 
            "High", "Low", "High", "High", "Low", "Low", "High", "High", 
            "High", "High", "High", "Low", "High", "High", "High", "High", 
            "Low", "Low", "Low", "High", "High", "High", "High", "High", 
            "Low", "Low", "Low", "Low", "High", "High", "High", "High", "Low", 
            "Low", "Low", "Low", "High", "High", "High", "Low", "High", "High", 
            "High", "High", "High", "High", "High", "High", "High", "Low", 
            "High", "Low", "Low", "High", "High", "Low", "High", "High", 
            "High", "Low", "High", "High", "High", "High", "High", "High", 
            "High", "Low", "High", "Low", "High", "High", "High", "High", 
            "Low", "Low", "Low", "High", "Low", "Low", "Low", "High", "Low", 
            "High", "Low", "Low", "High", "High", "High", "High", "High", 
            "High", "High", "Low", "Low", "High", "High", "Low", "Low", "Low", 
            "High", "High", "High", "High", "High", "High", "High", "High", 
            "High", "High", "High", "Low", "High", "Low", "High", "Low", 
            "High", "High", "Low", "Low", "High", "Low", "Low", "High", "High", 
            "High", "High", "High", "High", "High", "Low", "High", "High", 
            "High", "Low", "High", "Low", "High", "Low", "Low", "High", "Low", 
            "High", "Low", "High", "Low", "Low", "High", "High", "High", 
            "Low", "High", "High", "Low", "High", "High", "High", "Low", 
            "High", "High", "Low", "High", "High", "Low", "High", "High", 
            "Low", "Low", "Low", "Low", "Low", "High", "High", "Low", "Low", 
            "High", "High", "High", "High", "High", "Low", "Low", "Low", 
            "High", "Low", "High", "High", "Low", "Low", "Low", "Low", "Low", 
            "High", "High", "High", "High", "High", "Low", "High", "Low", 
            "High", "Low", "Low", "Low", "Low", "Low", "High", "Low", "Low", 
            "Low", "High", "Low", "High", "High", "High", "Low", "High", 
            "High", "High", "Low", "Low", "High", "High", "Low", "High", 
            "High", "High", "High", "High", "High", "Low", "Low", "Low", 
            "High", "Low", "Low", "Low", "Low", "High", "Low", "Low", "Low", 
            "High", "High", "High", "High", "High", "High", "High", "Low", 
            "Low", "High", "High", "Low", "Low", "Low", "Low", "High", "High", 
            "High", "High", "High", "Low", "Low", "Low", "Low", "Low", "Low", 
            "High", "High", "Low", "Low", "High", "High", "Low", "Low", "Low", 
            "Low", "Low", "High", "Low", "Low", "High", "High", "High", "Low", 
            "Low", "High", "High", "High", "High", "High", "High", "High", 
            "High", "Low", "High", "High", "High", "Low", "High", "Low", 
            "Low", "High", "High", "High", "Low", "Low", "High", "High", 
            "High", "High", "High", "Low", "High", "High", "Low", "High", 
            "High", "High", "High", "High", "High", "High", "High", "High", 
            "High", "High", "High", "High", "Low", "High", "High", "High", 
            "Low", "High", "Low", "Low", "Low", "Low", "Low", "High", "Low", 
            "High", "Low", "Low", "Low", "Low", "High", "High", "High", "Low", 
            "High", "High", "High", "Low", "High", "Low", "High", "High", 
            "High", "Low", "High", "High", "Low", "Low", "High", "High", 
            "High", "High", "High", "Low", "Low", "High", "High", "High", 
            "High", "High", "High", "Low", "High", "Low", "High", "High", 
            "High", "High", "Low", "Low", "High", "High", "High", "Low", 
            "Low", "High", "High", "High", "High", "Low", "Low", "High", 
            "High", "High", "High", "High", "High", "Low", "Low", "High", 
            "High", "Low", "Low", "Low", "High", "High", "High", "High", 
            "High", "High", "High", "Low", "Low", "High", "Low", "High", 
            "High", "High", "High", "Low", "High", "High", "High", "Low", 
            "High", "Low", "Low", "High", "High", "Low", "High", "High", 
            "High", "High", "High", "High", "High", "High", "High", "High", 
            "High", "Low", "High", "Low", "High", "High", "High", "Low", 
            "High", "High", "Low", "High", "High", "High", "Low", "Low", 
            "High", "High", "High", "High", "High", "Low", "High", "High", 
            "Low", "High", "High", "High", "High", "High", "High", "High", 
            "High", "High", "High", "High", "High", "Low", "High", "High", 
            "High", "High", "High", "High", "Low", "Low", "Low", "Low", "High", 
            "High", "Low", "High", "High", "Low", "High", "High", "High", 
            "High", "High", "Low", "High", "High", "High", "Low", "High", 
            "High", "High", "High", "Low", "Low", "High", "High", "Low", 
            "Low", "High", "High", "High", "High", "High", "High", "High", 
            "High", "High", "High", "High", "High", "High", "Low", "High", 
            "High", "High", "High", "High", "High", "Low", "Low", "High", 
            "High", "High", "Low", "High", "High", "High", "High", "High", 
            "Low", "High", "High", "High", "High", "High", "High", "High", 
            "Low", "Low", "High", "High", "Low", "Low", "High", "High", "High", 
            "High", "High", "High", "High", "High", "High", "Low", "High", 
            "Low", "High", "High", "High", "High", "High", "High", "High", 
            "High", "High", "High", "Low", "Low", "High", "High", "Low", 
            "High", "High", "High", "High", "High", "High", "High", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
            "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), .Dim = c(156L, 
                                                                                  5L), .Dimnames = list(NULL, c("classwork", "stream", "people", 
                                                                                                                "others", "index")))

CodePudding user response:

library(data.table)

# Convert the sample matrix into a data.table so columns can be added by
# reference with `:=`.
DT <- as.data.table(DATA.RH.TAM)

# Within a grouped call, .GRP holds the running number of the current group,
# so each distinct combination of the `by` columns gets its own "group<n>".
DT[
  ,
  grouping1 := paste0("group", .GRP),
  by = c("classwork", "stream", "people", "others")
]
DT[
  ,
  grouping2 := paste0("group", .GRP),
  by = c("classwork", "stream", "people")
]

# Resulting table (first and last five rows):
#      classwork stream people others index grouping1 grouping2
#   1:      High   High   High   High     1    group1    group1
#   2:      High    Low   High   High     1    group2    group2
#   3:      High   High   High   High     1    group1    group1
#   4:       Low    Low   High   High     1    group3    group3
#   5:      High   High   High    Low     1    group4    group1
# ---
# 152:      High   High   High   High     1    group1    group1
# 153:      High    Low   High   High     1    group2    group2
# 154:       Low    Low   High   High     1    group3    group3
# 155:      High   High   High   High     1    group1    group1
# 156:      High   High   High   High     1    group1    group1

CodePudding user response:

Using dplyr, you can use mutate with case_when. Unlike the data.table approach, however, you need to spell out every combination by hand. (Please note that I have only used the head of your dataset.)

library(dplyr)

# Demonstrate on the first six rows only. The sample object is a character
# matrix, so it is converted to a data frame before any dplyr verb is applied.
DATA.RH.TAM <- head(as.data.frame(DATA.RH.TAM))

# Label every row by its exact combination of the four level columns.
# case_when() checks the conditions top to bottom and keeps the first match,
# so each combination is listed exactly once (a repeated condition could
# never be reached). The final `TRUE` branch is the catch-all: a `FALSE`
# condition never matches and would leave unlisted combinations as NA
# instead of "Group X".
DATA.RH.TAM <- DATA.RH.TAM %>%
  mutate(
    Grouping = case_when(
      classwork == "High" & stream == "High" &
        people == "High" & others == "High" ~ "Group 1",
      classwork == "High" & stream == "Low" &
        people == "High" & others == "High" ~ "Group 2",
      classwork == "Low" & stream == "Low" &
        people == "High" & others == "High" ~ "Group 3",
      classwork == "High" & stream == "High" &
        people == "High" & others == "Low" ~ "Group 4",
      TRUE ~ "Group X"
    )
  )

CodePudding user response:

library(tidyverse)

DATA.RH.TAM %>%
  as_tibble() %>%
  # Build one key string per row from the four (then three) level columns,
  # keeping the source columns in place.
  unite("Group1", classwork:others, remove = FALSE) %>%
  unite("Group2", classwork:people, remove = FALSE) %>%
  # Replace each key with "Group<n>", where n is the key's factor level code.
  mutate(
    Group1 = paste0("Group", as.numeric(factor(Group1))),
    Group2 = paste0("Group", as.numeric(factor(Group2)))
  )
  • Related