Split columns into several columns by condition R-CodePudding

I would like to split a column into several columns according to conditions.

For example, if the character starts with P it will go to one column and if it starts with F or C it will go to other columns.

df <- data.frame(
  "name" = c("a", "b"),
  "ID" = c("P_GO_10;C_GO_23;C_GO_32", "P_GO_65;F_GO15;C_GO_97;F_GO_87"))

  name   ID
1    a P_GO_10;C_GO_23;C_GO_32
2    b P_GO_65;F_GO15;C_GO_97;F_GO_87



df_new <- data.frame(
  "name" = c("a", "b"),
  "ID_P" = c("P_GO_10", "P_GO_65"),
  "ID_C" = c("C_GO_23;C_GO_32","C_GO_97"),
  "ID_F" = c(NA, "F_GO_87"))

  name ID_P     ID_C               ID_F
1    a P_GO_10  C_GO_23;C_GO_32    <NA>
2    b P_GO_65  C_GO_97            F_GO_87

CodePudding user response：

Not really exact result that you are requesting but there is no other answer so far.

Using tidyverse we can do this:

df %>% 
  separate_rows(ID, sep = ";") %>% 
  mutate(ID_P = ifelse(grepl("^P", ID), ID, NA),
         ID_C = ifelse(grepl("^C", ID), ID, NA), 
         ID_F = ifelse(grepl("^F", ID), ID, NA)) %>% 
  select(-ID) %>% 
  pivot_longer(-name, names_to = "ID", values_to = "values") %>%
  drop_na() %>% 
  pivot_wider(names_from = ID,
              values_from = values,
              values_fn = list, 
              values_fill = list(values = NA))

CodePudding user response：

A not especially elegant base R solution:

df <- data.frame(
  "name" = c("a", "b"),
  "ID" = c("P_GO_10;C_GO_23;C_GO_32", "P_GO_65;F_GO15;C_GO_97;F_GO_87"))

id2 <- substr(unlist(id1 <- strsplit(df$ID, ";")), 1, 1)
m <- matrix(NA_character_, nrow(df), length(uid <- unique(id2)))
m[matrix(c(rep.int(1:nrow(df), lengths(id1)), match(id2, uid)), ncol = 2)] <- unlist(id1)
cbind(df, setNames(as.data.frame(m), paste0("ID_", uid)))
#>   name                             ID    ID_P    ID_C    ID_F
#> 1    a        P_GO_10;C_GO_23;C_GO_32 P_GO_10 C_GO_32    <NA>
#> 2    b P_GO_65;F_GO15;C_GO_97;F_GO_87 P_GO_65 C_GO_97 F_GO_87