I would like to split a column into several columns according to conditions.
For example, if the character starts with P it will go to one column and if it starts with F or C it will go to other columns.
df <- data.frame(
"name" = c("a", "b"),
"ID" = c("P_GO_10;C_GO_23;C_GO_32", "P_GO_65;F_GO15;C_GO_97;F_GO_87"))
name ID
1 a P_GO_10;C_GO_23;C_GO_32
2 b P_GO_65;F_GO15;C_GO_97;F_GO_87
df_new <- data.frame(
"name" = c("a", "b"),
"ID_P" = c("P_GO_10", "P_GO_65"),
"ID_C" = c("C_GO_23;C_GO_32","C_GO_97"),
"ID_F" = c(NA, "F_GO_87"))
name ID_P ID_C ID_F
1 a P_GO_10 C_GO_23;C_GO_32 <NA>
2 b P_GO_65 C_GO_97 F_GO_87
CodePudding user response:
Not really exact result that you are requesting but there is no other answer so far.
Using tidyverse
we can do this:
df %>%
separate_rows(ID, sep = ";") %>%
mutate(ID_P = ifelse(grepl("^P", ID), ID, NA),
ID_C = ifelse(grepl("^C", ID), ID, NA),
ID_F = ifelse(grepl("^F", ID), ID, NA)) %>%
select(-ID) %>%
pivot_longer(-name, names_to = "ID", values_to = "values") %>%
drop_na() %>%
pivot_wider(names_from = ID,
values_from = values,
values_fn = list,
values_fill = list(values = NA))
CodePudding user response:
A not especially elegant base R solution:
df <- data.frame(
"name" = c("a", "b"),
"ID" = c("P_GO_10;C_GO_23;C_GO_32", "P_GO_65;F_GO15;C_GO_97;F_GO_87"))
id2 <- substr(unlist(id1 <- strsplit(df$ID, ";")), 1, 1)
m <- matrix(NA_character_, nrow(df), length(uid <- unique(id2)))
m[matrix(c(rep.int(1:nrow(df), lengths(id1)), match(id2, uid)), ncol = 2)] <- unlist(id1)
cbind(df, setNames(as.data.frame(m), paste0("ID_", uid)))
#> name ID ID_P ID_C ID_F
#> 1 a P_GO_10;C_GO_23;C_GO_32 P_GO_10 C_GO_32 <NA>
#> 2 b P_GO_65;F_GO15;C_GO_97;F_GO_87 P_GO_65 C_GO_97 F_GO_87