I have a data set like this:
# Example input: an ID column plus two character columns of two-letter codes.
df <- data.frame(
  ID = 1:5,
  cloumn1 = c("AA", "GG", "AG", "AA", "AT"),
  cloumn2 = c("AA", "GG", "AG", "AA", "AT"),
  stringsAsFactors = FALSE
)
# Print the data frame.
df
ID cloumn1 cloumn2
1 AA AA
2 GG GG
3 AG AG
4 AA AA
5 AT AT
I want to split each column into its two letters, so that the output looks something like this:
ID cloumn1.A cloumn1.B cloumn2.A cloumn2.B
1 A A A A
2 G G G G
3 A G A G
4 A A A A
5 A T A T
Can you help me please?
CodePudding user response:
library(tidyverse)
# Split every non-ID column of `df` into one column per character, named
# <column>.<LETTER> (A for the first character, B for the second, ...).
df %>%
  pivot_longer(-ID) %>%
  # One list element per cell, holding that cell's individual characters.
  mutate(tmp = str_split(value, pattern = "")) %>%
  unnest(tmp) %>%
  # Label each character by its position within its (ID, name) cell.
  group_by(ID, name) %>%
  mutate(id_row = LETTERS[row_number()]) %>%
  # Drop the grouping before reshaping so the pivot works on plain rows.
  ungroup() %>%
  # `name` is consumed by names_from, so only ID may identify output rows;
  # listing `name` in id_cols as well conflicts with names_from.
  pivot_wider(
    id_cols = ID,
    names_from = c(name, id_row),
    values_from = tmp,
    names_sep = "."
  )
#> # A tibble: 5 x 5
#> ID cloumn1.A cloumn1.B cloumn2.A cloumn2.B
#> <int> <chr> <chr> <chr> <chr>
#> 1 1 A A A A
#> 2 2 G G G G
#> 3 3 A G A G
#> 4 4 A A A A
#> 5 5 A T A T
data
# Reproducible input data: ID plus two identical columns of two-letter codes.
df <- data.frame(
  ID = seq_len(5),
  cloumn1 = c("AA", "GG", "AG", "AA", "AT"),
  stringsAsFactors = FALSE
)
# The second column holds the same values as the first.
df$cloumn2 <- df$cloumn1
Created on 2021-11-05 by the reprex package (v2.0.1)
data.table
library(data.table)
# Convert `df` to a data.table by reference, then split every non-ID column
# into one column per character, named <column>_<LETTER>.
# NOTE(review): `%>%` is not attached by library(data.table); this relies on
# another attached package providing it (e.g. the tidyverse loaded in the
# earlier snippet) -- confirm before running this on its own.
setDT(df)
melt(data = df, id.vars = "ID") %>%
  # One row per character of each (ID, variable) cell.
  .[, list(value = unlist(strsplit(value, split = ""))), by = list(ID, variable)] %>%
  # Position of the character within its cell, expressed as a letter.
  .[, id_row := LETTERS[rowid(ID, variable)]] %>%
  # Both naming variables must be joined with `+` on the formula's right side.
  dcast(formula = ID ~ variable + id_row, value.var = "value")
ID cloumn1_A cloumn1_B cloumn2_A cloumn2_B
1: 1 A A A A
2: 2 G G G G
3: 3 A G A G
4: 4 A A A A
5: 5 A T A T
CodePudding user response:
Using strsplit.
# Split each non-ID column into a character matrix (one matrix column per
# letter), bind those matrices into a data frame, and put the ID column first.
cbind(
  df[1],
  do.call(
    cbind.data.frame,
    lapply(
      df[-1],
      function(codes) do.call(rbind, strsplit(codes, ""))
    )
  )
)
# ID cloumn1.1 cloumn1.2 cloumn2.1 cloumn2.2
# 1 1 A A A A
# 2 2 G G G G
# 3 3 A G A G
# 4 4 A A A A
# 5 5 A T A T
CodePudding user response:
Yet another solution, tidyverse-based:
library(tidyverse)
# Example input: ID plus two columns of two-letter codes.
df <- data.frame(
  ID = 1:5,
  column1 = c("AA", "GG", "AG", "AA", "AT"),
  column2 = c("AA", "GG", "AG", "AA", "AT"),
  stringsAsFactors = FALSE
)
# Replace each "column*" column with a matrix column "<name>_sep" holding one
# letter per matrix column, then drop the original columns.
df %>%
  mutate(
    across(
      starts_with("column"),
      # Split at the zero-width boundary between two capital letters;
      # simplify = TRUE returns a character matrix instead of a list.
      ~ str_split(.x, "(?<=[A-Z])(?=[A-Z])", simplify = TRUE),
      .names = "{.col}_sep"
    ),
    column1 = NULL,
    column2 = NULL
  )
#> ID column1_sep.1 column1_sep.2 column2_sep.1 column2_sep.2
#> 1 1 A A A A
#> 2 2 G G G G
#> 3 3 A G A G
#> 4 4 A A A A
#> 5 5 A T A T
Another possibility, based on pivot_longer followed by pivot_wider:
library(tidyverse)
# Example input: ID plus two columns of two-letter codes.
df <- data.frame(
  ID = 1:5,
  column1 = c("AA", "GG", "AG", "AA", "AT"),
  column2 = c("AA", "GG", "AG", "AA", "AT"),
  stringsAsFactors = FALSE
)
# Go long, split each code into letters A and B, then go wide again with
# columns named <column>.<letter>.
df %>%
  pivot_longer(-ID) %>%
  # Split at the zero-width boundary between two capital letters.
  separate(value, into = LETTERS[1:2], sep = "(?<=[A-Z])(?=[A-Z])") %>%
  # id_cols is passed by name; supplying it positionally is deprecated in
  # recent tidyr releases.
  pivot_wider(
    id_cols = ID,
    names_from = "name",
    values_from = c(A, B),
    names_glue = "{name}.{.value}"
  ) %>%
  # pivot_wider emits all A columns before the B columns; move column1.B so
  # each original column's letters sit side by side.
  relocate(column1.B, .before = column2.A)
#> # A tibble: 5 × 5
#> ID column1.A column1.B column2.A column2.B
#> <int> <chr> <chr> <chr> <chr>
#> 1 1 A A A A
#> 2 2 G G G G
#> 3 3 A G A G
#> 4 4 A A A A
#> 5 5 A T A T