Here is a smaple of data set:
df<-read.table (text="Id Name Surname Group A1 A2 A3 Type1 Gen1 B1 B2 B3 Type2 Gen2
116 Matt Dabi A 4 4 4 AB N 3 3 4 G N
116 Matt Dabi D 4 3 2 D N 4 3 2 G N
116 Matt Dabi Q NA NA NA NA NA NA NA NA NA NA
116 Matt Dabi B NA NA NA NA NA NA NA NA NA NA
", header=TRUE)
<iframe name="sif1" sandbox="allow-forms allow-modals allow-scripts" frameborder="0"></iframe>
Id Name Surname Group A1 A2 A3 Type1 Gen1
116 Matt Dabi A 4 4 4 AB N
116 Matt Dabi D 4 3 2 D N
116 Matt Dabi Q 3 3 4 G N
116 Matt Dabi B 4 3 2 G N
<iframe name="sif2" sandbox="allow-forms allow-modals allow-scripts" frameborder="0"></iframe>
I have tried df %>% na.omit()
CodePudding user response:
Here another approach using pivot_wider
:
df %>%
pivot_longer(starts_with(c("A", "B")), names_to="ID") %>%
na.omit() %>%
mutate(ID=sub("B", "A", ID)) %>%
pivot_wider(names_from=c(ID))
In case of more columns, one can use:
df %>%
pivot_longer(matches("[A-Z][1-9]"), names_to="ID") %>%
na.omit() %>%
mutate(ID = sub("[B-Z]", "A", ID)) %>%
pivot_wider(names_from = ID)
given that the rows are uniquely identified otherwise.
# A tibble: 4 x 7
Id Name Surname Group A1 A2 A3
<int> <chr> <chr> <chr> <int> <int> <int>
1 116 Matt Dabi A 4 4 4
2 116 Matt Dabi D 4 3 2
3 116 Matt Dabi Q 4 3 3
4 116 Matt Dabi B 4 2 4
CodePudding user response:
One option to achieve your desired result would be to make use of tidyr::pivot_longer
like so:
library(tidyr)
library(dplyr)
df %>%
pivot_longer(matches("^[A-Z]\\d$"), names_to = c("set", ".value"), names_pattern = "(.)(\\d)$", values_drop_na = TRUE) %>%
select(-set) %>%
rename_with(.fn = ~ paste0("A", .x), .cols = matches("^\\d"))
#> # A tibble: 6 × 7
#> Id Name Surname Group A1 A2 A3
#> <int> <chr> <chr> <chr> <int> <int> <int>
#> 1 116 Matt Dabi A 4 4 4
#> 2 116 Matt Dabi D 4 3 2
#> 3 116 Matt Dabi Q 4 3 3
#> 4 116 Matt Dabi B 4 2 4
#> 5 116 Matt Dabi Q 4 3 3
#> 6 116 Matt Dabi B 4 2 4
DATA
df <- structure(list(Id = c(116L, 116L, 116L, 116L, 116L, 116L), Name = c(
"Matt",
"Matt", "Matt", "Matt", "Matt", "Matt"
), Surname = c(
"Dabi",
"Dabi", "Dabi", "Dabi", "Dabi", "Dabi"
), Group = c(
"A", "D",
"Q", "B", "Q", "B"
), A1 = c(4L, 4L, NA, NA, NA, NA), A2 = c(
4L,
3L, NA, NA, NA, NA
), A3 = c(4L, 2L, NA, NA, NA, NA), B1 = c(
NA,
NA, 4L, 4L, NA, NA
), B2 = c(NA, NA, 3L, 2L, NA, NA), B3 = c(
NA,
NA, 3L, 4L, NA, NA
), C1 = c(NA, NA, NA, NA, 4L, 4L), C2 = c(
NA,
NA, NA, NA, 3L, 2L
), C3 = c(NA, NA, NA, NA, 3L, 4L)), class = "data.frame", row.names = c(
NA,
-6L
))
CodePudding user response:
With multiple patterns, one option is to create a spec
with build_longer_spec
and then use pivot_longer_spec
library(tidyr)
library(dplyr)
library(stringr)
spec <- df %>%
build_longer_spec(cols = matches("^[A-Za-z] \\d $"),
names_to = ".value", names_pattern = ".*(\\d )") %>%
mutate(`.value` = case_when(str_detect(`.name`, "^[AB]\\d $") ~
str_c('A', `.value`),
str_detect(`.name`, 'Type') ~ 'Type1',
str_detect(`.name`, 'Gen') ~ 'Gen1'))
pivot_longer_spec(df, spec, values_drop_na = TRUE)
# A tibble: 4 × 9
Id Name Surname Group A1 A2 A3 Type1 Gen1
<int> <chr> <chr> <chr> <int> <int> <int> <chr> <chr>
1 116 Matt Dabi A 4 4 4 AB N
2 116 Matt Dabi A 3 3 4 G N
3 116 Matt Dabi D 4 3 2 D N
4 116 Matt Dabi D 4 3 2 G N