I have two columns x
and y
. I want to mark the first occurrence of a value that occurs either in x
OR y
.
library(dplyr)
tibble(x = c(1, 2, 2, 3, 7), y = c(7, 7, 8, 9, 10))
#> # A tibble: 5 × 2
#> x y
#> <dbl> <dbl>
#> 1 1 7
#> 2 2 7
#> 3 2 8
#> 4 3 9
#> 5 7 10
Created on 2021-11-15 by the reprex package (v0.3.0)
My expected output should look like:
#> # A tibble: 5 × 4
#> x y first_occurance_x first_occurance_y
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 7 T T
#> 2 2 7 T F
#> 3 2 8 F T
#> 4 3 9 T T
#> 5 7 10 F T
Created on 2021-11-15 by the reprex package (v0.3.0)
CodePudding user response:
cbind(df, t(array(!duplicated(c(t(df))), rev(dim(df)))))
x y 1 2
1 1 7 TRUE TRUE
2 2 7 TRUE FALSE
3 2 8 FALSE TRUE
4 3 9 TRUE TRUE
5 7 10 FALSE TRUE
CodePudding user response:
We may pivot to long format and then use duplicated
on the 'long' format data and then reshape back to 'wide' format with pivot_wider
library(dplyr)
library(tidyr)
library(stringr)
df1 <- df1 %>%
mutate(rn = row_number()) %>%
pivot_longer(cols = c(x, y)) %>%
mutate(value = !duplicated(value),
name = str_c('first_occurance_', name)) %>%
pivot_wider(names_from = name, values_from = value) %>%
select(-rn) %>%
bind_cols(df1, .)
-output
df1
# A tibble: 5 × 4
x y first_occurance_x first_occurance_y
<dbl> <dbl> <lgl> <lgl>
1 1 7 TRUE TRUE
2 2 7 TRUE FALSE
3 2 8 FALSE TRUE
4 3 9 TRUE TRUE
5 7 10 FALSE TRUE
data
df1 <- tibble(x = c(1, 2, 2, 3, 7), y = c(7, 7, 8, 9, 10))
CodePudding user response:
A possible solution:
library(tidyverse)
df <- tibble(x = c(1, 2, 2, 3, 7), y = c(7, 7, 8, 9, 10))
df %>%
group_by(x) %>%
mutate(firstx = ifelse(row_number() == 1, T, F)) %>%
ungroup %>%
group_by(y) %>%
mutate(firsty = ifelse(row_number() == 1, T, F)) %>%
ungroup %>%
mutate(
aux1 = x %in% lead(y), aux2 = y %in% lead(x),
firstx = ifelse(aux1 & row_number() != 1, F, firstx),
firsty = ifelse(aux2 & row_number() != 1, F, firsty),
aux1 = NULL, aux2 = NULL)
#> # A tibble: 5 × 4
#> x y firstx firsty
#> <dbl> <dbl> <lgl> <lgl>
#> 1 1 7 TRUE TRUE
#> 2 2 7 TRUE FALSE
#> 3 2 8 FALSE TRUE
#> 4 3 9 TRUE TRUE
#> 5 7 10 FALSE TRUE