I have two vectors of bin labels. Basically, I want a function that finds the union and intersection of the bins in these two vectors (see `output` below). There seems to be no existing function that supports this. Any idea how I can produce the desired output vector?
# First set of bin labels ("lower--upper"); the last label carries a
# trailing space instead of an upper bound.
example1 <- c(
  "18--25", "26--30", "31--50", "51 "
)
# Second set of bin labels, same format, different cut points.
example2 <- c(
  "18--23", "24--30", "31--65", "66 "
)
# Desired result of combining the two sets of bins.
output <- c(
  "18--23", "24--25", "26--30",
  "31--50", "51--65", "66 "
)
CodePudding user response:
We can remove duplicates from the sorted boundaries and combine them two at a time, like this (requires R 4.1 or later for the native pipe `|>` and the `\(x)` lambda syntax):
# Merge two vectors of bin labels into one vector of combined bins.
#
# x, y : character vectors of bin labels such as "18--25"; a label may carry
#        the `max` marker (e.g. "51 " or "66+").
# sep  : literal separator between the lower and upper bound of a bin.
# max  : literal marker to strip from the input labels; it is appended to
#        the last element of the result.
#
# Returns a character vector in which the sorted, de-duplicated boundaries
# of `x` and `y` are paired in order (1st with 2nd, 3rd with 4th, ...).
f <- function(x, y, sep, max) {
  # Match `max` literally: a regex built by prefixing a backslash changes
  # meaning for alphanumeric markers (e.g. "n" would become "\n").
  gsub(max, "", c(x, y), fixed = TRUE) |>
    strsplit(sep, fixed = TRUE) |>
    unlist(use.names = FALSE) |>
    # Convert before sorting so boundaries are ordered numerically, not as
    # text ("100" would otherwise sort before "5").
    as.numeric() |>
    unique() |>
    sort() |>
    (\(bounds) {
      # gl() yields group ids 1, 1, 2, 2, ...; unused levels come back as NA.
      pair_id <- gl(length(bounds), 2L, length(bounds))
      tapply(bounds, pair_id, paste, collapse = sep, simplify = TRUE)
    })() |>
    (\(bins) as.character(bins[!is.na(bins)]))() |>
    (\(bins) {
      bins[length(bins)] <- paste0(bins[length(bins)], max)
      bins
    })()
}
# For older R versions (before 4.1), without the native pipe or `\(x)` lambdas
# Merge two vectors of bin labels into one vector of combined bins.
#
# x, y : character vectors of bin labels such as "18--25"; a label may carry
#        the `max` marker (e.g. "51 " or "66+").
# sep  : literal separator between the lower and upper bound of a bin.
# max  : literal marker to strip from the input labels; it is appended to
#        the last element of the result.
#
# Returns a character vector in which the sorted, de-duplicated boundaries
# of `x` and `y` are paired in order (1st with 2nd, 3rd with 4th, ...).
f <- function(x, y, sep, max) {
  # Match `max` literally: a regex built by prefixing a backslash changes
  # meaning for alphanumeric markers (e.g. "n" would become "\n").
  labels <- gsub(max, "", c(x, y), fixed = TRUE)
  tokens <- unlist(strsplit(labels, sep, fixed = TRUE), use.names = FALSE)
  # Convert before sorting so boundaries are ordered numerically, not as
  # text ("100" would otherwise sort before "5").
  bounds <- sort(unique(as.numeric(tokens)))
  # gl() yields group ids 1, 1, 2, 2, ...; unused levels come back as NA.
  pair_id <- gl(length(bounds), 2L, length(bounds))
  bins <- tapply(bounds, pair_id, paste, collapse = sep, simplify = TRUE)
  bins <- as.character(bins[!is.na(bins)])
  bins[length(bins)] <- paste0(bins[length(bins)], max)
  bins
}
# Combine the two example vectors; bounds are separated by "--" and the
# marker passed as `max` is a single space.
f(x = example1, y = example2, sep = "--", max = " ")
[1] "18--23" "24--25" "26--30" "31--50" "51--65" "66 "