I have the following vectors:
# Character groups; each one is flagged with its own marker symbol in the
# worked examples.
v1 <- c("R", "H", "K")  # marked with * (asterisk sign)
v2 <- c("D", "E")       # marked with + (plus sign)
v3 <- "A"               # marked with - (minus sign)
Given another string, I'd like to count how many of its characters belong to each of v1, v2, and v3.
For example:
# Worked examples. The marker row under each string flags every character
# that belongs to a group: * for v1, + for v2, - for v3.
x1 <- "GMRRRARRRS"
#        ***-***
# v1_count = 6
# v2_count = 0
# v3_count = 1
x2 <- "KMRDFRHRAE"
#      * *+ ***-+
# v1_count = 5
# v2_count = 2
# v3_count = 1
So each character that belongs to a vector adds one to that vector's count.
The final output will be a data frame or tibble:
R,H,K D,E A
GMRRRARRRS 6 0 1
KMRDFRHRAE 5 2 1
How can I achieve that with R?
CodePudding user response:
library(tidyverse)
Sample data in a tibble or data frame (referred to as `df` in the code below):
# A tibble: 2 x 1
string
<chr>
1 GMRRRARRRS
2 KMRDFRHRAE
Create a function to extract textual information
# Count, for a single string, how many of its characters fall into each of
# the three fixed character groups.
#
# string: the text to examine; only the first element is split and counted.
# Returns a one-row tibble with integer columns RHK, DE and A.
get_count <- function(string) {
  group_rhk <- c("R", "H", "K")
  group_de <- c("D", "E")
  group_a <- "A"

  # Splitting on the empty pattern yields one element per character.
  letters_found <- str_split(string, "")[[1]]

  # Summing a logical membership mask counts the matching characters.
  tibble(
    RHK = sum(letters_found %in% group_rhk),
    DE = sum(letters_found %in% group_de),
    A = sum(letters_found %in% group_a)
  )
}
Use mutate() to add a new list-column `data` holding the function's result for each string, then expand it with unnest():
# Apply get_count() to every string, storing each result in the list-column
# `data`, then expand that column into regular count columns. Naming `data`
# explicitly (rather than everything()) keeps unnest() from touching any
# other column the data frame may carry.
df %>%
  mutate(data = map(string, get_count)) %>%
  unnest(data)
# A tibble: 2 x 4
string RHK DE A
<chr> <int> <int> <int>
1 GMRRRARRRS 6 0 1
2 KMRDFRHRAE 5 2 1
CodePudding user response:
library(stringr)
library(data.table)
# Character groups to count.
v1 <- c("R", "H", "K")
v2 <- c("D", "E")
v3 <- "A"

# Example input strings.
x1 <- "GMRRRARRRS"
x2 <- "KMRDFRHRAE"
#' Count how many characters of a string belong to each character group
#'
#' @param input_str A string. Only its first element is split into
#'   characters and counted.
#' @param groups A list of character vectors, one per group. Defaults to the
#'   `v1`, `v2` and `v3` vectors visible from where this function is defined,
#'   so existing one-argument calls keep working.
#' @return A data frame with an `input` column followed by one integer count
#'   column per group; each count column is named after the group's members
#'   joined by commas (e.g. "R,H,K").
char_counts <- function(input_str, groups = list(v1, v2, v3)) {
  # Split once up front; every group is tested against the same characters.
  chars <- strsplit(input_str, "")[[1]]

  # vapply() guarantees exactly one integer per group, even for zero groups.
  counts <- vapply(groups, function(grp) sum(chars %in% grp), integer(1))

  # t() turns the counts into a single-row matrix so each group becomes a
  # column rather than a row.
  df <- data.frame(input_str, t(counts))
  group_labels <- vapply(groups, paste0, character(1), collapse = ",")
  colnames(df) <- c("input", group_labels)
  df
}
# Build one single-row result per input string, stack the rows, and convert
# the stacked table back to a plain data frame before printing.
results_df <- c(x1, x2) |>
  lapply(char_counts) |>
  data.table::rbindlist(fill = TRUE) |>
  as.data.frame()

print(results_df)
input R,H,K D,E A
1 GMRRRARRRS 6 0 1
2 KMRDFRHRAE 5 2 1
CodePudding user response:
I think we can do this a little simpler. Maybe just:
library(tidyverse)
# Example input strings.
x1 <- "GMRRRARRRS"
x2 <- "KMRDFRHRAE"
# Count occurrences of three fixed character groups in each string.
#
# string: character vector; every element is counted independently.
# Returns a tibble with the original text plus integer columns RHK, DE, A.
count_v <- function(string) {
  # Join a group's characters into a regex alternation such as "R|H|K".
  as_alternation <- function(chars) paste(chars, collapse = "|")

  rhk_pattern <- as_alternation(c("R", "H", "K"))
  de_pattern <- as_alternation(c("D", "E"))

  tibble(
    text = string,
    RHK = str_count(string, rhk_pattern),
    DE = str_count(string, de_pattern),
    A = str_count(string, "A")
  )
}
# Pass both strings in one call; the printed result has one row per string.
count_v(c(x1,x2))
#> # A tibble: 2 x 4
#> text RHK DE A
#> <chr> <int> <int> <int>
#> 1 GMRRRARRRS 6 0 1
#> 2 KMRDFRHRAE 5 2 1
or with a little more flexibility:
library(tidyverse)
# Example input strings.
x1 <- "GMRRRARRRS"
x2 <- "KMRDFRHRAE"
# Count occurrences of arbitrary character groups in each string.
#
# string: character vector of texts to examine.
# checks: list of character vectors; each vector is one group to count.
# Returns a tibble with a `text` column followed by one count column per
# group, named by concatenating the group's members (e.g. "RHK").
count_v <- function(string, checks) {
  # Build the single count column for one group: the group's members are
  # joined with "|" so that a match on any of them is counted.
  count_group <- function(x) {
    hits <- str_count(string, paste(x, collapse = "|"))
    tibble("{paste(x, collapse = '')}" := hits)
  }

  # One column per group, bound side by side next to the input text.
  group_counts <- map_dfc(checks, count_group)
  bind_cols(tibble(text = string), group_counts)
}
# Each vector in `checks` is counted separately and gets its own column.
count_v(
  string = c(x1, x2),
  checks = list(
    c("R", "H", "K"),
    c("D", "E"),
    c("A", "S"),
    c("K", "F", "G")
  )
)
#> # A tibble: 2 x 5
#> text RHK DE AS KFG
#> <chr> <int> <int> <int> <int>
#> 1 GMRRRARRRS 6 0 2 1
#> 2 KMRDFRHRAE 5 2 1 2