here is my sample data
# Sample data: a numeric vector and a character vector whose cells hold
# "~"-separated values (some cells are empty strings)
a <- c(1, 2, 3, 4, 5, 6)
b <- c("x", "", "x~y~z", "z", "", "x")
# Combine the two vectors into a data frame with columns `a` and `b`
c <- data.frame(a = a, b = b)
# Display the data frame
c
#output
a b
1 1 x
2 2
3 3 x~y~z
4 4 z
5 5
6 6 x
My expected output is
This is what I tried: I created a new function, as below.
# Flag, row by row, whether token `y` occurs in a "~"-delimited column.
#
# Arguments:
#   df  - a data frame.
#   col - name (or index) of the column holding "~"-separated values.
#   y   - the single token to look for, e.g. "x".
#
# Returns a character vector with one element per row of `df`: `y` where
# the row's cell contains `y` as a whole "~"-separated token, "" otherwise
# (including empty and NA cells).
getC <- function(df, col, y) {
  # as.character() so factor columns are split on their labels
  cells <- as.character(df[[col]])
  parts <- strsplit(cells, "~", fixed = TRUE)

  # `if` only accepts a length-1 condition, so test every row explicitly;
  # NA and "" cells split to values that never match `y`
  hit <- vapply(parts, function(tokens) y %in% tokens, logical(1))

  # Start from all-blank so a zero-row input still yields character(0)
  out <- rep("", length(cells))
  out[hit] <- y
  out
}
# Calling the function to create the new columns x, y and z; the function
# should split the respective cells on "~".
# Store the result of getC() for each token in a column of the same name
c$x <- getC(df = c, col = "b", y = "x")
c$y <- getC(df = c, col = "b", y = "y")
c$z <- getC(df = c, col = "b", y = "z")
the below is the error
> c$x <- getC(c,"b","x")
Warning message:
In if (df[, col] == y) { :
the condition has length > 1 and only the first element will be used
Please help: why am I getting that warning, and what should I do to avoid it?
CodePudding user response:
The reason for the warning is the behavior of if/else,
which is not vectorized, i.e. it expects a condition of length 1. We could extract the elements as
library(dplyr)
library(purrr)
library(stringr)
library(tidyr)
# For each token, str_extract() pulls it out of `b` (NA when absent); the
# resulting data-frame column `out` is then spread into plain x/y/z columns.
# Note: str_extract() matches the pattern anywhere in the string.
c %>%
  mutate(
    out = imap_dfc(
      set_names(c("x", "y", "z")),
      ~ str_extract(b, .x)
    )
  ) %>%
  unnest_wider(out, names_sep = ".") %>%
  rename_with(~ str_remove(.x, "out\\."))
-output
# A tibble: 6 × 5
a b x y z
<dbl> <chr> <chr> <chr> <chr>
1 1 "x" x <NA> <NA>
2 2 "" <NA> <NA> <NA>
3 3 "x~y~z" x y z
4 4 "z" <NA> <NA> z
5 5 "" <NA> <NA> <NA>
6 6 "x" x <NA> <NA>
Or another option is to create dummy cols and replace the 1s with column values
library(dplyr)
library(fastDummies)
# Build indicator columns from the "~"-separated values of `b`, then
# replace each 1 with the token taken from the column name and everything
# else with "".
c %>%
  dummy_cols(select_columns = "b", split = "~") %>%
  transmute(
    a,
    b,
    across(
      starts_with("b_"),
      ~ case_when(
        .x == 1 ~ str_remove(cur_column(), "b_"),
        TRUE ~ ""
      ),
      .names = "{str_remove(.col, 'b_')}"
    )
  )
-output
a b x y z
1 1 x x
2 2
3 3 x~y~z x y z
4 4 z z
5 5
6 6 x x
If there are more columns, pass a vector
of column names
# Same approach with two "~"-delimited columns: rebuild the data frame with
# an extra column `c`, create indicators for both, and swap each 1 for the
# token taken from the indicator's column name.
c <- data.frame(a = a, b = b, c = b)
c %>%
  dummy_cols(select_columns = c("b", "c"), split = "~") %>%
  transmute(
    a,
    b,
    c,
    across(
      contains("_"),
      ~ case_when(
        .x == 1 ~ str_remove(cur_column(), "b_|c_"),
        TRUE ~ ""
      )
    )
  )
a b c b_x b_y b_z c_x c_y c_z
1 1 x x x x
2 2
3 3 x~y~z x~y~z x y z x y z
4 4 z z z z
5 5
6 6 x x x x