here it is my problem in the form of a reproducible example and my partial attempt of solution
# input
mydf_in<-data.frame(a=letters[6:10],
b=c("<0.5","2","<0.5", "9", "10"),
c=1:5,
d=6:10,
e=c("<0.8","12","<0.8", "<0.8", "<0.8"))
mydf_in
# output
# the desired final result
mydf_out<-data.frame(a=letters[6:10],
b=c(0.5,2,0.5,9,10),
b_flag=c(1,0,1,0,0),
c=1:5,
d=6:10,
e=c(0.8,12,0.8,0.8,0.8),
e_flag=c(1,0,1,1,1)
)
mydf_out
library(tidyverse)
mydf_in %>%
select(where(~ is.character(.x) &
any(str_detect(.x, "<")
)
)
) %>%
# in between here is missing the creation and
# the population of the flagging columns, i.e. "b_flag" and "e_flag"
mutate(across(everything(), ~ as.numeric(str_replace(.x, "<", ""))))
in short, what is missing in the between of the above code snippet, for each selected column:
- create a corresponding flagging column
- populate the rows of the flagging column with 1 or 0 depending on the presence of the sign "<" (see desired output)
CodePudding user response:
If we want to use the conditions explicitly, instead of select
use mutate
with the where
the condition - to create the 'flag' columns loop over the columns with across
and to change the column types use across
library(dplyr)
library(stringr)
mydf_in %>%
mutate(across(where(~ is.character(.x) &
any(str_detect(.x, fixed("<")))), ~
(str_detect(.x, fixed("<"))), .names = "{.col}_flag"),
across(where(~ is.character(.x) &
any(str_detect(.x, fixed("<")))), ~ readr::parse_number(.)))
-output
a b c d e b_flag e_flag
1 f 0.5 1 6 0.8 1 1
2 g 2.0 2 7 12.0 0 0
3 h 0.5 3 8 0.8 1 1
4 i 9.0 4 9 0.8 0 1
5 j 10.0 5 10 0.8 0 1
CodePudding user response:
You can use grepl()
to make a logical vector for the flag columns. as.interger
will give 1s and 0s instead of TRUE and FALSE. Then use gsub()
to clean up your initial columns.
library(dplyr)
mydf_in %>%
mutate(b_flag = grepl("<",mydf_in$b) %>% as.integer,
e_flag = grepl("<",mydf_in$e) %>% as.integer,
b = gsub("<","", mydf_in$b),
e = gsub("<","", mydf_in$b)) %>%
select(a,b,b_flag,c,d,e_flag)
a b b_flag c d e_flag
1 f 0.5 1 1 6 1
2 g 2 0 2 7 0
3 h 0.5 1 3 8 1
4 i 9 0 4 9 1
5 j 10 0 5 10 1
And if you happen to have several more columns that needed flags, you could use grepl()
in combination with lapply()
or sapply()
like this:
#make all flags
flags_columns<-mydf_in[,grepl("<",mydf_in)] %>% sapply(., grepl, pattern = "<") %>%
data.frame(.) %>%
sapply(., as.integer) %>% data.frame(.)%>%
rename_with(., ~paste0(.,"_flag"))
#remove "<" from all columns
edited_columns<- mydf_in[,grepl("<",mydf_in)] %>% lapply(., gsub, pattern = "<", replacement = "") %>%data.frame(.)
#gather any other columns
anything_else<- mydf_in[,!grepl("<",mydf_in)]
#make a dataframe
data.frame(c(flags_columns,edited_columns,anything_else)