I have a dataset df1
with columns from S_2018_p
to S_2021_p
where I want to replace the values >= 10 with 0. I would expect a dataset like df2
.
library(data.table)
df1 = data.table(
ID = c("a1", "a2", "a3", "a4", "a5", "a6", "a7"),
"string1" = c("x2", "g3", "n2", "m3", "2w", "ps2", "kg2"),
"S_2018_p" = c(3,5,11,3,9,22,6),
"S_2019_p" = c(3,5,6,21,1,4,0),
"S_2020_p" = c(0,4,13,9,16,7,9),
"S_2021_p" = c(4,0,3,8,5,4,6),
"string2" = c("si", "q2", "oq", "mx", "ix", "p2", "2q"))
ID string1 S_2018_p S_2019_p S_2020_p S_2021_p string2
1: a1 x2 3 3 0 4 si
2: a2 g3 5 5 4 0 q2
3: a3 n2 11 6 13 3 oq
4: a4 m3 3 21 9 8 mx
5: a5 2w 9 1 16 5 ix
6: a6 ps2 22 4 7 4 p2
7: a7 kg2 6 0 9 6 2q
df2 = data.table(
ID = c("a1", "a2", "a3", "a4", "a5", "a6", "a7"),
"string1" = c("x2", "g3", "n2", "m3", "2w", "ps2", "kg2"),
"S_2018_p" = c(3,5,0,3,9,0,6),
"S_2019_p" = c(3,5,6,0,1,4,0),
"S_2020_p" = c(0,4,0,9,0,7,9),
"S_2021_p" = c(4,0,3,8,5,4,6),
"string2" = c("si", "q2", "oq", "mx", "ix", "p2", "2q"))
ID string1 S_2018_p S_2019_p S_2020_p S_2021_p string2
1: a1 x2 3 3 0 4 si
2: a2 g3 5 5 4 0 q2
3: a3 n2 0 6 0 3 oq
4: a4 m3 3 0 9 8 mx
5: a5 2w 9 1 0 5 ix
6: a6 ps2 0 4 7 4 p2
7: a7 kg2 6 0 9 6 2q
I tried to do it with mutete_if, but I don't get the desired output.
df1 %>%
mutate_if(is.numeric, ~1 * (. >= 10))
CodePudding user response:
Dplyr (use the latest version) has a nice "across()" function to be used with mutate. Just be sure to update your dplyr package as it is quite recent
library(dplyr)
df1 %>% mutate(across(where(is.numeric), function(x) ifelse(x >= 10, 0, x)))
ID string1 S_2018_p S_2019_p S_2020_p S_2021_p string2
1: a1 x2 3 3 0 4 si
2: a2 g3 5 5 4 0 q2
3: a3 n2 0 6 0 3 oq
4: a4 m3 3 0 9 8 mx
5: a5 2w 9 1 0 5 ix
6: a6 ps2 0 4 7 4 p2
7: a7 kg2 6 0 9 6 2q
CodePudding user response:
You can use the apply
and ifelse
functions on the specific columns of interest. For example:
apply(df1[,c(3,4,5,6)], MARGIN = c(1,2), FUN = function(x) ifelse(x >= 10, 0, x))
The apply
function will work on the selected rows of the data frame (df1[,c(3,4,5,6)]
) and will apply the function FUN = ifelse(x >= 10, 0, x)
(if x is greater or equal to ten, replace it with 0, else, replace it with itself (don't replace it)) on each cells of the dataframe (MARGIN = c(1,2)
).
You can of course then replace the dataframe part with the corrected part:
df1[,c(3,4,5,6)] <- apply(df1[,c(3,4,5,6)], MARGIN = c(1,2), FUN = function(x) ifelse(x >= 10, 0, x))
CodePudding user response:
Another solution, based on dplyr
:
library(tidyverse)
library(data.table)
df1 = data.table(
ID = c("a1", "a2", "a3", "a4", "a5", "a6", "a7"),
"string1" = c("x2", "g3", "n2", "m3", "2w", "ps2", "kg2"),
"S_2018_p" = c(3,5,11,3,9,22,6),
"S_2019_p" = c(3,5,6,21,1,4,0),
"S_2020_p" = c(0,4,13,9,16,7,9),
"S_2021_p" = c(4,0,3,8,5,4,6),
"string2" = c("si", "q2", "oq", "mx", "ix", "p2", "2q"))
df1 %>%
mutate(
across(
where(is.numeric),
~ if_else(get(cur_column()) > 10, 0, get(cur_column()))))
#> ID string1 S_2018_p S_2019_p S_2020_p S_2021_p string2
#> 1: a1 x2 3 3 0 4 si
#> 2: a2 g3 5 5 4 0 q2
#> 3: a3 n2 0 6 0 3 oq
#> 4: a4 m3 3 0 9 8 mx
#> 5: a5 2w 9 1 0 5 ix
#> 6: a6 ps2 0 4 7 4 p2
#> 7: a7 kg2 6 0 9 6 2q
And yet another solution, based on purrr::map_if
:
library(tidyverse)
library(data.table)
df1 = data.table(
ID = c("a1", "a2", "a3", "a4", "a5", "a6", "a7"),
"string1" = c("x2", "g3", "n2", "m3", "2w", "ps2", "kg2"),
"S_2018_p" = c(3,5,11,3,9,22,6),
"S_2019_p" = c(3,5,6,21,1,4,0),
"S_2020_p" = c(0,4,13,9,16,7,9),
"S_2021_p" = c(4,0,3,8,5,4,6),
"string2" = c("si", "q2", "oq", "mx", "ix", "p2", "2q"))
df1 %>% map_if(is.numeric, ~ ifelse(.x > 10 , 0, .x)) %>% as.data.table
#> ID string1 S_2018_p S_2019_p S_2020_p S_2021_p string2
#> 1: a1 x2 3 3 0 4 si
#> 2: a2 g3 5 5 4 0 q2
#> 3: a3 n2 0 6 0 3 oq
#> 4: a4 m3 3 0 9 8 mx
#> 5: a5 2w 9 1 0 5 ix
#> 6: a6 ps2 0 4 7 4 p2
#> 7: a7 kg2 6 0 9 6 2q