How can I do the following:
- replace all values < 6 with NA,
- if there is only one NA in the row, replace the first instance of the minimum value with -99?
Some data that includes an ID variable and a total column:
library(tidyverse)
df <- data.frame(id = c(1,2,3,4,5), a = c(10,12,4,17,3), b = c(9,12,3,20,6), c = c(2,2,10,10,10), d = c(12,16,12,10,12))
df$total <- apply(df[,c(2:5)], 1, sum)
Giving
id a b c d total
1 10 9 2 12 33
2 12 12 2 16 42
3 4 3 10 12 29
4 17 20 10 10 57
5 3 6 10 12 31
My desired output is
id a b c d total
1 10 -99 NA 12 33
2 -99 12 NA 16 42
3 NA NA 10 12 29
4 17 20 10 10 57
5 NA -99 10 12 31
My attempt
df_mod <- df %>%
# Make <6 NA
mutate(
across(
.cols = 'a':'total',
~case_when(
.x < 6 ~ as.numeric(NA),
TRUE ~ .x
)
)
) %>%
# Add a count of NAs
rowwise() %>%
mutate(Count_NA = sum(is.na(cur_data()))) %>%
ungroup()
# Transpose and get row minimum
df_mod2 <- t(df_mod[,-c(1,ncol(df_mod))]) %>%
apply(., 2, function(a){
min <- min(a, na.rm = TRUE)
}
) %>%
cbind(df_mod, .) %>%
rename(., min = .) %>%
tibble(.)
# If count_NA = 1 replace the first instance of min
df_mod2 %>%
rowwise() %>%
mutate(
across(
.cols = 'a':'total',
~case_when(
Count_NA == 1 & .x == min ~ replace(.x, first(match(min, .x)), -99),
TRUE ~ .x)
)
) %>%
select(-'Count_NA', -'min')
Which gives the following
id a b c d total
1 10 -99 NA 12 33
2 -99 -99 NA 16 42
3 NA NA 10 12 29
4 17 20 10 10 57
5 NA -99 10 12 31
Thanks
CodePudding user response:
If you're willing to pivot rather than work rowwise, then this solution will work.
library(dplyr)
df %>%
pivot_longer(names_to = 'col',
values_to = 'val',
-c(id, total)) %>%
group_by(id) %>%
mutate(val2 = rank(val, ties.method = 'first'),
val = ifelse(val < 6, NA , val),
val = ifelse(sum(is.na(val)) == 1 & val2 == 2, -99, val)) %>%
select(-val2) %>%
pivot_wider(names_from = col,
values_from = val) %>%
relocate(total, .after = "d")
Here's the result:
# A tibble: 5 × 6
# Groups: id [5]
id a b c d total
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 10 -99 NA 12 33
2 2 -99 12 NA 16 42
3 3 NA NA 10 12 29
4 4 17 20 10 10 57
5 5 NA -99 10 12 31
CodePudding user response:
It is not clear what you mean by '2nd' minimum value because you replace minimum value. You can use data.table
:
library(data.table)
setDT(df)[
,
(cols) := transpose(
lapply(
transpose(lapply(.SD, function(x) fifelse(x < 6, NA_real_, x))),
function(x) if(sum(is.na(x)) == 1) replace(x, which.min(x), -99) else x
)
),
.SDcols = setdiff(names(df), "id")
]