I want to apply the function below, which normalises a number, to every element of my data frame:
# Normalise a single raw score against the interval [min_score, max_score].
#
# raw_score: the value to normalise.
# min_score: lower bound of the interval.
# max_score: upper bound of the interval.
#
# Returns 1 when raw_score is at or below min_score, 1 when it is at or above
# max_score, and (raw_score - min_score) / (max_score - min_score) otherwise.
#
# The `if` conditions test raw_score as a whole, so this function is written
# for one value at a time; it is not vectorised.
# NOTE(review): values at or below min_score map to 1, the same as values at
# or above max_score -- confirm this is intended rather than 0.
norm_fn <- function(raw_score, min_score, max_score) {
  # Guard clause: at or below the lower bound.
  if (raw_score <= min_score) {
    return(1)
  }
  # Guard clause: at or above the upper bound.
  if (raw_score >= max_score) {
    return(1)
  }
  # Strictly inside the interval: linear rescaling.
  return((raw_score - min_score) / (max_score - min_score))
}
# Reproducible example: an ID column plus three columns of uniform random
# draws (the seed is fixed so the values are repeatable).
set.seed(123)
dat <- data.frame(
  ID = 1:10,
  col1 = runif(10),
  col2 = runif(10),
  col3 = runif(10)
)

# Lower and upper bounds passed to norm_fn as min_score / max_score.
mn <- 0.01
mx <- 0.8

# apply() with MARGIN = 2 calls norm_fn once per column, handing it the whole
# column as raw_score rather than one value at a time.
dat[, 2:4] <- apply(
  dat[, 2:4],
  MARGIN = 2,
  FUN = norm_fn,
  min_score = mn,
  max_score = mx
)
I get the warning messages below, and it also looks like the function did not work for col2 and col3:
1: In if (raw_score <= min_score) { :
the condition has length > 1 and only the first element will be used
2: In if (raw_score >= max_score) { :
the condition has length > 1 and only the first element will be used
3: In if (raw_score <= min_score) { :
the condition has length > 1 and only the first element will be used
4: In if (raw_score >= max_score) { :
the condition has length > 1 and only the first element will be used
5: In if (raw_score <= min_score) { :
the condition has length > 1 and only the first element will be used
6: In if (raw_score >= max_score) { :
the condition has length > 1 and only the first element will be used
CodePudding user response:
We may wrap the function in `Vectorize`, because the function uses `if`/`else`, which is not vectorized:
# Vectorize() wraps norm_fn so that it is called once per element; lapply()
# then applies the wrapped function to each of columns 2 to 4 in turn.
dat[2:4] <- lapply(
  dat[2:4],
  Vectorize(norm_fn),
  min_score = mn,
  max_score = mx
)
-output
> dat
ID col1 col2 col3
1 1 0.35136395 1.00000000 1.0000000
2 2 0.98519637 0.56118248 0.8643081
3 3 0.50503408 0.84502612 0.7981099
4 4 1.00000000 0.71219418 1.0000000
5 5 1.00000000 0.11762618 0.8173491
6 6 0.04500823 1.00000000 0.8842158
7 7 0.65582973 0.29884523 0.6760329
8 8 1.00000000 0.04058169 0.7394203
9 9 0.68536078 0.40243129 0.3533668
10 10 0.56533511 1.00000000 0.1735616
Or the same approach with `across` from dplyr:
library(dplyr)
# Normalise every column except ID, element by element.
# The extra arguments are supplied inside an anonymous function because
# passing them through across()'s `...` is deprecated (dplyr >= 1.1.0).
# Vectorize() is still required: norm_fn's if/else handles one value at a time.
dat <- dat %>%
  mutate(
    across(
      -ID,
      function(x) Vectorize(norm_fn)(x, min_score = mn, max_score = mx)
    )
  )
dat
ID col1 col2 col3
1 1 0.35136395 1.00000000 1.0000000
2 2 0.98519637 0.56118248 0.8643081
3 3 0.50503408 0.84502612 0.7981099
4 4 1.00000000 0.71219418 1.0000000
5 5 1.00000000 0.11762618 0.8173491
6 6 0.04500823 1.00000000 0.8842158
7 7 0.65582973 0.29884523 0.6760329
8 8 1.00000000 0.04058169 0.7394203
9 9 0.68536078 0.40243129 0.3533668
10 10 0.56533511 1.00000000 0.1735616
CodePudding user response:
A tidyverse approach using `rowwise()`, so that `norm_fn` receives one value at a time:
library(tidyverse)
# rowwise() makes mutate() evaluate one row at a time, so each column value
# reaching norm_fn is a single number and its scalar if/else works as written.
# The bounds are supplied inside an anonymous function because passing extra
# arguments through across()'s `...` is deprecated (dplyr >= 1.1.0).
dat %>%
  rowwise() %>%
  mutate(
    across(
      .cols = col1:col3,
      .fns = function(x) norm_fn(x, min_score = mn, max_score = mx)
    )
  ) %>%
  # Drop the row-wise grouping so later verbs operate on whole columns again.
  ungroup()
#> # A tibble: 10 × 4
#> ID col1 col2 col3
#> <int> <dbl> <dbl> <dbl>
#> 1 1 0.351 1 1
#> 2 2 0.985 0.561 0.864
#> 3 3 0.505 0.845 0.798
#> 4 4 1 0.712 1
#> 5 5 1 0.118 0.817
#> 6 6 0.0450 1 0.884
#> 7 7 0.656 0.299 0.676
#> 8 8 1 0.0406 0.739
#> 9 9 0.685 0.402 0.353
#> 10 10 0.565 1 0.174