Home > OS >  apply function to each element in dataframe in R
apply function to each element in dataframe in R

Time:07-14

I want to apply the function below, which normalises a number, to every element of my data frame:

# Normalise a single score against the interval [min_score, max_score].
#
# raw_score : the value to normalise. It must be a single number, because
#             each `if` below needs a length-one condition.
# min_score : lower bound of the interval.
# max_score : upper bound of the interval.
#
# Returns 1 when raw_score is at or below min_score, 1 when it is at or
# above max_score, and (raw_score - min_score) / (max_score - min_score)
# otherwise.
# NOTE(review): scores at or below min_score map to 1 rather than 0 --
# confirm this is intended.
norm_fn <- function(raw_score, min_score, max_score) {
  # Guard clauses: both out-of-range cases short-circuit to 1.
  if (raw_score <= min_score) {
    return(1)
  }
  if (raw_score >= max_score) {
    return(1)
  }
  # Strictly inside the interval: linear rescale.
  (raw_score - min_score) / (max_score - min_score)
}

# Reproducible example data: an ID column plus three columns of
# uniform random numbers.
set.seed(123)
dat <- data.frame(
  ID = 1:10,
  col1 = runif(10),
  col2 = runif(10),
  col3 = runif(10)
)

# Bounds passed to norm_fn() as min_score and max_score.
mn <- 0.01
mx <- 0.8

# apply() with MARGIN = 2 calls norm_fn() once per column, handing it the
# whole column as raw_score.
dat[, 2:4] <- apply(
  dat[, 2:4],
  MARGIN = 2,
  FUN = norm_fn,
  min_score = mn,
  max_score = mx
)
  

I get the following warning messages, and it looks like the function didn't work for col2 and col3:

1: In if (raw_score <= min_score) { :
   the condition has length > 1 and only the first element will be used
2: In if (raw_score >= max_score) { :
   the condition has length > 1 and only the first element will be used
3: In if (raw_score <= min_score) { :
   the condition has length > 1 and only the first element will be used
4: In if (raw_score >= max_score) { :
   the condition has length > 1 and only the first element will be used
5: In if (raw_score <= min_score) { :
   the condition has length > 1 and only the first element will be used
6: In if (raw_score >= max_score) { :
   the condition has length > 1 and only the first element will be used

CodePudding user response:

We may `Vectorize` the function, because it uses `if`/`else`, which is not vectorized

# Vectorize() wraps norm_fn so that it is called once per element;
# lapply() then runs the wrapped function over each of columns 2 to 4.
dat[2:4] <- lapply(
  X = dat[2:4],
  FUN = Vectorize(norm_fn),
  min_score = mn,
  max_score = mx
)

-output

> dat
   ID       col1       col2      col3
1   1 0.35136395 1.00000000 1.0000000
2   2 0.98519637 0.56118248 0.8643081
3   3 0.50503408 0.84502612 0.7981099
4   4 1.00000000 0.71219418 1.0000000
5   5 1.00000000 0.11762618 0.8173491
6   6 0.04500823 1.00000000 0.8842158
7   7 0.65582973 0.29884523 0.6760329
8   8 1.00000000 0.04058169 0.7394203
9   9 0.68536078 0.40243129 0.3533668
10 10 0.56533511 1.00000000 0.1735616

Or the same approach with across

library(dplyr)
# Apply the element-wise (Vectorize()d) norm_fn to every column except ID.
# The bounds are bound inside an anonymous function because passing extra
# arguments through across()'s `...` is deprecated as of dplyr 1.1.0.
dat <- dat %>%
  mutate(
    across(
      -ID,
      function(x) Vectorize(norm_fn)(x, min_score = mn, max_score = mx)
    )
  )
dat
   ID       col1       col2      col3
1   1 0.35136395 1.00000000 1.0000000
2   2 0.98519637 0.56118248 0.8643081
3   3 0.50503408 0.84502612 0.7981099
4   4 1.00000000 0.71219418 1.0000000
5   5 1.00000000 0.11762618 0.8173491
6   6 0.04500823 1.00000000 0.8842158
7   7 0.65582973 0.29884523 0.6760329
8   8 1.00000000 0.04058169 0.7394203
9   9 0.68536078 0.40243129 0.3533668
10 10 0.56533511 1.00000000 0.1735616

CodePudding user response:

A tidyverse approach

library(tidyverse)

# rowwise() makes mutate() evaluate one row at a time, so norm_fn receives
# a single value per column and its scalar if/else works as written.
# The bounds are bound inside an anonymous function because passing extra
# arguments through across()'s `...` is deprecated as of dplyr 1.1.0.
dat %>%
  rowwise() %>%
  mutate(
    across(
      .cols = col1:col3,
      .fns = function(x) norm_fn(x, min_score = mn, max_score = mx)
    )
  ) %>%
  ungroup()

#> # A tibble: 10 × 4
#>       ID   col1   col2  col3
#>    <int>  <dbl>  <dbl> <dbl>
#>  1     1 0.351  1      1    
#>  2     2 0.985  0.561  0.864
#>  3     3 0.505  0.845  0.798
#>  4     4 1      0.712  1    
#>  5     5 1      0.118  0.817
#>  6     6 0.0450 1      0.884
#>  7     7 0.656  0.299  0.676
#>  8     8 1      0.0406 0.739
#>  9     9 0.685  0.402  0.353
#> 10    10 0.565  1      0.174
  • Related