Home > OS >  Scale row values ignoring a column
Scale row values ignoring a column

Time:05-01

I have the following data.frame:

char   A     B     C
a      1     2     3
b     300  239   444
c      15   25    32

I need to normalize the data row-wise, rescaling the values in each row to the interval between 0 and 1, ignoring the first column but keeping it in the result.

How could I do that?

CodePudding user response:

You can do this:

# Row-wise min-max normalisation of every column except the first, with the
# untouched `char` column re-attached on the left.
bind_cols(
  df %>% select(char),
  # apply() over rows returns one column per input row, so the result is
  # transposed back to the original orientation before becoming a data frame.
  as.data.frame(
    t(apply(df[, -1], 1, \(x) (x - min(x)) / (max(x) - min(x))))
  )
)

Output:

  char        A         B C
1    a 0.000000 0.5000000 1
2    b 0.297561 0.0000000 1
3    c 0.000000 0.5882353 1

CodePudding user response:

Updated to reflect row-wise rescale:

library(tidyverse)
library(scales)
#> 
#> Attaching package: 'scales'
#> The following object is masked from 'package:purrr':
#> 
#>     discard
#> The following object is masked from 'package:readr':
#> 
#>     col_factor

# Example data from the question, built column by column.
tibble(
  char = c("a", "b", "c"),
  A = c(1, 300, 15),
  B = c(2, 239, 25),
  C = c(3, 444, 32)
) |>
  # Transpose through long format: each original row becomes a column.
  pivot_longer(cols = -char, names_to = "name", values_to = "value") |>
  pivot_wider(names_from = char, values_from = value) |>
  # Rescaling each column now is a row-wise rescale of the original data.
  mutate(across(.cols = -name, .fns = rescale))
#> # A tibble: 3 × 4
#>   name      a     b     c
#>   <chr> <dbl> <dbl> <dbl>
#> 1 A       0   0.298 0    
#> 2 B       0.5 0     0.588
#> 3 C       1   1     1

Created on 2022-04-30 by the reprex package (v2.0.1)

CodePudding user response:

You can use scales::rescale() to rescale a continuous vector to a specified minimum and maximum. The output range defaults to c(0, 1).

library(dplyr)
library(tidyr)

# Reshape to long format so each original row forms one group, rescale
# within each group, then reshape back to the original wide layout.
df %>%
  pivot_longer(cols = A:C, names_to = "name", values_to = "value") %>%
  group_by(char) %>%
  mutate(value = scales::rescale(value)) %>%
  ungroup() %>%
  pivot_wider(names_from = name, values_from = value)

Version 2

# Row-wise alternative: for each row, gather A:C into one vector, rescale it,
# and store the result in a temporary list-column `x`.
df %>%
  rowwise() %>%
  # .keep = "unused" drops the columns consumed by the expression (A:C).
  mutate(x = list(scales::rescale(c_across(A:C))), .keep = "unused") %>%
  # Spread the list-column back into one column per element.
  # NOTE(review): the new column names are derived from `x` and `names_sep`;
  # confirm they come out as A/B/C with the installed dplyr/tidyr versions.
  unnest_wider(x, names_sep = "")

Output

# # A tibble: 3 × 4
#   char      A     B     C
#   <chr> <dbl> <dbl> <dbl>
# 1 a     0     0.5       1
# 2 b     0.298 0         1
# 3 c     0     0.588     1

CodePudding user response:

Using pmin/pmax

# Base R row-wise min-max scaling. pmin()/pmax() take the parallel minimum
# and maximum across the numeric columns, giving one value per row.
num_cols <- df1[-1]
row_min <- do.call(pmin, num_cols)
row_max <- do.call(pmax, num_cols)
# The per-row vectors are recycled down each column, so every row is shifted
# by its own minimum and divided by its own range.
df1[-1] <- (num_cols - row_min) / (row_max - row_min)
df1
  char        A         B C
1    a 0.000000 0.5000000 1
2    b 0.297561 0.0000000 1
3    c 0.000000 0.5882353 1

Or another option is rescale with dapply

library(collapse)
library(scales)
# MARGIN = 1 makes dapply() call rescale() on each row of the numeric
# columns; the first column is left as it is.
scaled <- dapply(df1[-1], FUN = rescale, MARGIN = 1)
df1[-1] <- scaled

-output

> df1
  char        A         B C
1    a 0.000000 0.5000000 1
2    b 0.297561 0.0000000 1
3    c 0.000000 0.5882353 1

data

# Example input from the question: an identifier column followed by the
# numeric columns that are rescaled row-wise.
df1 <- structure(list(char = c("a", "b", "c"), A = c(1, 300, 15),
B = c(2, 239, 25), C = c(3, 444, 32)),
row.names = c(NA,
-3L), class = "data.frame")
  • Related