I have the following data.frame:
char A B C
a 1 2 3
b 300 239 444
c 15 25 32
I need to normalize the data row-wise, rescaling the values in each row to the interval [0, 1]. The first column should be ignored in the calculation but kept in the result.
How could I do that?
CodePudding user response:
You can do this:
# Min-max scale each row of the value columns to [0, 1] and keep `char`.
# `df[-1]` (list-style indexing) always returns a data frame, so this also
# works when only one value column remains.
vals <- as.matrix(df[-1])
row_min <- apply(vals, 1, min)
row_max <- apply(vals, 1, max)
bind_cols(
  df %>% select(char),
  # A vector with one entry per row recycles down the columns of the matrix,
  # so the subtraction and division are applied row by row.
  as.data.frame((vals - row_min) / (row_max - row_min))
)
Output:
char A B C
1 a 0.000000 0.5000000 1
2 b 0.297561 0.0000000 1
3 c 0.000000 0.5882353 1
CodePudding user response:
Updated to rescale row-wise (note that the result is transposed: each original row becomes a column):
library(tidyverse)
library(scales)
#>
#> Attaching package: 'scales'
#> The following object is masked from 'package:purrr':
#>
#> discard
#> The following object is masked from 'package:readr':
#>
#> col_factor
# Example input from the question.
input <- tribble(
  ~char, ~A, ~B, ~C,
  "a", 1, 2, 3,
  "b", 300, 239, 444,
  "c", 15, 25, 32
)
# Transpose via long form so that each original row becomes a column ...
long <- pivot_longer(input, -char)
transposed <- pivot_wider(long, names_from = char, values_from = value)
# ... then rescale every column except `name`.
mutate(transposed, across(-name, rescale))
#> # A tibble: 3 × 4
#> name a b c
#> <chr> <dbl> <dbl> <dbl>
#> 1 A 0 0.298 0
#> 2 B 0.5 0 0.588
#> 3 C 1 1 1
Created on 2022-04-30 by the reprex package (v2.0.1)
CodePudding user response:
You can use `scales::rescale()` to rescale a continuous vector to a specified minimum and maximum. The output range defaults to `c(0, 1)`.
library(dplyr)
library(tidyr)
# Reshape to long form, rescale the values within each `char` group,
# then reshape back to the original wide layout.
df %>%
  pivot_longer(cols = A:C, names_to = "name", values_to = "value") %>%
  group_by(char) %>%
  mutate(value = scales::rescale(value)) %>%
  ungroup() %>%
  pivot_wider(names_from = name, values_from = value)
Version 2
# Row-wise variant: rescale A:C within each row, hold the result in a
# list-column `x`, then spread that list-column back into columns.
by_row <- rowwise(df)
scaled <- mutate(
  by_row,
  x = list(scales::rescale(c_across(A:C))),
  .keep = "unused"
)
unnest_wider(scaled, x, names_sep = "")
Output
# # A tibble: 3 × 4
# char A B C
# <chr> <dbl> <dbl> <dbl>
# 1 a 0 0.5 1
# 2 b 0.298 0 1
# 3 c 0 0.588 1
CodePudding user response:
Using pmin/pmax
# Every column except the first holds the values to rescale.
vals <- df1[-1]
# Parallel min/max across those columns give one minimum/maximum per row.
mn <- do.call(pmin, vals)
mx <- do.call(pmax, vals)
# Shift each row by its minimum and divide by its range.
df1[-1] <- (vals - mn) / (mx - mn)
df1
char A B C
1 a 0.000000 0.5000000 1
2 b 0.297561 0.0000000 1
3 c 0.000000 0.5882353 1
Another option is `rescale` (from the scales package) combined with `dapply` (from the collapse package):
library(collapse)
library(scales)
# Apply rescale() along rows (MARGIN = 1) of every column except the first.
df1[-1] <- dapply(X = df1[-1], FUN = rescale, MARGIN = 1)
-output
> df1
char A B C
1 a 0.000000 0.5000000 1
2 b 0.297561 0.0000000 1
3 c 0.000000 0.5882353 1
data
# Input data from the question (raw, un-normalised values). The snippets
# that use `df1` overwrite its value columns with the rescaled result.
df1 <- data.frame(
  char = c("a", "b", "c"),
  A = c(1, 300, 15),
  B = c(2, 239, 25),
  C = c(3, 444, 32),
  stringsAsFactors = FALSE
)