I have this data frame:
# Example data: one row per observation date with two measured variables.
dat <- structure(
  list(
    date = as.Date(c(
      "1983-01-01", "1984-01-01", "1984-02-01",
      "1985-01-01", "1985-02-01", "1986-01-01"
    )),
    rig = c(68.1, 62.4, 67.5, 78.9, 81.7, 72.2),
    pass = c(9.57, 10.49, 11.97, 11.43, 9.54, 8.98)
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)
For each calendar month, I would like to compute the mean (m) and the standard deviation (std) of each variable across all years, and then replace every value (i) belonging to that month with (i - m)/std.
We can compute the per-month mean and sd like this:
# Per-month mean of every numeric column, ignoring missing values.
# Each `%>%` has to end a line rather than start one: otherwise R treats the
# preceding line as a complete expression and the next line fails to parse.
# `na.rm` is passed inside a lambda because forwarding extra arguments
# through across() is deprecated in dplyr >= 1.1.0.
dat %>%
  mutate(month = month(as.Date(date))) %>%
  group_by(month) %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) %>%
  ungroup()
# A tibble: 2 × 3
month rig pass
<dbl> <dbl> <dbl>
1 1 70.4 10.1
2 2 74.6 10.8
# Per-month standard deviation of every numeric column, ignoring missing
# values. Each `%>%` has to end a line rather than start one: otherwise R
# treats the preceding line as a complete expression and the next line fails
# to parse. `na.rm` is passed inside a lambda because forwarding extra
# arguments through across() is deprecated in dplyr >= 1.1.0.
dat %>%
  mutate(month = month(as.Date(date))) %>%
  group_by(month) %>%
  summarise(across(where(is.numeric), ~ sd(.x, na.rm = TRUE))) %>%
  ungroup()
# A tibble: 2 × 3
month rig pass
<dbl> <dbl> <dbl>
1 1 6.95 1.07
2 2 10.0 1.72
How can I subtract the monthly mean from each value in the corresponding column and then divide by the monthly sd? Expected results, with a worked example for pass:
(9.57-10.1175)/1.073169 = -0.5101713
date rig pass
1 1983-01-01 -0.3310809 -0.5101713
2 1984-01-01 -1.151586 0.3471028
3 1984-02-01 -0.7071068 0.707107
4 1985-01-01 1.22356 1.223013
5 1985-02-01 0.7071068 -0.707107
6 1986-01-01 0.2591068 -1.059945
CodePudding user response:
Could you just use scale()?
# Example data: one row per observation date with two measured variables.
dat <- structure(
  list(
    date = as.Date(c(
      "1983-01-01", "1984-01-01", "1984-02-01",
      "1985-01-01", "1985-02-01", "1986-01-01"
    )),
    rig = c(68.1, 62.4, 67.5, 78.9, 81.7, 72.2),
    pass = c(9.57, 10.49, 11.97, 11.43, 9.54, 8.98)
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)
library(dplyr)
library(lubridate)
# Standardise every numeric column within its calendar month.
# scale() returns a one-column matrix with extra attributes; as.numeric()
# flattens it back to a plain vector so it is stored as an ordinary column.
dat %>%
  mutate(month = month(as.Date(date))) %>%
  group_by(month) %>%
  mutate(across(where(is.numeric), function(col) as.numeric(scale(col))))
#> # A tibble: 6 × 4
#> # Groups: month [2]
#> date rig pass month
#> <date> <dbl> <dbl> <dbl>
#> 1 1983-01-01 -0.331 -0.510 1
#> 2 1984-01-01 -1.15 0.347 1
#> 3 1984-02-01 -0.707 0.707 2
#> 4 1985-01-01 1.22 1.22 1
#> 5 1985-02-01 0.707 -0.707 2
#> 6 1986-01-01 0.259 -1.06 1
Or, if you wanted to be more explicit about the transformation:
# Explicit z-score within each month: (value - group mean) / group sd.
# na.rm = TRUE keeps this version equivalent to scale(), which also drops
# NAs when computing the centre and spread; without it a single missing
# value would turn every result in that month into NA.
dat %>%
  mutate(month = month(as.Date(date))) %>%
  group_by(month) %>%
  mutate(across(
    where(is.numeric),
    ~ (.x - mean(.x, na.rm = TRUE)) / sd(.x, na.rm = TRUE)
  ))
#> # A tibble: 6 × 4
#> # Groups: month [2]
#> date rig pass month
#> <date> <dbl> <dbl> <dbl>
#> 1 1983-01-01 -0.331 -0.510 1
#> 2 1984-01-01 -1.15 0.347 1
#> 3 1984-02-01 -0.707 0.707 2
#> 4 1985-01-01 1.22 1.22 1
#> 5 1985-02-01 0.707 -0.707 2
#> 6 1986-01-01 0.259 -1.06 1
Created on 2023-01-17 by the reprex package (v2.0.1)