I've got this DF:
structure(list(Date = structure(c(18605, 18604, 18598, 18597,
18590, 18584, 18583, 18578, 18570, 18569, 18563, 18562, 18557,
18549, 18548, 18542, 18541, 18536, 18534, 18529, 18521, 18520,
18515, 18508, 18500, 18499, 18493, 18492, 18486, 18485, 18479,
18478, 18472, 18471, 18465, 18464, 18458, 18457, 18450, 18445,
18444, 18437, 18436, 18430, 18429, 18424, 18416, 18415, 18410,
18409, 18403, 18402, 18396, 18388, 18387, 18381, 18380, 18374,
18373, 18368, 18367, 18360, 18359, 18354, 18340, 18338, 18331,
18325, 18317, 18312, 18289, 18282, 18275, 18268), class = "Date"),
V1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0.3, 0, 0, 0, 0, 0.4, 0, 0, 0, 0, 0.2, 0, 0, 0, 0, 0.7, 0,
0, 0, 0, 0, 0.5, 0, 0, 0, 0, 0.3, 0, 0, 0, 0, 0, 0.4, 0,
0, 0, 0.3, 0, 0, 0, 0, 0, 0, 0, 0, 0.6, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), V2 = c(0, 0, 0.1, 0, 0, 0.1, 0, 0.2, 0, 0.2,
0.1, 0, 0.2, 0.2, 0, 0.1, 0, 0, 0.1, 0, 0.2, 0, 0, 0.4, 0.2,
0, 0.3, 0, 0.2, 0, 0.3, 0, 0.6, 0, 0.4, 0, 0, 0.2, 0, 0.4,
0.6, 0, 0.3, 0, 0.2, 0.7, 0, 0.1, 0.3, 0, 0.2, 0, 0, 0, 0.3,
0, 0.1, 0.3, 0, 0, 0.3, 0.2, 0, 0, 0, 0, 0.6, 0, 0.4, 0,
0.2, 0, 0, 0.2), V3 = c(0, 0.3, 0, 0.3, 0.4, 0, 0.2, 0, 0.3,
0, 0, 0.2, 0, 0, 0.2, 0, 0.2, 0, 0, 0.1, 0, 0.2, 0, 0, 0,
0.3, 0, 0, 0, 0.4, 0, 0.3, 0, 0.7, 0, 0.2, 0.5, 0.4, 0, 0.4,
0, 0.8, 0.4, 0, 0.2, 0.6, 0.3, 0.2, 0, 0, 0, 0.4, 0.4, 0,
0.2, 0.3, 0, 0.2, 0.3, 0.4, 0, 0.7, 0, 0, 1.4, 0, 0, 1.4,
0, 1, 0, 0, 0.3, 0), V4 = c(0, 0.4, 0, 0.1, 0.1, 0, 0.1,
0, 0, 0.1, 0, 0.1, 0.2, 0, 0.2, 0, 0.2, 0.3, 0, 0, 0, 0.2,
0.3, 0.3, 0, 0, 0, 0.5, 0, 0.6, 0, 0.7, 0, 0, 0, 1.2, 1,
0, 0.3, 0, 1.1, 0, 0, 0.4, 0, 0, 0, 0, 0.2, 0.2, 0, 0, 0.2,
0, 0, 0.1, 0, 0, 0, 0.2, 0.3, 0, 0.2, 0.3, 0, 1.8, 0, 0,
0, 0, 0, 0.2, 0, 0)), row.names = c(NA, -74L), class = c("tbl_df",
"tbl", "data.frame"))
I'd like to mutate columns V1, V2, V3 and V4 so that, instead of the current values posted here, each column shows the differences from its own mean. For example, the mean of V4 is 0.1635135, so the second value of V4 (0.4) should become 0.4 - 0.1635135 = 0.2364865.
I've tried doing it piecemeal (doing each column individually), by doing the following, but I keep getting computing errors:
df <- df %>% mutate(across(2, x - mean())
Does anyone have any advice on how I can finish this? Any help greatly appreciated
CodePudding user response:
Solution 1: Use a purrr-style lambda function in across()
# Center V1 through V4: subtract each column's own mean from its values.
# Columns outside V1:V4 (here, Date) are not modified.
df %>%
  mutate(
    across(
      .cols = V1:V4,
      .fns = ~ .x - mean(.x)
    )
  )
# # A tibble: 74 × 5
# Date V1 V2 V3 V4
# <date> <dbl> <dbl> <dbl> <dbl>
# 1 2020-12-09 -0.05 -0.128 -0.204 -0.164
# 2 2020-12-08 -0.05 -0.128 0.0959 0.236
# 3 2020-12-02 -0.05 -0.0284 -0.204 -0.164
# 4 2020-12-01 -0.05 -0.128 0.0959 -0.0635
# 5 2020-11-24 -0.05 -0.128 0.196 -0.0635
# ...
Solution 2: Select the variables with across() and pass them to scale(x, scale = FALSE)
# Center V1 through V4 in one call: scale() with center = TRUE and
# scale = FALSE subtracts each column's mean without dividing by its
# standard deviation. scale() returns a matrix, so as_tibble() converts it
# back to columns, which mutate() uses to overwrite V1..V4.
# NOTE(review): recent dplyr (>= 1.1.0) recommends pick(V1:V4) instead of
# across() without .fns -- confirm against the installed version.
df %>%
  mutate(
    as_tibble(
      scale(across(V1:V4), center = TRUE, scale = FALSE)
    )
  )
# # A tibble: 74 × 5
# Date V1 V2 V3 V4
# <date> <dbl> <dbl> <dbl> <dbl>
# 1 2020-12-09 -0.05 -0.128 -0.204 -0.164
# 2 2020-12-08 -0.05 -0.128 0.0959 0.236
# 3 2020-12-02 -0.05 -0.0284 -0.204 -0.164
# 4 2020-12-01 -0.05 -0.128 0.0959 -0.0635
# 5 2020-11-24 -0.05 -0.128 0.196 -0.0635
# ...
CodePudding user response:
Please try the code below:
# Subtract each column's mean from every column whose name starts with "V".
# The input is `df`, the data frame defined in the question; the centered
# result is stored in `dat2`, leaving `df` itself unchanged.
dat2 <- df %>%
  mutate(across(starts_with("V"), ~ .x - mean(.x)))