Using the database pasted below as an example, you can see that when I pipe everything except the intervention group (a categorical variable) to scale(),
the column names change to variable_name[,1]
when I just want variable_name.
This behaviour seems to appear when using the view()
function on the transformed df after scaling the data. Whatever the reason for the [,1] suffix, I would like it to be removed.
The code I've been using is:
library(dplyr)

# 1st attempt: apply scale() to the quantitative continuous columns, selected
# as the name range "ppara" through "lrp1". The result is assigned back to df.
df <- df %>%
  dplyr::mutate(across(.cols = c("ppara":"lrp1"), .fns = scale))

# 2nd attempt: select columns 3 to 52 by position and pass
# `.names = "{.col}"` so across() writes each result under the original
# column name. (The original snippet had one closing parenthesis too many
# here and did not parse.)
df %>%
  dplyr::mutate(across(.cols = c(3:52), .fns = scale, .names = "{.col}"))
# Printing the df in the console seems to work and keeps the column names, but the view function behaves differently (see below):
> df
id grup_int ppara ppard pparg nr1h3
1 50109018 LFD 2.88219852 0.21418973 1.29818239 0.40677902
2 50109019 LFD -0.09090266 -0.37583504 -0.17129420 0.24434046
3 50109025 LFD -0.51922065 -0.15165027 0.05554733 -0.42018092
4 50109026 NUTS NA -0.12085566 0.01702707 -0.01326412
5 50109027 LFD NA -0.14672313 -0.42809594 -0.82873851
6 50118001 OO NA 0.68103605 0.25670869 0.49210029
But when I call view(df), the data frame looks as shown in Image 1.
The df:
df <- structure(list(id = c(50109018, 50109019, 50109025, 50109026,
50109027, 50118001, 50202099, 50203004, 50203006, 50203008, 50203009,
50203010, 50203011, 50203012, 50203013, 50203014, 50203015, 50203016,
50203017, 50203019, 50203020, 50203022, 50203026, 50203027, 50203029,
50203030, 50203031, 50203032, 50430001, 50508026, 50508027, 50521001,
50521002, 50527001, 50601001, 50705001, 60901020, 60901021, 60901023,
60901024, 60901026, 60901027, 60901028, 60901029, 60901030, 60901031,
60901033, 60901034, 60901035, 60901036, 60901037, 60901038, 70107034,
70111021, 70111022, 70111023, 70111024, 70201047, 70204055, 70204056,
70211014, 70710002, 70713001, 70713002, 70802011, 70802012, 70802013,
70802015, 71801001, 71801002, 71801003, 110104017, 110104019,
110104023, 110104024, 110104027, 110104028, 110104029, 110104030,
110110005, 110113001, 110113003, 110113005, 110113006, 110113007,
110113008, 110606056, 110606061, 111201006, 111201007, 111201014,
111201017, 111201019, 111201026, 111202007, 111202009, 111202015,
120715011, 120715012, 120715019, 120715020, 120715021, 120715022,
120715025, 120715026, 120715027, 120715029, 120715030, 120715032,
120715033, 120715034, 120715035, 120715037, 130102008, 130102009,
130102010, 130102012, 130102013, 130102014, 130104004, 130105044,
130105045, 130106034, 130106037, 130106038, 130108008, 130108009,
140101088, 140101091, 140101096, 140101097, 140101099, 140102087,
140102088, 140102089, 140102090, 140102092, 140102095, 140103019,
140103020, 140103023, 140103024, 140103026, 140103027, 140103028,
140103029, 140103030, 140103033, 140103035, 140103036, 140103038
), grup_int = structure(c(3L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 3L,
3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 1L,
3L, 3L, 2L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 2L, 3L, 2L, 1L, 2L, 3L,
3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 3L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 2L, 1L,
3L, 2L, 2L, 1L, 2L, 2L, 1L, 3L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 2L,
2L, 3L, 2L, 2L, 1L, 1L, 2L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 2L,
3L, 3L, 1L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 2L, 3L), levels = c("OO",
"NUTS", "LFD"), label = "GENERAL: Grupo de intervención", class = "factor"),
ppara = c(3.761, 1.276, 0.918, NA, NA, NA, 1.437, NA, NA,
1.108, NA, 1.962, 1.816, 1.26, 0.498, 1.329, 1.447, NA, 1.848,
0.71, 0.909, NA, NA, 1.165, 1.079, 0.556, 1.197, NA, NA,
0.917, 0.37, 1.591, NA, 1.019, NA, 1.232, NA, NA, NA, 0.597,
NA, 1.136, 0.602, 0.724, 0.799, 0.827, 0.701, NA, NA, 1.973,
1.471, NA, NA, NA, NA, 0.845, 0.734, 0.987, NA, 0.389, 0.289,
1.86, 2.453, 0.895, 3.927, 1.893, 0.684, 0.782, 1.056, NA,
1.378, NA, 3.017, 4.415, NA, NA, 1.329, 1.086, 0.586, NA,
NA, NA, 1.093, 0.422, 1.278, 1.025, NA, 1.464, 0.872, 1.109,
1.342, NA, NA, 0.845, NA, 0.501, 0.765, NA, 2.709, 2.055,
2.078, NA, 1.664, 0.636, 1.358, 1.435, NA, 0.84, 1.348, 2.237,
2.627, 3.445, NA, NA, 0.736, 0.63, NA, NA, 0.599, 1.137,
2.161, NA, 0.731, 1.333, NA, NA, NA, 1.169, NA, 1.541, 1.046,
0.693, NA, 0.565, 1.172, NA, 0.801, 0.907, 3.903, 2.544,
NA, 1.658, 0.917, 1.667, NA, 2.54, NA, NA, NA, NA, NA), ppard = c(1.575,
1.096, 1.278, 1.303, 1.282, 1.954, 1.704, 1.518, 1.637, 0.951,
1.122, 1.025, 1.86, 0.904, 0.53, 1.37, 1.704, 0.751, 1.443,
0.78, 0.678, 1.856, 1.449, 1.08, 1.038, 0.727, 1.33, 0.96,
1.299, 0.851, 0.777, 1.465, NA, 1.121, NA, 1.304, 0.715,
1.549, 1.211, 0.764, 1.61, 1.373, 1.055, 1.054, 0.859, 0.987,
0.893, 1.187, 0.924, 0.989, 1.252, NA, 3.55, 1.074, 0.597,
0.811, 1.439, 1.006, 1.169, 0.699, 0.718, 4.058, 1.931, 1.045,
4.25, 1.931, 0.613, 1.159, 1.123, NA, 1.758, 1.517, 2.089,
5.654, 2.191, 3.724, 1.693, 0.964, 4.672, 1.276, 1.108, 1.363,
1.221, 0.344, 1.363, 1.397, NA, 1.813, 1.203, 1.093, 1.307,
0.623, NA, 1.244, NA, 0.822, 0.997, 1.595, 1.73, 2.223, 1.841,
1.58, 2.088, 1.032, 1.399, 1.788, 1.323, 1.157, 2.091, 1.868,
1.893, 1.98, NA, 0.429, 0.779, 0.714, 0.928, 0.971, 1.201,
0.539, 1.222, 1.186, 0.522, 1.037, 0.494, 0.671, 1.52, 0.946,
1.019, 1.54, 1.188, 1.133, 1.424, 0.894, 0.943, 4.713, 1.041,
1.476, 1.873, 1.515, 1.31, 1.118, 1.562, 1.464, NA, 1.069,
NA, 1.442, 1.798, NA, 2.134), pparg = c(2.201, 1.171, 1.33,
1.303, 0.991, 1.471, 1.795, 1.221, 2.188, 1.158, 1.532, 0.85,
2.393, 0.791, 0.773, 2.011, 1.445, 1.066, 0.392, 0.392, 1.361,
1.025, 1.471, 1.063, 0.89, 0.797, 0.981, 0.5, NA, 0.5, 0.682,
2.783, NA, 1.322, NA, 1.044, 0.893, 2.676, 0.714, 0.7, 1.662,
1.259, 1.135, 0.838, 1.075, 1.45, 1.311, 1.044, 1.223, 1.329,
0.932, NA, 3.831, 1.571, 0.879, 0.6, 0.82, 0.862, 1.545,
0.451, 0.663, 0.818, 1.539, 0.842, 3.584, 1.501, 0.453, 0.738,
1.091, NA, 1.12, 1.182, 2.528, 4.489, 2.161, NA, 1.006, 0.702,
3.117, 1.302, 0.857, 1.069, 0.96, 0.354, 0.902, 0.638, NA,
1.51, 1.011, 0.421, 1.445, 0.634, NA, 1.197, NA, 1.122, 1.168,
1.388, 0.789, 1.51, 1.334, 0.813, 1.718, 1.413, 1.242, 0.903,
1.134, 1.366, 1.451, 1.635, 3.1, 2.107, NA, 1.033, 0.719,
0.961, 1.203, 1.19, 1.094, 0.846, 1.724, 1.746, 0.169, 0.651,
0.4, 0.761, 1.662, 1.324, 0.645, 2.503, 0.555, 1.298, 1.852,
2.334, 1.608, 2.606, 1.2, 1.631, 1.262, 1.439, 0.689, 0.856,
0.93, 1.167, NA, 0.848, NA, 0.909, 2.218, NA, 2.414), nr1h3 = c(1.55,
1.451, 1.046, 1.294, 0.797, 1.602, 1.839, 1.406, 0.87, 1.436,
1.318, 1.028, 1.462, 1.919, 0.786, 1.597, 0.932, 0.936, 0.802,
1.184, 0.956, 1.712, 0.932, 1.023, 1.001, 1.002, 1.682, 0.735,
1.267, 1.052, 0.824, 1.334, 0.026, 1.345, NA, 1.468, 1.317,
1.581, 1.809, 0.845, 1.347, 1.769, 1.206, 1.584, 1.19, 1.016,
1.138, 1.066, 1.097, 1.137, 2.099, NA, NA, 0.999, 0.727,
0.689, 0.678, 0.825, 1.47, 0.612, 0.798, 1.093, 2.946, 0.999,
NA, 1.849, NA, 1.123, 1.617, NA, 1.446, 1.449, 1.716, NA,
3.037, 2.588, 1.344, 2.2, 4.011, NA, NA, NA, 1.176, 0.268,
1.26, 1.366, NA, 1.914, NA, NA, 1.041, 0.536, NA, 2.36, NA,
0.881, 1.357, 1.44, 1.713, 0.967, 1.728, 1.145, 1.854, 0.881,
1.77, 1.597, 1.181, 0.924, 1.38, 1.754, 2.612, 1.751, NA,
0.512, NA, 0.97, 1.604, 0.761, 0.738, 0.711, 1.132, 1.285,
0.451, 0.497, 0.538, 0.779, 0.941, 0.881, 0.763, 1.349, 1.693,
0.91, 1.128, 0.734, 0.833, 3.879, 0.943, 1.457, 1.879, 1.021,
1.016, 1.316, 1.012, 1.259, NA, 1.031, NA, 1.176, 1.588,
NA, 2.899), nr1h2 = c(0.861, 1.744, 0.824, 0.665, 0.913,
1.355, 1.927, 1.568, 1.18, 0.699, 0.952, 1.397, 2.622, 1.389,
0.874, 0.835, 1.304, 0.733, 0.953, 0.803, 0.921, 1.867, 1.56,
0.89, 0.907, 0.518, 1.162, 1.178, 2.069, 0.929, 0.994, 1.16,
0.044, 1.167, NA, 1.383, 1.089, 1.415, 1.571, 0.777, 1.754,
2.031, 0.885, 1.114, 1.388, 1.485, 0.941, 1.146, 0.831, 1.005,
0.957, NA, 3.479, 0.736, 0.613, 0.871, 0.984, 0.851, 1.376,
0.505, 0.632, 1.397, 2.122, 0.927, 8.337, 1.616, 1.063, 1.407,
1.04, NA, 1.993, 1.859, 1.679, 4.436, 1.825, 6.177, 1.51,
1.161, 6.476, 1.734, 0.901, 0.82, 1.325, 0.376, 0.89, 1.25,
NA, 1.447, 1.019, 1.333, 0.809, 0.545, NA, 1.036, NA, 0.887,
0.944, 1.192, 1.848, 1.974, 1.281, 1.992, 1.909, 0.774, 1.436,
1.656, 1.508, 1.796, 1.754, 2.372, 1.617, 1.851, NA, 0.575,
0.624, 0.683, 0.824, 0.87, 1.099, 0.621, 1.278, 1.486, 0.858,
0.715, 0.481, 0.636, 0.872, 0.953, 0.67, 1.356, 1.601, 0.844,
1.303, 0.486, 0.808, 5.391, 0.994, 1.037, 1.677, 1.6, 0.718,
0.846, 2.001, 1.246, NA, 1.159, NA, 1.485, 1.574, NA, 2.04
), rxra = c(1.402, 1.28, 1.076, 1.105, 1.4, 1.327, 1.722,
1.365, 1.151, 0.785, 1.047, 1.458, 1.578, 0.952, 0.982, 1.182,
1.156, 1.045, 0.835, 0.76, 0.772, 1.739, 1.392, 1.907, 0.95,
0.938, 1.166, 1.047, NA, 1.738, 1.378, 1.382, NA, 1.296,
NA, 0.89, 1.009, 1.267, 1.101, 1.29, 0.94, 1.41, 0.731, 1.464,
1.214, 0.964, 0.857, 0.876, 0.663, 0.83, 0.861, NA, 3.695,
0.735, 0.59, 0.675, 1.472, 1.343, 1.282, 0.73, 0.821, 2.176,
1.872, 0.954, 8.058, 1.496, 1.141, 1.615, 1.134, NA, 1.435,
1.847, 1.888, 5.043, 2.009, 2.9, 1.077, 1.204, 3.304, 1.454,
0.779, 1.442, 1.442, 0.444, 0.639, 1.312, NA, 1.267, 1.349,
1.472, 0.909, 0.973, NA, 1.519, NA, 0.885, 0.72, 1.428, 1.543,
1.539, 1.9, 2.659, 3.304, 1.217, 1.241, 1.875, 1.541, 1.449,
2.359, 2.131, 1.58, 2.217, NA, 0.689, 0.927, 1.052, 1.1,
0.75, 0.694, 0.463, 1.109, 1.671, 0.929, 0.768, 0.641, 1.082,
1.198, 1.434, 1.109, 1.303, 1.135, 0.986, 1.549, 1.061, 1.197,
4.063, 1.309, 1.345, 1.357, 1.996, 0.66, 1.188, 1.314, 1.458,
NA, 1.333, NA, 1.497, 1.772, NA, 1.609), rxrb = c(0.987,
0.938, 1.882, 1.506, 2.067, 1.526, 1.223, 1.192, 1.054, 0.854,
1.192, 2.21, 1.78, 0.895, 0.255, 0.78, 0.633, 0.924, 0.868,
0.702, 0.721, 1.814, 1.572, 1.714, 1.337, 0.884, 1.347, 1.239,
1.737, 2.171, 1.062, 1.48, 0.024, 0.645, NA, 1.207, 1.153,
0.922, 1.574, 0.887, 0.636, 1.64, 0.777, 1.241, 1.184, 0.813,
0.372, 1.036, 1.146, 0.842, 1.166, NA, 4.097, 1.564, 0.689,
1.183, 1.281, 0.761, 1.563, 0.818, 0.873, 2.011, 1.277, 1.357,
4.742, 1.925, 1.128, 1.26, 1.239, NA, 1.461, 1.723, 1.514,
6.87, 1.437, 0.09, 1.99, 1.385, 0.069, 1.341, 2.724, 1.618,
1.252, 0.449, 2.379, 1.635, NA, 1.555, 1.71, 0.952, 1.409,
4.682, NA, 1.596, NA, 1.265, 0.97, 1.461, 2.087, 1.55, 1.762,
1.461, 1.574, 2.781, 1.1, 1.741, 1.257, 1.468, 2.02, 1.91,
2.047, 1.462, NA, 0.6, 0.75, 1.177, 1.023, 0.648, 1.029,
0.802, 0.622, 0.906, 0.632, 1.259, 1.054, 1.213, 1.135, 1.253,
1.481, 1.517, 1.918, 2.099, 1.541, 1.003, 0.733, 0.122, 1.748,
1.264, 1.871, 2.006, 1.552, 1.448, 1.116, 1.379, NA, 1.387,
NA, 1.436, 2.086, NA, 1.173), cyp27a1 = c(1.058, 1.079, 1.152,
0.743, 1.803, 1.157, 2.25, 1.193, 1.448, 0.649, 1.718, 0.972,
1.393, 0.908, 1.3, 1.389, 1.16, 0.666, 0.799, 0.665, 0.991,
1.535, 1.257, 1, 1.8, 0.882, 1.087, 1.298, NA, 1.37, 0.765,
1.807, NA, 1.104, NA, 0.967, 1.009, 1.03, 1.139, 0.666, 1.462,
1.532, 0.767, 1.24, 0.997, 1.005, 0.736, 1.122, 0.868, 0.878,
1.129, NA, NA, 1.011, 0.425, 0.615, 0.919, 0.848, 1.178,
0.66, 0.714, 6.647, 2.254, 0.804, NA, 1.831, 1.008, 1.421,
1.215, NA, 1.92, 1.688, 1.734, NA, 1.571, 3.858, 1.146, 1.128,
4.592, 1.865, 1.419, 1.056, 1.201, 0.324, 1.159, 1.099, NA,
1.693, 1.503, 1.175, 1.596, 0.836, NA, 1.532, NA, 1.097,
0.913, 1.837, 1.334, 2.072, 2.149, 2.595, 2.654, 1.237, 1.28,
2.118, 1.153, 1.616, 2.185, 2.29, 3.089, 1.429, NA, 0.981,
0.546, 0.997, 0.807, 0.666, 1.393, 0.574, 0.665, 2.131, 0.348,
1.269, 0.271, 0.674, 0.81, 0.908, 0.877, 1.144, 1.469, 1,
1.309, 0.841, 0.785, 5.332, 1.502, 1.248, 1.705, 2.165, 1.046,
1.227, 1.444, 1.114, NA, 1.222, NA, 1.49, 1.368, NA, 3.854
)), row.names = c(NA, -151L), class = "data.frame")
CodePudding user response:
The issue is that scale
returns a matrix,
not a vector; i.e. when it is applied to a vector, the result is a one-column matrix, and that is what the [,1]
is telling you.
This can be seen by calling str()
:
Note: I simplified the example data to the first four columns.
library(dplyr)
# Keep only the first four columns so the str() output stays short.
df <- select(df, 1:4)
# scale is passed to across() unchanged, so each selected column becomes
# whatever scale() returns for it; str() prints the resulting structure.
df |>
  dplyr::mutate(across(ppara:ppard, scale)) |>
  str()
#> 'data.frame': 151 obs. of 4 variables:
#> $ id : num 50109018 50109019 50109025 50109026 50109027 ...
#> $ grup_int: Factor w/ 3 levels "OO","NUTS","LFD": 3 3 3 2 3 1 1 3 3 3 ...
#> ..- attr(*, "label")= chr "GENERAL: Grupo de intervención"
#> $ ppara : num [1:151, 1] 2.8822 -0.0909 -0.5192 NA NA ...
#> ..- attr(*, "scaled:center")= num 1.35
#> ..- attr(*, "scaled:scale")= num 0.836
#> $ ppard : num [1:151, 1] 0.214 -0.376 -0.152 -0.121 -0.147 ...
#> ..- attr(*, "scaled:center")= num 1.4
#> ..- attr(*, "scaled:scale")= num 0.812
From the output we see that the scaled columns are numeric matrices of dimension 151 x 1.
To prevent that, you have to explicitly convert the result to a vector, either by picking the single column via [,1]
or by using as.numeric
:
# Take column 1 of the matrix returned by scale(), so each scaled column is
# stored as a plain numeric vector; str() prints the resulting structure.
df |>
  dplyr::mutate(across(ppara:ppard, \(col) scale(col)[, 1])) |>
  str()
#> 'data.frame': 151 obs. of 4 variables:
#> $ id : num 50109018 50109019 50109025 50109026 50109027 ...
#> $ grup_int: Factor w/ 3 levels "OO","NUTS","LFD": 3 3 3 2 3 1 1 3 3 3 ...
#> ..- attr(*, "label")= chr "GENERAL: Grupo de intervención"
#> $ ppara : num 2.8822 -0.0909 -0.5192 NA NA ...
#> $ ppard : num 0.214 -0.376 -0.152 -0.121 -0.147 ...
# Wrap scale() in as.numeric() so each scaled column is stored as a plain
# numeric vector; str() prints the resulting structure.
df |>
  dplyr::mutate(across(ppara:ppard, \(col) as.numeric(scale(col)))) |>
  str()
#> 'data.frame': 151 obs. of 4 variables:
#> $ id : num 50109018 50109019 50109025 50109026 50109027 ...
#> $ grup_int: Factor w/ 3 levels "OO","NUTS","LFD": 3 3 3 2 3 1 1 3 3 3 ...
#> ..- attr(*, "label")= chr "GENERAL: Grupo de intervención"
#> $ ppara : num 2.8822 -0.0909 -0.5192 NA NA ...
#> $ ppard : num 0.214 -0.376 -0.152 -0.121 -0.147 ...
CodePudding user response:
@stefan has already explained the cause; here is another approach:
We could write our own replacement for the scale()
function:
library(dplyr)
# Centre and scale a vector using caret's preProcess(), returning a plain
# vector instead of the one-column matrix that base scale() produces.
#
# Args:
#   x: the values to standardise; they are wrapped in a one-column data frame
#      (column name `x`) before being handed to caret.
#
# Returns:
#   The `x` column of the data frame returned by predict() on the fitted
#   pre-processing object.
#
# Raises:
#   An error naming the missing package when caret is not installed.
my_scale <- function(x) {
  # require() merely returns FALSE (with a warning) when a package is missing,
  # which would surface later as "could not find function preProcess".
  # Check explicitly and fail with a clear message instead.
  if (!requireNamespace("caret", quietly = TRUE)) {
    stop("Package 'caret' is required by my_scale().", call. = FALSE)
  }
  # Build the one-column data frame once and reuse it for fit and predict.
  input <- data.frame(x)
  fit <- caret::preProcess(input, method = c("center", "scale"))
  predict(fit, input)$x
}
# Apply my_scale() to every column in the range "ppara" through "cyp27a1",
# store the result as df1, and open it in the data viewer.
df1 <- dplyr::mutate(
  df,
  across(.cols = c("ppara":"cyp27a1"), .fns = my_scale)
)
View(df1)