I have two data frames which I want to process with a for loop. Their structures are the following:
> m_ivae
structure(list(fecha = structure(c(17805, 17836, 17866, 17897,
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 18170,
18201, 18231, 18262, 18293, 18322, 18353, 18383, 18414, 18444,
18475, 18506, 18536, 18567, 18597, 18628, 18659, 18687, 18718,
18748, 18779, 18809), class = "Date"), IVAE = c(109.19, 110.09,
111.34, 111.84, 112.49, 111.99, 113.11, 111.89, 112.11, 112.75,
113.7, 112.93, 112.43, 114.88, 114.5, 114.93, 115.13, 105.54,
91.71, 87.93, 93.06, 96.74, 103.26, 106.76, 109.6, 110.74, 112,
112.73, 114.97, 115.01, 114.67, 115.78, 114.52, 111.91), `Agricultura, Ganadería, Silvicultura y Pesca` = c(99.58,
98.71, 103.44, 101.83, 101.31, 98.87, 99.06, 99.46, 96.55, 100.47,
98.79, 98.91, 100.17, 101.98, 100.48, 99.64, 96.04, 92.42, 97.21,
96.11, 100.57, 94.82, 99.07, 103.63, 97.34, 97.17, 95.46, 98.46,
101.02, 100.24, 100.6, 99.95, 103.07, 98.23), `Índice de Producción Industrial (IPI): Industrias Manufactureras, Explotación de Minas y Canteras y Otras Actividades Industriales` = c(101.4,
103.4, 105.07, 106.72, 108.45, 107.76, 107.25, 105.75, 107.03,
107.31, 106.61, 106.95, 106.61, 110.18, 108.68, 109.66, 111.32,
100.02, 76.77, 73.46, 81.99, 94.83, 100.64, 104.51, 106.74, 107.04,
108.75, 110.8, 110.59, 111.25, 108.82, 110.03, 111.32, 107.61
), Construcción = c(112.25, 117.5, 121.37, 124.32, 122.64, 121.21,
128.69, 122.28, 126.55, 120.13, 137.47, 129.82, 126.83, 132.92,
131.72, 137.56, 130.89, 117.08, 87.62, 67.49, 79.56, 88.97, 117.57,
110.01, 118.02, 117.61, 121.64, 120.76, 120.99, 118.96, 122.7,
122.59, 101.2, 106.3), `Comercio, Transporte y Almacenamiento, Actividades de Alojamiento y de Servicio de Comidas` = c(112.2,
113.03, 113.03, 115.69, 113.74, 114.7, 115.93, 115.3, 114.25,
115.05, 116.68, 114.84, 114.56, 116.58, 117.77, 119.19, 119.15,
103.41, 76.66, 75.21, 90.32, 91.72, 97.53, 105.21, 110.43, 109.72,
112.41, 114.05, 115.88, 117.29, 115.05, 114.69, 116.79, 109.68
), `Información y Comunicaciones` = c(115.49, 116.57, 116.18,
114.29, 113.92, 113.82, 116.45, 115.96, 114.81, 115.72, 116.07,
115.42, 115.32, 115.59, 114.22, 114.21, 113.05, 112.42, 111.52,
108.77, 113.92, 114.07, 115.02, 115.79, 117.78, 117.02, 119.21,
119.56, 125.27, 123.15, 118.56, 119.68, 120.02, 127.68), `Actividades Financieras y de Seguros` = c(117.96,
122.17, 120.93, 119.53, 121.15, 122.17, 125.01, 121.22, 127.48,
124.1, 124.56, 126.86, 124.59, 129.96, 131.74, 131.56, 138.4,
134.4, 131.6, 127.16, 124.61, 116.65, 120.28, 119.57, 127.23,
138.75, 141.25, 138.8, 138.79, 141.28, 141.62, 143.53, 137.62,
139.72), `Actividades Inmobiliarias` = c(113.31, 113.83, 114.41,
114.69, 114.97, 115.98, 116.2, 116.22, 115.64, 115.79, 115.95,
116.24, 117.6, 117.84, 115.35, 108.98, 105.89, 103.74, 103.16,
102.5, 102.42, 102.41, 104.16, 107.74, 112.87, 116.57, 115.68,
113.47, 112.41, 112.08, 112.42, 112.74, 113.21, 112.56), `Actividades Profesionales, Científicas, Técnicas, Administrativas, de Apoyo y Otros Servicios` = c(111.84,
111.92, 114.11, 116.44, 117.77, 112.96, 114.64, 113.67, 112.33,
115.12, 113.31, 114.14, 115.46, 117.17, 120.57, 124.26, 122.68,
99.51, 86.36, 79.21, 81.56, 83.6, 88.71, 97.76, 98.16, 101.04,
102.68, 108.37, 113.64, 114.82, 115.91, 118.35, 118.74, 109.14
), `Actividades de Administración Pública y Defensa, Enseñanza, Salud y Asistencia Social` = c(110.04,
108.07, 109.24, 105.85, 108.99, 109.12, 109.6, 109.31, 108.63,
111.22, 111.25, 109.67, 107.59, 108.8, 106.9, 105.82, 108.24,
107.71, 106.75, 104.67, 98.47, 102.09, 108.94, 109.34, 110.3,
110.01, 109.3, 107.24, 113.46, 111.17, 113.44, 116.42, 112.98,
114.37)), row.names = c(NA, -34L), class = c("tbl_df", "tbl",
"data.frame"))
> m_ipc
structure(list(fecha = structure(c(17805, 17836, 17866, 17897,
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 18170,
18201, 18231, 18262, 18293, 18322, 18353, 18383, 18414, 18444,
18475, 18506, 18536, 18567, 18597, 18628, 18659, 18687, 18718,
18748, 18779, 18809, 18840, 18871), class = "Date"), `Índice General` = c(113.02,
112.82, 112.3, 112.24, 112.44, 112.69, 112.87, 113.01, 112.85,
112.56, 112.16, 111.99, 112.04, 112.17, 112.29, 112.15, 112,
112.09, 111.69, 111.94, 112.59, 112.49, 111.82, 111.56, 111.81,
111.98, 112.2, 112.49, 113.19, 114.08, 114.81, 114.84, 115.51,
116.36, 116.63, 117.1), `Alimentos y Bebidas no Alcohólicas` = c(120.22,
120.56, 120.44, 120.81, 121.12, 121.39, 121.71, 122.29, 122.61,
121.82, 120.79, 120.64, 121.08, 121.48, 121.88, 122.35, 122.22,
122.68, 124.24, 125.06, 126.14, 125.84, 123.33, 122.36, 121.89,
122.24, 122.33, 122.5, 123.12, 124.09, 124.19, 123.97, 124.85,
125.76, 125.93, 127.18), `Bebidas Alcohólicas, Tabaco` = c(146,
145.59, 145.84, 147.3, 146.86, 146.84, 147.11, 147.74, 148.21,
149.24, 150.04, 150.05, 150.11, 149.9, 150.54, 151.89, 151.81,
152.29, 152.01, 153.09, 152.72, 154.65, 154.56, 152.64, 153.4,
153.59, 153.87, 154.49, 155.21, 155.63, 155.4, 155.2, 156.36,
156.2, 156, 157.11), `Prendas de Vestir y Calzado` = c(92.82,
92.77, 92.74, 92.76, 92.93, 92.89, 92.9, 92.69, 92.57, 92.42,
92.13, 91.42, 91.44, 91.17, 91.03, 91.09, 91.43, 91.88, 91.84,
91.84, 91.84, 91.84, 91.84, 92.05, 92.55, 92.6, 92.75, 93, 93.5,
93.84, 93.98, 94.35, 94.5, 94.71, 94.86, 94.85), `Alojamiento, Agua, Electricidad, Gas y otros Combustibles` = c(140.49,
139.57, 138.12, 137.52, 137.35, 137.51, 136.16, 135.75, 135.34,
134.77, 134.82, 134.79, 133.85, 134.04, 134.93, 132.51, 131.61,
131.68, 131.02, 131.03, 131.83, 129.07, 128.61, 129, 131.34,
131.41, 131.97, 132.01, 134.25, 135.03, 137.66, 136.74, 136.96,
140.04, 141.58, 141.93), `Muebles, Artículos para el Hogar y para la Conservación Ordinaria del Hogar` = c(100.24,
100.36, 100.14, 100.29, 100.52, 100.16, 100.25, 100.3, 99.86,
99.73, 99.64, 99.63, 99.48, 99.16, 98.94, 99.16, 99.54, 99.98,
100.08, 100.13, 100.02, 99.83, 100.23, 100.39, 100.07, 100.17,
100.92, 101, 101.98, 102.74, 103.46, 103.81, 104.38, 105.06,
105.3, 106.45), Salud = c(99.37, 99.28, 99.29, 99.29, 99.27,
99.27, 99.34, 99.44, 99.54, 99.6, 99.77, 100.06, 100.07, 100.14,
100.12, 100.17, 100.01, 99.98, 99.96, 100.19, 100.22, 100.9,
100.97, 101.13, 101.24, 101.9, 101.88, 102.04, 102.93, 103.14,
103.37, 103.83, 104.14, 104.19, 104.45, 104.53), Transporte = c(112.15,
110.75, 108.27, 106.83, 107.41, 108.94, 111.01, 111.41, 110.51,
110.51, 109.34, 108.64, 109.05, 109.47, 108.79, 108.56, 107.88,
106.73, 100.48, 100.6, 102.77, 104.29, 103.76, 103.45, 103.59,
103.53, 103.64, 105.12, 105.76, 109.23, 111.09, 111.72, 112.93,
113.5, 112.71, 112.13), Comunicaciones = c(84.77, 84.69, 84.69,
84.64, 84.32, 84.32, 84.32, 84.31, 84.1, 83.78, 83.78, 83.78,
83.89, 83.89, 83.7, 83.2, 83.16, 83.16, 83.2, 83.17, 83.17, 82.99,
82.99, 83.03, 83.19, 83.19, 83.17, 83.12, 83.12, 83.12, 83.12,
83.11, 83.11, 83.09, 83.09, 83.09), `Recreación y Cultura` = c(87.35,
87.37, 87.4, 87.77, 88.71, 88.48, 88.72, 88.75, 88.08, 88.14,
88.18, 87.97, 87.81, 87.72, 87.58, 87.63, 87.89, 87.74, 87.67,
87.6, 87.65, 87.81, 88.29, 87.68, 88.02, 88.08, 88.14, 88.06,
87.86, 88.11, 88.51, 88.77, 89.12, 89.11, 88.98, 89.14), Educación = c(112.83,
112.83, 112.83, 113.27, 113.27, 113.27, 113.27, 113.27, 113.27,
113.27, 113.27, 113.27, 113.65, 113.65, 113.65, 114.06, 114.06,
114.06, 114.06, 114.06, 114.06, 114.06, 114.06, 114.06, 114.26,
114.26, 114.26, 114.26, 114.26, 114.26, 114.26, 114.26, 114.26,
114.26, 114.26, 114.26), `Restaurantes y Hoteles` = c(122.94,
122.7, 122.81, 123.41, 123.37, 123.54, 123.49, 123.57, 123.55,
123.63, 123.59, 123.5, 123.58, 123.54, 123.93, 124.32, 124.44,
124.44, 124.5, 124.61, 124.7, 125.04, 125.34, 125.52, 125.52,
125.8, 126.01, 126.36, 126.65, 126.97, 127.49, 127.95, 129.19,
129.73, 130.46, 131.3), `Bienes y Servicios Diversos` = c(107.55,
107.75, 107.6, 107.39, 107.4, 107.55, 107.36, 107.13, 107.22,
107.26, 107.4, 107.48, 107.42, 107.4, 107.3, 107.37, 107.55,
108.21, 108.38, 108.39, 108.46, 109.45, 109.67, 109.42, 109.65,
109.65, 109.99, 110.25, 110.37, 110.19, 110.34, 110.36, 111.16,
111.8, 112.28, 112.23)), row.names = c(NA, -36L), class = c("tbl_df",
"tbl", "data.frame"))
And I am using the following code:
# Script from the question: for each data frame, clean the column names, add
# lag columns and coerce the non-first columns to numeric. As posted, running
# it stops with the "Can't recycle" error quoted right after this snippet.
library(janitor)
wide_dataframes = list(m_ivae,m_ipc)
names(wide_dataframes) = c('m_ivae','m_ipc')
for (nm in names(wide_dataframes)){
df = get(nm)  # fetches the object whose name is stored in nm; the list element itself is not used
df = clean_names(df)
df[paste0("lag", 1:3)] = lapply(1:3, lag, x=df[,2:ncol(df)])  # three new columns (lag1..lag3), each given lag() of the whole block of columns 2..ncol(df)
df[,2:ncol(df)] = apply(df[,2:ncol(df)],2,function(x) as.numeric(as.character(x)))  # the statement named in the reported error
assign(nm, df)  # stores the result under the name held in nm
}
However, after I run the for loop, I get the following error message:
Error: Can't recycle `apply(df[, 2:ncol(df)], 2, function(x) as.numeric(as.character(x)))` (size 40) to size 13.
I tried to fix it by removing the column specification on the left-hand side of the fifth line of the for loop, like this:
# Attempted fix from the question: identical to the first loop except that the
# result of apply() is assigned to df itself instead of to df[,2:ncol(df)].
for (nm in names(wide_dataframes)){
df = get(nm)
df = clean_names(df)
df[paste0("lag", 1:3)] = lapply(1:3, lag, x=df[,2:ncol(df)])
df = apply(df[,2:ncol(df)],2,function(x) as.numeric(as.character(x)))  # df is replaced by the result computed from columns 2..ncol(df) only, so the first column is no longer part of it
assign(nm, df)
}
This solves the error, but removes the first column, which I need to keep in order to perform a left join with a different data frame later on.
CodePudding user response:
The issue seems to be the assignment to `df[paste0("lag", 1:3)]`. `lag` is applied to the whole data, or rather to the part of it selected by `df[,2:ncol(df)]`, so what is assigned to the left-hand side of `=` is not of the same length: the left-hand side names just 3 columns, compared to the original `ncol(df)-1` columns being lagged. As we are already using a `for` loop, the inner `lag` can also be done in a `for` loop, creating one new column per original column and lag:
# For each data frame listed in `wide_dataframes`: clean the column names,
# append lag-1 to lag-3 copies of every column except the first, coerce all
# columns except the first to numeric, and store the result under the same
# name as the list element (overwriting that variable).
for (nm in names(wide_dataframes)) {
  # Read from the list rather than get(nm), so that re-running the loop starts
  # from the columns stored in the list instead of lagging the lag columns
  # created by a previous run.
  df <- wide_dataframes[[nm]]
  df <- clean_names(df)
  # Names of the columns to be lagged: everything except the first column.
  nm1 <- names(df)[-1]
  for (i in 1:3) {
    # One new column per original column and lag, e.g. "<column>lag1".
    nm2 <- paste0(nm1, "lag", i)
    # dplyr::lag() is namespaced explicitly: a bare lag() resolves to
    # stats::lag() when dplyr is not attached, and that function does not
    # shift the values. It is applied column by column so the result does not
    # depend on lag() accepting a whole data frame.
    df[nm2] <- unname(lapply(df[nm1], dplyr::lag, n = i))
  }
  # The first column is left as is; only the remaining columns are converted.
  df[-1] <- lapply(df[-1], function(x) as.numeric(as.character(x)))
  assign(nm, df)
}
Checking the number of columns after running the loop:
> ncol(m_ivae)
[1] 41
> ncol(m_ipc)
[1] 53
Compare with the original number of columns:
> sapply(wide_dataframes, ncol)
m_ivae m_ipc
11 14