Home > other >  Optimize 2 consecutive nested for loops - Is it possible to optimize using data.table or apply funct
Optimize 2 consecutive nested for loops - Is it possible to optimize using data.table or apply funct

Time:11-18

I have a data.table made of data.tables as per the dput at the end of this question. I manipulate this data.table of data.tables using the following nested for-loops:

# Build a two-level list: test_E2[[i]][[j]] holds, for ID value i, element j
# (extracted with `[[`) of every V1 entry in the rows where ID == i.
test_E2 <- list()
for (i in unique(lst_512_32_E2$ID)){
     # The ID value itself is used as the list position, so IDs are presumably
     # small positive integers -- TODO confirm.
     test_E2[[i]] <- list()
     # NOTE(review): V1[[i]] picks the V1 entry at row position i (not the rows
     # where ID == i) and only decides how many values j takes. In the dput
     # sample each V1 entry is a one-column data.table, so length() is 1 there.
     # 1:length(x) would iterate over c(1, 0) if that entry had length 0.
     for (j in 1:length(lst_512_32_E2$V1[[i]])){
          # Filter rows on ID (data.table syntax), then pull element j out of
          # each nested table; sapply() simplifies equal-length results into a
          # matrix with one column per matching row.
          test_E2[[i]][[j]] <- sapply(lst_512_32_E2[ID==i]$V1, '[[', j)
     }
}

# Transpose every entry of test_E2, keeping the same two-level list layout:
# t_test_E2[[i]][[j]] is t() applied to test_E2[[i]][[j]].
t_test_E2 <- list()
for (i in 1:length(test_E2)){
     t_test_E2[[i]] <- list()
     for (j in 1:length(test_E2[[i]])){
          # t() swaps the rows and columns of the stored result.
          t_test_E2[[i]][[j]] <- t(test_E2[[i]][[j]])
     }
}

Is there any chance these for-loops could be rewritten/optimized while staying in the data.table world? Failing that, what about an apply/mapply-based approach as a second alternative? Note that I want the final output to be a matrix.

dput:

print(dput(lst_512_32_E2[1:2]))
structure(list(ID = c(1L, 1L), gl = structure(1:2, levels = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", 
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", 
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", 
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", 
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", 
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", 
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100", 
"101", "102", "103", "104", "105", "106", "107", "108", "109", 
"110", "111", "112", "113", "114", "115", "116", "117", "118", 
"119", "120", "121", "122", "123", "124", "125", "126", "127", 
"128", "129", "130", "131", "132", "133", "134", "135", "136", 
"137", "138", "139", "140", "141", "142", "143", "144", "145", 
"146", "147", "148", "149", "150", "151", "152", "153", "154", 
"155", "156", "157", "158", "159", "160", "161", "162", "163", 
"164", "165", "166", "167", "168", "169", "170", "171", "172", 
"173", "174", "175", "176", "177", "178", "179", "180", "181", 
"182", "183", "184", "185", "186", "187", "188", "189", "190", 
"191", "192", "193", "194", "195", "196", "197", "198", "199", 
"200", "201", "202", "203", "204", "205", "206", "207", "208", 
"209", "210", "211", "212", "213", "214", "215", "216", "217", 
"218", "219", "220", "221", "222", "223", "224", "225", "226", 
"227", "228", "229", "230", "231", "232", "233", "234", "235", 
"236", "237", "238", "239", "240", "241", "242", "243", "244", 
"245", "246", "247", "248", "249", "250", "251", "252", "253", 
"254", "255", "256", "257", "258", "259", "260", "261", "262", 
"263", "264", "265", "266", "267", "268", "269", "270", "271", 
"272", "273", "274", "275", "276", "277", "278", "279", "280", 
"281", "282", "283", "284", "285", "286", "287", "288", "289", 
"290", "291", "292", "293", "294", "295", "296", "297", "298", 
"299", "300", "301", "302", "303", "304", "305", "306", "307", 
"308", "309", "310", "311", "312", "313", "314", "315", "316", 
"317", "318", "319", "320", "321", "322", "323", "324", "325", 
"326", "327", "328", "329", "330", "331", "332", "333", "334", 
"335", "336", "337", "338", "339", "340", "341", "342", "343", 
"344", "345", "346", "347", "348", "349", "350", "351", "352", 
"353", "354", "355", "356", "357", "358", "359", "360", "361", 
"362", "363", "364", "365", "366", "367", "368", "369", "370", 
"371", "372", "373", "374", "375", "376", "377", "378", "379", 
"380", "381", "382", "383", "384", "385", "386", "387", "388", 
"389", "390", "391", "392", "393", "394", "395", "396", "397", 
"398", "399", "400", "401", "402", "403", "404", "405", "406", 
"407", "408", "409", "410", "411", "412", "413", "414", "415", 
"416", "417", "418", "419", "420", "421", "422", "423", "424", 
"425", "426", "427", "428", "429", "430", "431", "432", "433", 
"434", "435", "436", "437", "438", "439", "440", "441", "442", 
"443", "444", "445", "446", "447", "448", "449", "450", "451", 
"452", "453", "454", "455", "456", "457", "458", "459", "460", 
"461", "462", "463", "464", "465", "466", "467", "468", "469", 
"470", "471", "472", "473", "474", "475", "476", "477", "478", 
"479", "480", "481", "482", "483", "484", "485", "486", "487", 
"488", "489", "490", "491", "492", "493", "494", "495", "496", 
"497", "498", "499", "500", "501", "502", "503", "504", "505", 
"506", "507", "508", "509", "510", "511", "512", "513", "514", 
"515", "516", "517", "518", "519", "520", "521", "522", "523", 
"524", "525", "526", "527", "528", "529", "530", "531", "532", 
"533", "534", "535", "536", "537", "538", "539", "540", "541", 
"542", "543", "544", "545", "546", "547", "548", "549", "550", 
"551", "552", "553", "554", "555", "556", "557", "558", "559", 
"560", "561", "562", "563", "564", "565", "566", "567", "568", 
"569", "570", "571", "572", "573", "574", "575", "576", "577", 
"578", "579", "580", "581", "582", "583", "584", "585", "586", 
"587", "588", "589", "590", "591", "592", "593", "594", "595", 
"596", "597", "598", "599", "600", "601", "602", "603", "604", 
"605", "606", "607", "608", "609", "610", "611", "612", "613", 
"614", "615", "616", "617", "618", "619", "620", "621", "622", 
"623", "624", "625", "626", "627", "628", "629", "630", "631", 
"632", "633", "634", "635", "636", "637", "638", "639", "640"
), class = "factor"), V1 = list(structure(list(V1 = c(-0.049, 
-0.042, 0.015, -0.051, -0.107, -0.078, -0.02, -0.046, -0.063, 
0.068, 0.095, -0.007, -0.046, 0.044, 0.137, 0.098, 0.081, -0.073, 
-0.037, 0.012, -0.037, -0.044, 0.015, 0.044, -0.029, -0.09, -0.061, 
-0.042, -0.002, 0.007, 0.024, -0.005, -0.11, -0.076, 0.032, 0.088, 
-0.005, -0.105, -0.117, -0.071, -0.002, -0.017, -0.034, -0.098, 
-0.071, -0.056, -0.083, -0.093, -0.012, 0.002, 0.042, -0.056, 
-0.017, 0.007, -0.015, 0.02, 0.015, 0.007, 0.029, 0.054, 0.01, 
-0.007, -0.056, -0.049, -0.034, 0.002, 0.017, -0.071, -0.103, 
-0.093, -0.051, -0.01, -0.107, -0.063, 0.054, 0.007, 0.037, 0.071, 
0.107, -0.02, -0.056, -0.078, 0.027, 0.063, -0.051, -0.115, -0.068, 
-0.059, -0.024, -0.044, 0.027, -0.012, -0.054, -0.02, 0.022, 
-0.066, -0.037, 0.117, 0.071, 0.029, 0.015, -0.032, 0.027, -0.044, 
-0.22, -0.2, -0.024, 0.007, -0.129, -0.068, 0.044, 0.059, 0.012, 
0.002, -0.068, 0.029, 0.117, 0.039, 0.005, 0.088, 0.032, -0.095, 
-0.076, -0.032, -0.059, -0.142, -0.164, -0.071, -0.02, -0.032, 
-0.088, -0.022, 0.032, 0.032, 0.007, -0.022, -0.042, 0.024, 0.042, 
-0.017, -0.034, 0.01, 0.002, -0.076, -0.078, -0.054, -0.095, 
-0.073, -0.034, -0.103, -0.081, -0.088, -0.017, -0.049, 0.012, 
-0.09, -0.122, 0.01, 0.022, 0.122, 0.107, 0.012, -0.017, -0.107, 
-0.107, 0.034, -0.034, -0.044, -0.061, -0.115, -0.132, -0.193, 
-0.029, 0.078, 0.093, 0.1, 0.049, -0.037, 0.029, -0.027, 0.002, 
0.081, -0.024, -0.083, -0.046, -0.002, -0.037, -0.149, -0.02, 
0.01, -0.049, -0.105, -0.051, 0.078, 0.071, 0.007, -0.081, 0.054, 
0.164, 0.042, 0.073, -0.02, -0.032, 0.015, 0.002, -0.081, 0.042, 
0.024, -0.132, -0.063, 0.051, 0.02, 0, 0.02, -0.01, -0.005, 0.071, 
0.01, -0.005, 0.088, 0.037, -0.015, -0.042, -0.024, -0.012, 0.071, 
-0.022, -0.1, -0.115, -0.029, -0.01, -0.002, -0.051, -0.081, 
0.027, 0.11, 0.022, -0.061, 0.061, 0.01, -0.012, -0.02, -0.049, 
0.029, 0.01, -0.029, -0.032, 0.01, 0.042, -0.01, 0.042, 0.034, 
-0.088, -0.083, -0.09, 0.037, -0.002, 0.056, 0.024, 0.044, 0.154, 
0.088, 0.027, 0.034, 0.105, 0.081, -0.02, -0.083, -0.068, -0.017, 
0.034, 0.042, -0.073, -0.112, -0.015, 0.088, 0.071, -0.066, -0.085, 
0.083, 0.156, 0.105, -0.073, -0.071, 0.09, 0.078, -0.051, -0.142, 
-0.076, 0.005, -0.01, -0.093, -0.076, -0.049, 0.056, 0.01, -0.046, 
0.042, 0.132, 0.049, -0.029, 0.044, 0.107, 0.122, 0.068, -0.002, 
-0.078, -0.012, -0.037, -0.105, -0.115, 0.017, 0.042, 0.015, 
0.032, 0.054, 0.024, -0.002, 0.083, 0.061, -0.007, 0.056, 0.046, 
-0.01, 0.049, 0.022, -0.024, -0.024, -0.022, -0.127, -0.176, 
-0.081, -0.068, 0, 0.015, -0.029, -0.017, -0.027, -0.002, 0.054, 
0.005, -0.022, -0.027, -0.007, 0.095, 0.029, -0.085, -0.059, 
-0.063, 0.024, 0.029, -0.063, -0.078, -0.127, -0.068, -0.022, 
-0.029, 0.046, 0.029, 0.01, 0.039, 0.132, 0.068, 0.044, 0.012, 
-0.029, -0.015, 0.093, -0.01, -0.134, -0.115, -0.066, -0.032, 
0.002, -0.039, -0.134, -0.051, 0.034, 0.061, 0.066, 0.061, 0.066, 
0.01, 0.024, 0.093, 0.044, 0.037, 0.012, 0.002, -0.027, -0.11, 
-0.11, -0.073, -0.029, 0.032, 0.005, -0.066, -0.005, -0.02, -0.029, 
-0.068, -0.01, 0.071, 0.081, 0.034, -0.037, -0.032, -0.007, -0.012, 
-0.073, -0.088, -0.071, -0.049, -0.083, -0.044, -0.112, 0.015, 
-0.1, -0.154, 0.029, 0.073, 0.073, 0, -0.01, 0.005, -0.012, -0.103, 
-0.12, -0.093, -0.042, -0.024, -0.154, -0.073, -0.054, -0.1, 
-0.125, -0.117, -0.066, 0.034, 0.085, 0.012, 0.039, 0.085, 0.005, 
-0.022, -0.017, 0.02, 0.039, -0.046, -0.007, 0.012, -0.012, -0.063, 
-0.054, 0.007, -0.056, -0.107, 0.037, 0.093, 0.046, -0.061, -0.015, 
0.039, 0.024, 0.068, 0.007, -0.027, 0.051, -0.134, -0.11, 0.007, 
-0.093, -0.105, -0.056, -0.076, 0.012, -0.071, -0.056, -0.117, 
-0.073, 0.002, 0.054, 0.078, 0.09, 0.11, 0.09, -0.022, -0.044, 
0.042, 0.073, -0.005, 0.015, 0.017, -0.085, -0.1, -0.085, -0.059, 
-0.103, -0.071, -0.056, -0.034, 0.032, 0.039, -0.007, -0.007, 
0.068, 0.027, -0.054, -0.078, -0.061, -0.059, -0.024)), row.names = c(NA, 
-512L), class = c("data.table", "data.frame")), structure(list(
    V1 = c(-0.11, -0.076, 0.032, 0.088, -0.005, -0.105, -0.117, 
    -0.071, -0.002, -0.017, -0.034, -0.098, -0.071, -0.056, -0.083, 
    -0.093, -0.012, 0.002, 0.042, -0.056, -0.017, 0.007, -0.015, 
    0.02, 0.015, 0.007, 0.029, 0.054, 0.01, -0.007, -0.056, -0.049, 
    -0.034, 0.002, 0.017, -0.071, -0.103, -0.093, -0.051, -0.01, 
    -0.107, -0.063, 0.054, 0.007, 0.037, 0.071, 0.107, -0.02, 
    -0.056, -0.078, 0.027, 0.063, -0.051, -0.115, -0.068, -0.059, 
    -0.024, -0.044, 0.027, -0.012, -0.054, -0.02, 0.022, -0.066, 
    -0.037, 0.117, 0.071, 0.029, 0.015, -0.032, 0.027, -0.044, 
    -0.22, -0.2, -0.024, 0.007, -0.129, -0.068, 0.044, 0.059, 
    0.012, 0.002, -0.068, 0.029, 0.117, 0.039, 0.005, 0.088, 
    0.032, -0.095, -0.076, -0.032, -0.059, -0.142, -0.164, -0.071, 
    -0.02, -0.032, -0.088, -0.022, 0.032, 0.032, 0.007, -0.022, 
    -0.042, 0.024, 0.042, -0.017, -0.034, 0.01, 0.002, -0.076, 
    -0.078, -0.054, -0.095, -0.073, -0.034, -0.103, -0.081, -0.088, 
    -0.017, -0.049, 0.012, -0.09, -0.122, 0.01, 0.022, 0.122, 
    0.107, 0.012, -0.017, -0.107, -0.107, 0.034, -0.034, -0.044, 
    -0.061, -0.115, -0.132, -0.193, -0.029, 0.078, 0.093, 0.1, 
    0.049, -0.037, 0.029, -0.027, 0.002, 0.081, -0.024, -0.083, 
    -0.046, -0.002, -0.037, -0.149, -0.02, 0.01, -0.049, -0.105, 
    -0.051, 0.078, 0.071, 0.007, -0.081, 0.054, 0.164, 0.042, 
    0.073, -0.02, -0.032, 0.015, 0.002, -0.081, 0.042, 0.024, 
    -0.132, -0.063, 0.051, 0.02, 0, 0.02, -0.01, -0.005, 0.071, 
    0.01, -0.005, 0.088, 0.037, -0.015, -0.042, -0.024, -0.012, 
    0.071, -0.022, -0.1, -0.115, -0.029, -0.01, -0.002, -0.051, 
    -0.081, 0.027, 0.11, 0.022, -0.061, 0.061, 0.01, -0.012, 
    -0.02, -0.049, 0.029, 0.01, -0.029, -0.032, 0.01, 0.042, 
    -0.01, 0.042, 0.034, -0.088, -0.083, -0.09, 0.037, -0.002, 
    0.056, 0.024, 0.044, 0.154, 0.088, 0.027, 0.034, 0.105, 0.081, 
    -0.02, -0.083, -0.068, -0.017, 0.034, 0.042, -0.073, -0.112, 
    -0.015, 0.088, 0.071, -0.066, -0.085, 0.083, 0.156, 0.105, 
    -0.073, -0.071, 0.09, 0.078, -0.051, -0.142, -0.076, 0.005, 
    -0.01, -0.093, -0.076, -0.049, 0.056, 0.01, -0.046, 0.042, 
    0.132, 0.049, -0.029, 0.044, 0.107, 0.122, 0.068, -0.002, 
    -0.078, -0.012, -0.037, -0.105, -0.115, 0.017, 0.042, 0.015, 
    0.032, 0.054, 0.024, -0.002, 0.083, 0.061, -0.007, 0.056, 
    0.046, -0.01, 0.049, 0.022, -0.024, -0.024, -0.022, -0.127, 
    -0.176, -0.081, -0.068, 0, 0.015, -0.029, -0.017, -0.027, 
    -0.002, 0.054, 0.005, -0.022, -0.027, -0.007, 0.095, 0.029, 
    -0.085, -0.059, -0.063, 0.024, 0.029, -0.063, -0.078, -0.127, 
    -0.068, -0.022, -0.029, 0.046, 0.029, 0.01, 0.039, 0.132, 
    0.068, 0.044, 0.012, -0.029, -0.015, 0.093, -0.01, -0.134, 
    -0.115, -0.066, -0.032, 0.002, -0.039, -0.134, -0.051, 0.034, 
    0.061, 0.066, 0.061, 0.066, 0.01, 0.024, 0.093, 0.044, 0.037, 
    0.012, 0.002, -0.027, -0.11, -0.11, -0.073, -0.029, 0.032, 
    0.005, -0.066, -0.005, -0.02, -0.029, -0.068, -0.01, 0.071, 
    0.081, 0.034, -0.037, -0.032, -0.007, -0.012, -0.073, -0.088, 
    -0.071, -0.049, -0.083, -0.044, -0.112, 0.015, -0.1, -0.154, 
    0.029, 0.073, 0.073, 0, -0.01, 0.005, -0.012, -0.103, -0.12, 
    -0.093, -0.042, -0.024, -0.154, -0.073, -0.054, -0.1, -0.125, 
    -0.117, -0.066, 0.034, 0.085, 0.012, 0.039, 0.085, 0.005, 
    -0.022, -0.017, 0.02, 0.039, -0.046, -0.007, 0.012, -0.012, 
    -0.063, -0.054, 0.007, -0.056, -0.107, 0.037, 0.093, 0.046, 
    -0.061, -0.015, 0.039, 0.024, 0.068, 0.007, -0.027, 0.051, 
    -0.134, -0.11, 0.007, -0.093, -0.105, -0.056, -0.076, 0.012, 
    -0.071, -0.056, -0.117, -0.073, 0.002, 0.054, 0.078, 0.09, 
    0.11, 0.09, -0.022, -0.044, 0.042, 0.073, -0.005, 0.015, 
    0.017, -0.085, -0.1, -0.085, -0.059, -0.103, -0.071, -0.056, 
    -0.034, 0.032, 0.039, -0.007, -0.007, 0.068, 0.027, -0.054, 
    -0.078, -0.061, -0.059, -0.024, 0.037, -0.007, -0.083, -0.032, 
    -0.061, -0.081, -0.093, -0.117, 0.034, 0.044, 0.037, 0.054, 
    0.083, 0.002, -0.103, 0.083, 0.115, -0.139, -0.046, 0.142, 
    0.032, -0.139, -0.151, 0.081, 0.107, -0.061, -0.076, 0.005, 
    0.176, 0.078, -0.061, 0.01)), row.names = c(NA, -512L), class = c("data.table", 
"data.frame")))), row.names = c(NA, -2L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x000002289534be80>)
   ID gl                  V1
1:  1  1 <data.table[512x1]>
2:  1  2 <data.table[512x1]>

CodePudding user response:

There are several ways to optimize nested loops. First we need to know what we are trying to accomplish. Please help the community help you by describing the essence of the problem. In one iteration of your nested loop, for example where i and j are both 1, what should happen?

In order to end up with a matrix (or a multi-dimensional array, even), the sizes and shapes of the chunks that are to be combined must be compatible with each other. What are the dimensions of the answer you seek? Perhaps one dimension is: length( unique( X$ID )). Another might be 512 (if there are always this number of observations)

# I've made a few substitutions, to make things easier on my eyes while I try to see what's going on:
#   lst_512_32_E2   ->   X
#   test_E2         ->   Y
#   t_test_E2       ->   Z

# Then the first loop looks like this:

# 1  for( i in unique( X$ID )){
# 2    Y[[ i ]] <- list()
# 3    for( j in 1:length( X$V1[[ i ]] ) ){
# 4      Y[[ i ]][[ j ]] <- sapply( X[ ID == i ]$V1, '[[', j )
# 5    }
# 6  }

# Stepping through the loop...

# --- Line 1
### for( i in unique( X$ID )){
# For now, initialize i to the first unique ID
i <- unique( X$ID )[1]

# --- Line 2
### Y[[ i ]] <- list()
#  Skip this for now

# --- Line 3
### for( j in 1:length( X$V1[[ i ]] ) ){
# For now, initialize j to 1
j <- 1

# --- Line 4
### Y[[ i ]][[ j ]] <- sapply( X[ ID == i ]$V1, '[[', j )

X[ ID == i ]   #   <---- this is a typo?
# Error in `[.data.frame`(X, ID == i) : object 'ID' not found
# NOTE(review): the error is raised by `[.data.frame`, which suggests the
# data.table package was not attached in this session, so `ID` was looked up
# as an ordinary variable instead of as a column of X -- TODO confirm.

#   /------ include X$ here to make it work
#  __
# (`%>%` is not base R; it is the magrittr pipe and must be attached first.)
X[ X$ID == i, ] %>% str
# Classes ‘data.table’ and 'data.frame':    2 obs. of  3 variables:
# $ ID: int  1 1
# $ gl: Factor w/ 640 levels "1","2","3","4",..: 1 2
# $ V1:List of 2
#  ..$ :Classes ‘data.table’ and 'data.frame':  512 obs. of  1 variable:
#  .. ..$ V1: num  -0.049 -0.042 0.015 -0.051 -0.107 -0.078 -0.02 -0.046 -0.063 0.068 ...
#  ..$ :Classes ‘data.table’ and 'data.frame':  512 obs. of  1 variable:
#  .. ..$ V1: num  -0.11 -0.076 0.032 0.088 -0.005 -0.105 -0.117 -0.071 -0.002 -0.017 ...

# Still on line 4, what does j do here?
#    sapply( X[ ID == i ]$V1, '[[', j )
# sapply expects a list, a function, other named variables, simplify, and USE.NAMES as in:
# sapply( X, FUN, ..., simplify, USE.NAMES)
#         X = X[ X$ID == i, "V1" ]
#         FUN =  '[['
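#   j does what?
#   (j is forwarded through `...` to FUN, so for each element x of the list
#    sapply evaluates `[[`(x, j), i.e. x[[j]].)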

# We can get V1 for X where ID == i with the following:
# Note, however, that even though we take the first unique ID,
# multiple rows in our table could (and, in this case do) match that.
# X[ X$ID == i ]$V1 %>% str
# List of 2
# $ :Classes ‘data.table’ and 'data.frame': 512 obs. of  1 variable:
#  ..$ V1: num [1:512] -0.049 -0.042 0.015 -0.051 -0.107 -0.078 -0.02 -0.046 -0.063 0.068 ...
# $ :Classes ‘data.table’ and 'data.frame': 512 obs. of  1 variable:
#  ..$ V1: num [1:512] -0.11 -0.076 0.032 0.088 -0.005 -0.105 -0.117 -0.071 -0.002 -0.017 ...

CodePudding user response:

You can try `split` + `lapply` to generate the list test_E2, and then apply `t` to each entry of test_E2:

# Group the nested tables in V1 by ID; each group is bound side by side and
# converted to a single matrix with its dimnames removed.
test_E2 <- lapply(
  split(lst_512_32_E2[["V1"]], lst_512_32_E2[["ID"]]),
  function(tables) {
    combined <- do.call(cbind, tables)
    unname(as.matrix(combined))
  }
)

# Transpose each per-ID matrix.
t_test_E2 <- lapply(test_E2, function(m) t(m))
  • Related