Home > Mobile >  How to create a grouping column for every n rows without an NA value in r
How to create a grouping column for every n rows without an NA value in r

Time:12-01

I have a dataframe like this:

df <- structure(list(id = c("A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", 
"C", "C", "C", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", 
"D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "F", "F", "F", "F", "F", "F", "F", 
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", 
"F", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", 
"G", "G", "G", "G", "G", "G", "G", "G", "G", "H", "H", "H", "H", 
"H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", 
"H", "H", "H", "H", "I", "I", "I", "I", "I", "I", "I", "I", "I", 
"I", "I", "I", "I", "I", "I", "I", "I", "I", "I", "I", "I"), 
    category = c("Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange"), number = structure(c(1L, 
    1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 
    6L, 6L, 7L, 7L, 7L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 
    4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 1L, 1L, 1L, 2L, 
    2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 
    7L, 7L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 
    5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 
    3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 1L, 
    1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 
    6L, 6L, 7L, 7L, 7L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 
    4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 1L, 1L, 1L, 2L, 
    2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 
    7L, 7L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 
    5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L), levels = c("1", "2", "3", 
    "4", "5", "6", "7"), class = "factor"), value = c(NA, 626.003642929148, 
    14.8530720820503, 1084.38927965752, 471.655282952508, 64.7236912331857, 
    776.07706769966, 396.916793965147, 54.4855105782135, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 545.062982509092, 
    1.91732773771467, NA, NA, NA, NA, NA, NA, NA, NA, NA, 394.894986584073, 
    195.280219395579, 7.64479280434642, 22.0315874609581, NA, 
    NA, NA, NA, NA, NA, NA, NA, 1080.64933432694, 805.86292498854, 
    13.482712704827, NA, NA, NA, NA, NA, NA, 1245.98805385198, 
    460.473209104431, 7.63424473634, 890.478501410027, 660.395220925587, 
    9.17592533879766, NA, 299.959907306427, 11.4907173073867, 
    NA, 491.805140195306, 177.400798058143, NA, NA, NA, NA, NA, 
    NA, 184.289228960705, 771.021642313767, 6.31043426543333, 
    281.726609330022, NA, NA, NA, NA, NA, NA, NA, NA, NA, 509.378049364324, 
    278.038058491692, 527.80894112378, 975.632789695641, 228.599356169827, 
    760.529109540798, 25.8508640237331, 208.36160360409, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 857.05785408852, 
    348.960106993208, 95.34862608411, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 643.00490054136, 
    176.787378197281, 454.641131289842, 875.117995998053, 89.4833304141165, 
    854.237890262158, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, 657.837193971776, 201.407816618736, 218.002918108005, 
    981.375399584459, 252.181299279728, 254.482264508715, NA, 
    NA, 237.354256272709, NA, NA, 346.67665285344, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, 207.808025637129, 418.411113648792, 
    30.3048309454833, 990.842297838783, 560.436400795975, 27.8512837727917, 
    1024.57867280143, 568.935110651167, 26.6536802259542, 887.976034157129, 
    NA, NA, NA, NA, NA, NA, NA, NA)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -189L), groups = structure(list(
    id = c("A", "A", "A", "A", "A", "A", "A", "B", "B", "B", 
    "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C", "D", 
    "D", "D", "D", "D", "D", "D", "E", "E", "E", "E", "E", "E", 
    "E", "F", "F", "F", "F", "F", "F", "F", "G", "G", "G", "G", 
    "G", "G", "G", "H", "H", "H", "H", "H", "H", "H", "I", "I", 
    "I", "I", "I", "I", "I"), number = structure(c(1L, 2L, 3L, 
    4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 
    5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 
    6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 
    7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L
    ), levels = c("1", "2", "3", "4", "5", "6", "7"), class = "factor"), 
    .rows = structure(list(1:3, 4:6, 7:9, 10:12, 13:15, 16:18, 
        19:21, 22:24, 25:27, 28:30, 31:33, 34:36, 37:39, 40:42, 
        43:45, 46:48, 49:51, 52:54, 55:57, 58:60, 61:63, 64:66, 
        67:69, 70:72, 73:75, 76:78, 79:81, 82:84, 85:87, 88:90, 
        91:93, 94:96, 97:99, 100:102, 103:105, 106:108, 109:111, 
        112:114, 115:117, 118:120, 121:123, 124:126, 127:129, 
        130:132, 133:135, 136:138, 139:141, 142:144, 145:147, 
        148:150, 151:153, 154:156, 157:159, 160:162, 163:165, 
        166:168, 169:171, 172:174, 175:177, 178:180, 181:183, 
        184:186, 187:189), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -63L), .drop = TRUE))

As you can see there are many instances where there are NAs in the values column. What I would like to do is a rolling search (within each id), where I take a 3 row window (step of 1), and add a grouping label (in this case labeled ID_group_groupnumber) as soon as I hit 3 rows where there isn't an NA values in the value column. Importantly, one single value for each other category ("Apple","Banana", and "Orange") need to be present in the final group.

Here is what the output should look like, where groups can only be 3 rows, and every time this rule does not apply, an NA value is used as a label instead.

 df <- structure(list(id = c("A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", 
"C", "C", "C", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", 
"D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "E", "E", 
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
"E", "E", "E", "E", "E", "E", "F", "F", "F", "F", "F", "F", "F", 
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", 
"F", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", 
"G", "G", "G", "G", "G", "G", "G", "G", "G", "H", "H", "H", "H", 
"H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", 
"H", "H", "H", "H", "I", "I", "I", "I", "I", "I", "I", "I", "I", 
"I", "I", "I", "I", "I", "I", "I", "I", "I", "I", "I", "I"), 
    category = c("Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange", "Banana", "Apple", 
    "Orange", "Banana", "Apple", "Orange"), number = structure(c(1L, 
    1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 
    6L, 6L, 7L, 7L, 7L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 
    4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 1L, 1L, 1L, 2L, 
    2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 
    7L, 7L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 
    5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 
    3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 1L, 
    1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 
    6L, 6L, 7L, 7L, 7L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 
    4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 1L, 1L, 1L, 2L, 
    2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 
    7L, 7L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 
    5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L), levels = c("1", "2", "3", 
    "4", "5", "6", "7"), class = "factor"), value = c(NA, 626.003642929148, 
    14.8530720820503, 1084.38927965752, 471.655282952508, 64.7236912331857, 
    776.07706769966, 396.916793965147, 54.4855105782135, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 545.062982509092, 
    1.91732773771467, NA, NA, NA, NA, NA, NA, NA, NA, NA, 394.894986584073, 
    195.280219395579, 7.64479280434642, 22.0315874609581, NA, 
    NA, NA, NA, NA, NA, NA, NA, 1080.64933432694, 805.86292498854, 
    13.482712704827, NA, NA, NA, NA, NA, NA, 1245.98805385198, 
    460.473209104431, 7.63424473634, 890.478501410027, 660.395220925587, 
    9.17592533879766, NA, 299.959907306427, 11.4907173073867, 
    NA, 491.805140195306, 177.400798058143, NA, NA, NA, NA, NA, 
    NA, 184.289228960705, 771.021642313767, 6.31043426543333, 
    281.726609330022, NA, NA, NA, NA, NA, NA, NA, NA, NA, 509.378049364324, 
    278.038058491692, 527.80894112378, 975.632789695641, 228.599356169827, 
    760.529109540798, 25.8508640237331, 208.36160360409, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 857.05785408852, 
    348.960106993208, 95.34862608411, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 643.00490054136, 
    176.787378197281, 454.641131289842, 875.117995998053, 89.4833304141165, 
    854.237890262158, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, 657.837193971776, 201.407816618736, 218.002918108005, 
    981.375399584459, 252.181299279728, 254.482264508715, NA, 
    NA, 237.354256272709, NA, NA, 346.67665285344, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, 207.808025637129, 418.411113648792, 
    30.3048309454833, 990.842297838783, 560.436400795975, 27.8512837727917, 
    1024.57867280143, 568.935110651167, 26.6536802259542, 887.976034157129, 
    NA, NA, NA, NA, NA, NA, NA, NA), grouping = c(NA, "A_group1", 
    "A_group1", "A_group1", "A_group2", "A_group2", "A_group2", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "B_group1", "B_group1", 
    "B_group1", NA, NA, NA, NA, NA, NA, NA, NA, NA, "C_group1", 
    "C_group1", "C_group1", NA, NA, NA, NA, NA, NA, "C_group2", 
    "C_group2", "C_group2", "C_group3", "C_group3", "C_group3", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "D_group1", 
    "D_group1", "D_group1", NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, "E_group1", "E_group1", "E_group1", "E_group2", "E_group2", 
    "E_group2", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, "F_group1", "F_group1", "F_group1", NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    "G_group1", "G_group1", "G_group1", "G_group2", "G_group2", 
    "G_group2", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, "H_group1", "H_group1", "H_group1", "H_group2", 
    "H_group2", "H_group2", NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, "I_group1", "I_group1", "I_group1", 
    "I_group2", "I_group2", "I_group2", "I_group3", "I_group3", 
    "I_group3", NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, 
-189L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), groups = structure(list(id = c("A", "A", "A", "A", "A", "A", 
"A", "B", "B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", 
"C", "C", "D", "D", "D", "D", "D", "D", "D", "E", "E", "E", "E", 
"E", "E", "E", "F", "F", "F", "F", "F", "F", "F", "G", "G", "G", 
"G", "G", "G", "G", "H", "H", "H", "H", "H", "H", "H", "I", "I", 
"I", "I", "I", "I", "I"), number = structure(c(1L, 2L, 3L, 4L, 
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), levels = c("1", 
"2", "3", "4", "5", "6", "7"), class = "factor"), .rows = structure(list(
    1:3, 4:6, 7:9, 10:12, 13:15, 16:18, 19:21, 22:24, 25:27, 
    28:30, 31:33, 34:36, 37:39, 40:42, 43:45, 46:48, 49:51, 52:54, 
    55:57, 58:60, 61:63, 64:66, 67:69, 70:72, 73:75, 76:78, 79:81, 
    82:84, 85:87, 88:90, 91:93, 94:96, 97:99, 100:102, 103:105, 
    106:108, 109:111, 112:114, 115:117, 118:120, 121:123, 124:126, 
    127:129, 130:132, 133:135, 136:138, 139:141, 142:144, 145:147, 
    148:150, 151:153, 154:156, 157:159, 160:162, 163:165, 166:168, 
    169:171, 172:174, 175:177, 178:180, 181:183, 184:186, 187:189), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -63L), .drop = TRUE))

I've wrangled the data into this format using expand grid for the columns 'category' and 'number' because I thought it would be easier to identify the groupings. To solve this problem, I thought of pivoting it to wide-form (but then each id is quite different and not sure how I'd group by the columns), and rollapply in the zoo package but not sure how to specify sequentially finding 3 rows without an NA. Importantly no overlap can occur. I appreciate any guidance/approaches to this in r.

Thanks very much.

CodePudding user response:

Number inputs a vector and outputs 1:3 repeated to the length of that vector.
except that NA's are used at the end if 1:3 does not fit evenly.

Then within group for each run of NA's or not NA's apply Number using ave and then sequentially number the triples within id. At the end we make the grouping names and NA out the grouping entries that should be NA.

library(dplyr)
library(data.table) # rleid

Number <- function(x) {
  r <- rep(1:3, length = length(x))
  n <- 3 * (length(r) %/% 3)
  ifelse(seq_along(x) > n, NA, r)
}

result <- df %>%
  group_by(id) %>%
  mutate(grouping = ave(1:n(), rleid(!is.na(value)), FUN = Number)   0*value,
    grouping = cumsum(coalesce(grouping, 0) == 1)   0 * grouping) %>%
  ungroup %>%
  mutate(grouping = ifelse(is.na(grouping), NA, paste0(id, "_group", grouping)))
  • Related