Home > Enterprise >  divide column in classes or groups
divide column in classes or groups

Time:07-12

I have a column with distances and I want to create a new column, distance_class, in which the distances are grouped into class ranges. The classes must be in steps of 10; for example, a distance of 8.099716 is assigned to 10 in the new column, a distance of 12.430613 is assigned to 20, etc. Since my data goes up to 498 metres, this requires a lot of classes. I know how to do it manually:

# Manual approach: first put every row in the lowest class (10) ...
df$distance_class <- 10
# ... then overwrite the rows whose distance lies between 10.0000001 and 20
# (both bounds inclusive) with class 20. The same pattern has to be repeated
# for every further class.
df[df$distance >= 10.0000001 & df$distance <= 20,]$distance_class <- 20

etc., but this takes up a lot of space in my R script. What function could I use to create these classes?

# Example data as dput() output: a tibble of 100 rows with columns HEX_Tag_ID
# (character), Lengteklasse (factor) and distance (numeric), grouped by
# HEX_Tag_ID (class "grouped_df", 98 groups).
df =

structure(list(HEX_Tag_ID = c("3D6.153413ECBC", "3D6.153413ECE0", 
"3D6.153413EF72", "3D6.153413EF72", "3D6.15341B9871", "3D6.15341B9B1D", 
"3D6.15341B9B36", "3D6.15341BA2E5", "3D6.15341BA3BA", "3D6.15341BA4AA", 
"3D6.15341BAACC", "3D6.15341BAD53", "3D6.15341BADE3", "3D6.15341BAE18", 
"3D6.15341BAE4D", "3D6.15341BB40B", "3D6.15341BB46B", "3D6.15341BB664", 
"3D6.15341BBB4F", "3D6.15341BBCBC", "3D6.15341BBFB5", "3D6.15341BBFEF", 
"3D6.15341BC0A1", "3D6.15341BC0FB", "3D6.15341BC232", "3D6.15341BC301", 
"3D6.15341BC475", "3D6.15341BC60F", "3D6.15341BC9D8", "3D6.15341BCB9A", 
"3D6.15341BCBFE", "3D6.15341BCF0C", "3D6.15341BCF0C", "3D6.15341BCF8A", 
"3D6.15341BD0D4", "3D6.15341BD291", "3D6.15341BD531", "3D6.15341BD71B", 
"3D6.15341BDE9F", "3D6.15341BDF75", "3D6.15341BE2C4", "3D6.15341BE5B6", 
"3D6.15341BE8C3", "3D6.15341BEBB7", "3D6.15341BF00C", "3D6.15341BF0EF", 
"3D6.15341BF1FD", "3D6.15341BF4E3", "3D6.15341BF6C8", "3D6.15341BF8F1", 
"3D6.15341BF949", "3D6.15341BFA21", "3D6.15341BFBF2", "3D6.15341BFE8F", 
"3D6.15341BFF55", "3D6.15341BFFF8", "3D6.15341C0083", "3D6.15343A5AF3", 
"3D6.15343A5B9A", "3D6.15343A5DB5", "3D6.15343A5F31", "3D6.15343A6078", 
"3D6.15343A6589", "3D6.15343A658F", "3D6.15343A66A8", "3D6.15343A66CA", 
"3D6.15343A677A", "3D6.15343A67C9", "3D6.15343A6824", "3D6.15343A6829", 
"3D6.15343A6847", "3D6.15343A6848", "3D6.15343A688B", "3D6.15343A6AFB", 
"3D6.15343A6E01", "3D6.15343A6E12", "3D6.15343A6E56", "3D6.15343A6F23", 
"3D6.15343A71A8", "3D6.15343A71B0", "3D6.15343A71B6", "3D6.15343A71BF", 
"3D6.15343A71C3", "3D6.15343A71D0", "3D6.15343A71E0", "3D6.15343A7548", 
"3D6.15343A757C", "3D6.15343A75A6", "3D6.15343A75BB", "3D6.15343A766B", 
"3D6.15343A7736", "3D6.15343A773B", "3D6.15343A7CC8", "3D6.15343A7CF5", 
"3D6.15343A7DD8", "3D6.15343A8073", "3D6.15343A8317", "3D6.15343A831D", 
"3D6.15343A832E", "3D6.15343A980B"), Lengteklasse = structure(c(4L, 
5L, 5L, 5L, 5L, 5L, 4L, 2L, 3L, 3L, 4L, 5L, 4L, 6L, 7L, 6L, 5L, 
6L, 4L, 6L, 7L, 2L, 5L, 7L, 3L, 5L, 3L, 6L, 5L, 6L, 6L, 5L, 5L, 
6L, 6L, 4L, 3L, 5L, 6L, 4L, 5L, 6L, 5L, 4L, 4L, 5L, 5L, 5L, 6L, 
5L, 2L, 5L, 4L, 4L, 6L, 3L, 5L, 4L, 4L, 6L, 3L, 4L, 5L, 4L, 4L, 
3L, 5L, 5L, 4L, 3L, 4L, 6L, 5L, 5L, 5L, 3L, 1L, 3L, 5L, 4L, 7L, 
4L, 6L, 2L, 2L, 5L, 7L, 4L, 5L, 5L, 3L, 3L, 6L, 5L, 5L, 6L, 5L, 
2L, 4L, 6L), .Label = c("6", "7", "8", "9", "10", "11", "12", 
"13"), class = "factor"), distance = c(8.0997162289136, 12.4306131164191, 
61.3106411610124, 58.5153016042213, 32.2744999859089, 89.414358823574, 
28.0108486925122, 47.8219099537216, 9.0620201326797, 42.44256852726, 
78.890670989817, 14.4029581051937, 30.9625482306174, 16.8654943464726, 
46.9868070888032, 50.0744747268464, 28.0727803370875, 90.1657178100163, 
105.883889293255, 64.6258993837158, 13.7303841199513, 5.30383607472221, 
23.4063386162898, 101.846842420048, 110.743855606155, 93.7033487614962, 
54.2208290023929, 284.353952212319, 5.80214621411396, 9.00695055301609, 
141.475231499517, 17.0048805782567, 28.0458481248819, 154.038498446018, 
37.725475052369, 139.446207060833, 51.1696402910276, 15.5079590734332, 
57.4006455173585, 11.100902574906, 241.023087647416, 17.8158585698874, 
128.095902669084, 30.3714003058918, 110.477320018898, 31.4974206462646, 
9.00695055301609, 82.0444687609409, 19.9640782122363, 98.4831535433995, 
10.1485490181753, 10.1198911075185, 60.0673937023095, 61.30855893794, 
15.243456492333, 7.19527826709304, 5.89430300905792, 14.6110248650041, 
16.0826976446818, 12.4654874602058, 117.743722916799, 1.12489917618149, 
39.2696515885893, 10.3075837820188, 12.1762671570106, 11.7501210701527, 
4.87143478511475, 40.5667469824971, 202.73045988395, 10.1705562580906, 
170.793444639309, 447.235720974034, 8.27263671180456, 10.5997730205872, 
13.3275431540535, 39.1729038623626, 6.43559510500503, 61.94224987052, 
85.8550416470563, 5.55746379097464, 60.054459598035, 15.9423052029938, 
16.0655921713406, 125.621317253718, 30.4341885798698, 19.6787135744613, 
21.9526432324342, 32.6115161846427, 23.0584552162136, 15.7035854165388, 
32.0891313247117, 356.071825968971, 21.5282811844687, 49.3123085255221, 
22.9122313204, 62.5149283380102, 9.05272361429296, 74.9756897608931, 
19.1320807799778, 39.1088642338198)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
    HEX_Tag_ID = c("3D6.153413ECBC", "3D6.153413ECE0", "3D6.153413EF72", 
    "3D6.15341B9871", "3D6.15341B9B1D", "3D6.15341B9B36", "3D6.15341BA2E5", 
    "3D6.15341BA3BA", "3D6.15341BA4AA", "3D6.15341BAACC", "3D6.15341BAD53", 
    "3D6.15341BADE3", "3D6.15341BAE18", "3D6.15341BAE4D", "3D6.15341BB40B", 
    "3D6.15341BB46B", "3D6.15341BB664", "3D6.15341BBB4F", "3D6.15341BBCBC", 
    "3D6.15341BBFB5", "3D6.15341BBFEF", "3D6.15341BC0A1", "3D6.15341BC0FB", 
    "3D6.15341BC232", "3D6.15341BC301", "3D6.15341BC475", "3D6.15341BC60F", 
    "3D6.15341BC9D8", "3D6.15341BCB9A", "3D6.15341BCBFE", "3D6.15341BCF0C", 
    "3D6.15341BCF8A", "3D6.15341BD0D4", "3D6.15341BD291", "3D6.15341BD531", 
    "3D6.15341BD71B", "3D6.15341BDE9F", "3D6.15341BDF75", "3D6.15341BE2C4", 
    "3D6.15341BE5B6", "3D6.15341BE8C3", "3D6.15341BEBB7", "3D6.15341BF00C", 
    "3D6.15341BF0EF", "3D6.15341BF1FD", "3D6.15341BF4E3", "3D6.15341BF6C8", 
    "3D6.15341BF8F1", "3D6.15341BF949", "3D6.15341BFA21", "3D6.15341BFBF2", 
    "3D6.15341BFE8F", "3D6.15341BFF55", "3D6.15341BFFF8", "3D6.15341C0083", 
    "3D6.15343A5AF3", "3D6.15343A5B9A", "3D6.15343A5DB5", "3D6.15343A5F31", 
    "3D6.15343A6078", "3D6.15343A6589", "3D6.15343A658F", "3D6.15343A66A8", 
    "3D6.15343A66CA", "3D6.15343A677A", "3D6.15343A67C9", "3D6.15343A6824", 
    "3D6.15343A6829", "3D6.15343A6847", "3D6.15343A6848", "3D6.15343A688B", 
    "3D6.15343A6AFB", "3D6.15343A6E01", "3D6.15343A6E12", "3D6.15343A6E56", 
    "3D6.15343A6F23", "3D6.15343A71A8", "3D6.15343A71B0", "3D6.15343A71B6", 
    "3D6.15343A71BF", "3D6.15343A71C3", "3D6.15343A71D0", "3D6.15343A71E0", 
    "3D6.15343A7548", "3D6.15343A757C", "3D6.15343A75A6", "3D6.15343A75BB", 
    "3D6.15343A766B", "3D6.15343A7736", "3D6.15343A773B", "3D6.15343A7CC8", 
    "3D6.15343A7CF5", "3D6.15343A7DD8", "3D6.15343A8073", "3D6.15343A8317", 
    "3D6.15343A831D", "3D6.15343A832E", "3D6.15343A980B"), .rows = structure(list(
        1L, 2L, 3:4, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
        14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 
        25L, 26L, 27L, 28L, 29L, 30L, 31L, 32:33, 34L, 35L, 36L, 
        37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 
        48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 
        59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 
        70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 
        81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 
        92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -98L), .drop = TRUE))

CodePudding user response:

You can use the plyr::round_any() function with f = ceiling to round each distance up to a multiple of 10:

library(dplyr)

# Add distance_class by rounding each distance up to a multiple of 10
# (e.g. 8.10 -> 10, 12.4 -> 20). plyr is referenced with `::`, so it is
# not attached alongside dplyr.
mutate(
  df,
  distance_class = plyr::round_any(distance, accuracy = 10, f = ceiling)
)

Output:

   HEX_Tag_ID     Lengteklasse distance distance_class
   <chr>          <fct>           <dbl>          <dbl>
 1 3D6.153413ECBC 9                8.10             10
 2 3D6.153413ECE0 10              12.4              20
 3 3D6.153413EF72 10              61.3              70
 4 3D6.153413EF72 10              58.5              60
 5 3D6.15341B9871 10              32.3              40
 6 3D6.15341B9B1D 10              89.4              90
 7 3D6.15341B9B36 9               28.0              30
 8 3D6.15341BA2E5 7               47.8              50
 9 3D6.15341BA3BA 8                9.06             10
10 3D6.15341BA4AA 8               42.4              50
  • Related