Count all values that are greater than 0.8 within the same column-CodePudding

This is the data:

# Example input: 48 rows in three id groups (rows 1-16, 17-32, 33-48),
# stored as a dplyr grouped_df that is already grouped by id.
# NOTE(review): the first perc value of id 2 is 10.893466942, while the desired
# output further down lists "1, 0.893466942" at that point -- possibly a lost
# comma in this dput; confirm against the original data.
tmp <- structure(list(id = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), perc = c(1, 0.996059244, 
1.001618656, 0.997829726, 0.915108419, 0.92539353, 0.047364961, 
0.017640387, 0.036429124, 0.045454043, -0.006316982, 0.025515999, 
-0.011243315, 0.00177058, 0.083566957, 0.018730071, 10.893466942, 
0.635321677, 0.486662427, 0.421685776, 0.398957515, 0.397167489, 
0.406132297, 0.42004932, 0.436092126, 0.451437885, 0.469267847, 
0.484069249, 0.496775105, 0.510017824, 0.517280558, 1, 0.970310965, 
0.920600257, 0.801496781, 0.635352677, 0.351879201, 0.133918706, 
0.020005058, -0.003554937, -0.027281619, -0.030216871, -0.035568669, 
-0.018927467, -0.054635806, -0.023042942, -0.049607356, 1), breakpoint = c(1, 
1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 1)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -48L), groups = structure(list(id = c(1, 
2, 3), .rows = structure(list(1:16, 17:32, 33:48), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE))

I want to count the values greater than 0.8 within each ID group and record the count in a new column. So far I'm able to use ifelse to get 0 and 1 and store them in another column, but I get stuck after that... What I tried:

# Flag every row whose perc is at least 0.8 with 1 (0 otherwise) and store the
# flag in breakpoint; the result remains grouped by id.
tmp <- tmp %>%
  dplyr::group_by(id) %>%
  dplyr::mutate(breakpoint = as.numeric(perc >= 0.8))

This is my desired output:

# Desired result (plain data.frame with columns ID, percentage, breakpoint):
# breakpoint is NA everywhere except one row per ID, which carries that ID's
# count (6, 2 and 4 for IDs 1, 2 and 3).
structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L), percentage = c(1, 0.996059244, 1.001618656, 
0.997829726, 0.915108419, 0.92539353, 0.047364961, 0.017640387, 
0.036429124, 0.045454043, -0.006316982, 0.025515999, -0.011243315, 
0.00177058, 0.083566957, 0.018730071, 1, 0.893466942, 0.635321677, 
0.486662427, 0.421685776, 0.398957515, 0.397167489, 0.406132297, 
0.42004932, 0.436092126, 0.451437885, 0.469267847, 0.484069249, 
0.496775105, 0.510017824, 0.517280558, 1, 0.970310965, 0.920600257, 
0.801496781, 0.635352677, 0.351879201, 0.133918706, 0.020005058, 
-0.003554937, -0.027281619, -0.030216871, -0.035568669, -0.018927467, 
-0.054635806, -0.023042942, -0.049607356), breakpoint = c(NA, 
NA, NA, NA, NA, 6L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
2L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, 4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA, 
-48L))

Can someone also tell me how to display the output as code? I only know how to use Ctrl+K... Thanks!

CodePudding user response：

# Within each id, give the flagged rows (breakpoint == 1) a running count and
# zero out the unflagged ones; then keep only the rows holding the group's
# largest count and set every other row to NA.
tmp %>%
  group_by(id) %>%
  mutate(
    res = breakpoint * cumsum(breakpoint),
    # as.numeric() keeps the column double even if every comparison is NA
    res = as.numeric(ifelse(res == max(res), res, NA))
  )

  id         perc breakpoint res
1   1  1.000000000          1  NA
2   1  0.996059244          1  NA
3   1  1.001618656          1  NA
4   1  0.997829726          1  NA
5   1  0.915108419          1  NA
6   1  0.925393530          1   6
7   1  0.047364961          0  NA
8   1  0.017640387          0  NA
9   1  0.036429124          0  NA
10  1  0.045454043          0  NA
11  1 -0.006316982          0  NA
12  1  0.025515999          0  NA
13  1 -0.011243315          0  NA
14  1  0.001770580          0  NA
15  1  0.083566957          0  NA
16  1  0.018730071          0  NA
17  2 10.893466942          1  NA
18  2  0.635321677          0  NA
19  2  0.486662427          0  NA
20  2  0.421685776          0  NA
21  2  0.398957515          0  NA
22  2  0.397167489          0  NA
23  2  0.406132297          0  NA
24  2  0.420049320          0  NA
25  2  0.436092126          0  NA
26  2  0.451437885          0  NA
27  2  0.469267847          0  NA
28  2  0.484069249          0  NA
29  2  0.496775105          0  NA
30  2  0.510017824          0  NA
31  2  0.517280558          0  NA
32  2  1.000000000          1   2
33  3  0.970310965          1  NA
34  3  0.920600257          1  NA
35  3  0.801496781          1  NA
36  3  0.635352677          0  NA
37  3  0.351879201          0  NA
38  3  0.133918706          0  NA
39  3  0.020005058          0  NA
40  3 -0.003554937          0  NA
41  3 -0.027281619          0  NA
42  3 -0.030216871          0  NA
43  3 -0.035568669          0  NA
44  3 -0.018927467          0  NA
45  3 -0.054635806          0  NA
46  3 -0.023042942          0  NA
47  3 -0.049607356          0  NA
48  3  1.000000000          1   4
>

CodePudding user response：

# One row per id: new is the number of rows whose perc is at least 0.8.
dplyr::summarise(
  dplyr::group_by(tmp, id),
  new = sum(perc >= 0.8)
)

     id   new
  <dbl> <int>
1     1     6
2     2     2
3     3     4

CodePudding user response：

I'm not quite sure about your purpose. Please note that in id 2, perc is 10.9 and then 0.635, which is smaller than 0.8, so maybe the count you want is 1(?).

Please check this against the dput object (tmp) you provided.

# For each id, count the rows with perc above 0.8 that occur before the first
# row with perc below 0.8; later rows in the group are never counted.
tmp %>%
  group_by(id) %>%
  summarize(
    # cumsum(perc < 0.8) == 0 is TRUE only while no value below 0.8 has been seen
    key = sum((perc > 0.8) * (cumsum(perc < 0.8) == 0))
  )

     id   key
  <dbl> <int>
1     1     6
2     2     1
3     3     3