Home > database >  Get cumulative proportion over time for a variable
Get cumulative proportion over time for a variable

Time:10-01

I am trying to get the cumulative proportion of a variable (Gender; sf_sex) across time (startday; Days).

My code is the following

with(df,
 cbind( Freq=table(startday, sf_sex),
        Cumul =cumsum(table(startday, sf_sex)),
        relative = prop.table(table(startday, sf_sex),1)))

What I do get is somehow correct but not exactly as I would like to see the cumulative proportion for every day and I get the proportion of simply, the every day cases. So, put it in another way - the cases for the variable sum every day with all the previous ones and having the proportion of that rather than only the proportion of a day without considering the other ones.

Here is the data

df <- structure(list(sf_sex = c("Female", "Male", "Male", "Female", 
"Male", "Female", "Female", "Female", "Male", "Male", "Female", 
"Male", "Male", "Male", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Male", "Male", "Male", "Male", 
"Female", "Female", "Male", "Male", "Female", "Male", "Male", 
"Male", "Male", "Male", "Male", "Female", "Male", "Female", "Female", 
"Male", "Male", "Male", "Female", "Male", "Male", "Male", "Female", 
"Male", "Male", "Female", "Female", "Female", "Female", "Male", 
"Female", "Female", "Female", "Female", "Male", "Female", "Female", 
"Male", "Female", "Female", "Male", "Female", "Female", "Male", 
"Female", "Male", "Female", "Male", "Female", "Male", "Male", 
"Male", "Male", "Female", "Female", "Male", "Female", "Male", 
"Female", "Female", "Female", "Male", "Female", "Male", "Male", 
"Female", "Female", "Male", "Female", "Female", "Female", "Male", 
"Male", "Female", "Male", "Female", "Female", "Female", "Female", 
"Male", "Female", "Male", "Female", "Female", "Female", "Female", 
"Female", "Female", "Male", "Male", "Female", "Male", "Male", 
"Female", "Male", "Male", "Male", "Female", "Male", "Male", "Male", 
"Female", "Male", "Male", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Male", "Female", 
"Male", "Male", "Female", "Female", "Female", "Female", "Male", 
"Male", "Male", "Female", "Female", "Female", "Male", "Female", 
"Male", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Male", "Male", "Male", "Female", "Male", "Female", 
"Female", "Male", "Female", "Male", "Male", "Male", "Male", "Female", 
"Male", "Female", "Female", "Male", "Male", "Female", "Female", 
"Female", "Female", "Male", "Female", "Female", "Male", "Male", 
"Female", "Male", "Female", "Female", "Male", "Male", "Female", 
"Female", "Male", "Female"), startday = c("05/27/2019", "06/12/2019", 
"05/20/2019", "06/16/2019", "05/25/2019", "05/20/2019", "06/16/2019", 
"05/20/2019", "05/20/2019", "05/20/2019", "05/25/2019", "05/20/2019", 
"05/21/2019", "06/28/2019", "05/27/2019", "05/20/2019", "05/21/2019", 
"05/20/2019", "05/24/2019", "05/20/2019", "06/19/2019", "05/23/2019", 
"05/20/2019", "05/20/2019", "07/04/2019", "06/14/2019", "06/08/2019", 
"06/03/2019", "06/13/2019", "06/04/2019", "06/15/2019", "05/20/2019", 
"06/17/2019", "06/03/2019", "06/03/2019", "06/11/2019", "05/20/2019", 
"06/04/2019", "05/22/2019", "05/20/2019", "05/20/2019", "06/08/2019", 
"05/20/2019", "05/21/2019", "05/20/2019", "05/27/2019", "05/27/2019", 
"05/22/2019", "05/21/2019", "05/21/2019", "06/18/2019", "06/05/2019", 
"05/20/2019", "05/20/2019", "06/09/2019", "05/28/2019", "05/28/2019", 
"06/24/2019", "05/28/2019", "05/27/2019", "06/05/2019", "05/20/2019", 
"06/10/2019", "05/20/2019", "05/30/2019", "05/21/2019", "05/20/2019", 
"05/20/2019", "05/31/2019", "06/16/2019", "05/28/2019", "05/25/2019", 
"06/04/2019", "06/02/2019", "06/04/2019", "06/05/2019", "06/03/2019", 
"05/25/2019", "05/26/2019", "05/28/2019", "06/30/2019", "05/21/2019", 
"06/03/2019", "05/21/2019", "06/16/2019", "06/04/2019", "05/24/2019", 
"06/04/2019", "05/31/2019", "06/06/2019", "05/27/2019", "05/30/2019", 
"06/01/2019", "06/06/2019", "05/20/2019", "05/22/2019", "06/15/2019", 
"06/03/2019", "05/21/2019", "06/18/2019", "06/27/2019", "05/21/2019", 
"05/30/2019", "05/22/2019", "05/25/2019", "06/06/2019", "06/05/2019", 
"06/09/2019", "05/23/2019", "05/21/2019", "06/15/2019", "06/14/2019", 
"05/20/2019", "06/15/2019", "05/24/2019", "05/22/2019", "05/20/2019", 
"05/23/2019", "05/21/2019", "05/24/2019", "05/22/2019", "06/22/2019", 
"06/06/2019", "05/20/2019", "05/20/2019", "05/28/2019", "05/23/2019", 
"05/20/2019", "06/04/2019", "06/21/2019", "06/26/2019", "05/24/2019", 
"05/22/2019", "06/05/2019", "06/06/2019", "05/23/2019", "05/26/2019", 
"05/26/2019", "06/04/2019", "07/02/2019", "05/20/2019", "05/20/2019", 
"05/20/2019", "06/04/2019", "05/20/2019", "05/23/2019", "06/05/2019", 
"05/20/2019", "05/27/2019", "05/24/2019", "06/13/2019", "06/21/2019", 
"06/10/2019", "05/20/2019", "06/13/2019", "06/05/2019", "05/23/2019", 
"06/04/2019", "05/20/2019", "05/20/2019", "05/28/2019", "06/29/2019", 
"06/17/2019", "06/03/2019", "06/03/2019", "06/02/2019", "06/04/2019", 
"05/26/2019", "06/03/2019", "06/22/2019", "05/20/2019", "06/03/2019", 
"05/22/2019", "05/23/2019", "06/03/2019", "05/29/2019", "05/28/2019", 
"07/02/2019", "06/07/2019", "06/21/2019", "06/01/2019", "06/03/2019", 
"05/21/2019", "05/30/2019", "06/07/2019", "06/04/2019", "05/20/2019", 
"06/17/2019", "06/06/2019", "05/20/2019", "05/26/2019", "05/21/2019", 
"06/03/2019", "06/03/2019", "06/04/2019", "06/16/2019", "05/22/2019", 
"07/03/2019", "05/20/2019", "05/21/2019")), row.names = c("16573", 
"2114", "17632", "14249", "5169", "20505", "22268", "11519", 
"8500", "14597", "15830", "5964", "24088", "6259", "1345", "24484", 
"10225", "24985", "12584", "19627", "24278", "10814", "10559", 
"18905", "23512", "17167", "11760", "23947", "1003", "16229", 
"15348", "14627", "24806", "12419", "18681", "13595", "9228", 
"16354", "18363", "20331", "21097", "23457", "22440", "9176", 
"9862", "3682", "13423", "25536", "20187", "8217", "6137", "8648", 
"6552", "4859", "18014", "9015", "2624", "12437", "23884", "20404", 
"3365", "3291", "23731", "15372", "24447", "11009", "16533", 
"22990", "22145", "25122", "8335", "7527", "16011", "16865", 
"12429", "4709", "3269", "19721", "5001", "11731", "7933", "18174", 
"16398", "3432", "2890", "5792", "4057", "15877", "20939", "15928", 
"15896", "23313", "9982", "20427", "4510", "14587", "17223", 
"7665", "14281", "594", "7076", "23310", "609", "20217", "8099", 
"18773", "4546", "10367", "18237", "22110", "9658", "6909", "12047", 
"24545", "5082", "13545", "8783", "10961", "20754", "7086", "15179", 
"4822", "20599", "23359", "23749", "22878", "11783", "11278", 
"3232", "19277", "18045", "1862", "1503", "19801", "18922", "22789", 
"3673", "12472", "14335", "17846", "1269", "9050", "15449", "20114", 
"9692", "5020", "24011", "10208", "411", "2741", "10972", "23409", 
"17518", "12153", "16689", "22623", "13604", "16199", "12978", 
"24524", "20858", "21581", "20166", "18741", "7929", "22840", 
"14782", "16208", "11057", "5126", "9278", "10843", "19346", 
"8898", "15046", "12816", "1714", "375", "23216", "8672", "11015", 
"2847", "5564", "23642", "5673", "20655", "4787", "9709", "4399", 
"11853", "6448", "7210", "2195", "1176", "4342", "5421", "12508", 
"23105", "1505", "17312"), class = "data.frame")

Thanks in advance

CodePudding user response:

If I understood correctly

library(reshape2)
df2=dcast(df,startday~sf_sex,length)
df2[,c("FemaleC","MaleC")]=cumsum(df2[,c("Female","Male")])
df2[,c("FemaleC","MaleC")]/rowSums(df2[,c("FemaleC","MaleC")])

which would look like this (first 10 rows)

     startday Female Male FemaleC MaleC   FemaleP     MaleP
1  05/20/2019     22   19      22    19 0.5365854 0.4634146
2  05/21/2019     10    5      32    24 0.5714286 0.4285714
3  05/22/2019      7    2      39    26 0.6000000 0.4000000
4  05/23/2019      5    3      44    29 0.6027397 0.3972603
5  05/24/2019      3    3      47    32 0.5949367 0.4050633
6  05/25/2019      2    3      49    35 0.5833333 0.4166667
7  05/26/2019      5    0      54    35 0.6067416 0.3932584
8  05/27/2019      3    4      57    39 0.5937500 0.4062500
9  05/28/2019      5    3      62    42 0.5961538 0.4038462
10 05/29/2019      1    0      63    42 0.6000000 0.4000000
  •  Tags:  
  • r
  • Related