I am trying to get the cumulative proportion of a variable (Gender; sf_sex) across time (startday; Days).
My code is the following
with(df,
cbind( Freq=table(startday, sf_sex),
Cumul =cumsum(table(startday, sf_sex)),
relative = prop.table(table(startday, sf_sex),1)))
What I do get is somehow correct but not exactly as I would like to see the cumulative proportion for every day and I get the proportion of simply, the every day cases. So, put it in another way - the cases for the variable sum every day with all the previous ones and having the proportion of that rather than only the proportion of a day without considering the other ones.
Here is the data
df <- structure(list(sf_sex = c("Female", "Male", "Male", "Female",
"Male", "Female", "Female", "Female", "Male", "Male", "Female",
"Male", "Male", "Male", "Female", "Female", "Female", "Female",
"Female", "Female", "Female", "Male", "Male", "Male", "Male",
"Female", "Female", "Male", "Male", "Female", "Male", "Male",
"Male", "Male", "Male", "Male", "Female", "Male", "Female", "Female",
"Male", "Male", "Male", "Female", "Male", "Male", "Male", "Female",
"Male", "Male", "Female", "Female", "Female", "Female", "Male",
"Female", "Female", "Female", "Female", "Male", "Female", "Female",
"Male", "Female", "Female", "Male", "Female", "Female", "Male",
"Female", "Male", "Female", "Male", "Female", "Male", "Male",
"Male", "Male", "Female", "Female", "Male", "Female", "Male",
"Female", "Female", "Female", "Male", "Female", "Male", "Male",
"Female", "Female", "Male", "Female", "Female", "Female", "Male",
"Male", "Female", "Male", "Female", "Female", "Female", "Female",
"Male", "Female", "Male", "Female", "Female", "Female", "Female",
"Female", "Female", "Male", "Male", "Female", "Male", "Male",
"Female", "Male", "Male", "Male", "Female", "Male", "Male", "Male",
"Female", "Male", "Male", "Female", "Female", "Female", "Female",
"Female", "Female", "Female", "Female", "Female", "Male", "Female",
"Male", "Male", "Female", "Female", "Female", "Female", "Male",
"Male", "Male", "Female", "Female", "Female", "Male", "Female",
"Male", "Female", "Female", "Female", "Female", "Female", "Female",
"Female", "Male", "Male", "Male", "Female", "Male", "Female",
"Female", "Male", "Female", "Male", "Male", "Male", "Male", "Female",
"Male", "Female", "Female", "Male", "Male", "Female", "Female",
"Female", "Female", "Male", "Female", "Female", "Male", "Male",
"Female", "Male", "Female", "Female", "Male", "Male", "Female",
"Female", "Male", "Female"), startday = c("05/27/2019", "06/12/2019",
"05/20/2019", "06/16/2019", "05/25/2019", "05/20/2019", "06/16/2019",
"05/20/2019", "05/20/2019", "05/20/2019", "05/25/2019", "05/20/2019",
"05/21/2019", "06/28/2019", "05/27/2019", "05/20/2019", "05/21/2019",
"05/20/2019", "05/24/2019", "05/20/2019", "06/19/2019", "05/23/2019",
"05/20/2019", "05/20/2019", "07/04/2019", "06/14/2019", "06/08/2019",
"06/03/2019", "06/13/2019", "06/04/2019", "06/15/2019", "05/20/2019",
"06/17/2019", "06/03/2019", "06/03/2019", "06/11/2019", "05/20/2019",
"06/04/2019", "05/22/2019", "05/20/2019", "05/20/2019", "06/08/2019",
"05/20/2019", "05/21/2019", "05/20/2019", "05/27/2019", "05/27/2019",
"05/22/2019", "05/21/2019", "05/21/2019", "06/18/2019", "06/05/2019",
"05/20/2019", "05/20/2019", "06/09/2019", "05/28/2019", "05/28/2019",
"06/24/2019", "05/28/2019", "05/27/2019", "06/05/2019", "05/20/2019",
"06/10/2019", "05/20/2019", "05/30/2019", "05/21/2019", "05/20/2019",
"05/20/2019", "05/31/2019", "06/16/2019", "05/28/2019", "05/25/2019",
"06/04/2019", "06/02/2019", "06/04/2019", "06/05/2019", "06/03/2019",
"05/25/2019", "05/26/2019", "05/28/2019", "06/30/2019", "05/21/2019",
"06/03/2019", "05/21/2019", "06/16/2019", "06/04/2019", "05/24/2019",
"06/04/2019", "05/31/2019", "06/06/2019", "05/27/2019", "05/30/2019",
"06/01/2019", "06/06/2019", "05/20/2019", "05/22/2019", "06/15/2019",
"06/03/2019", "05/21/2019", "06/18/2019", "06/27/2019", "05/21/2019",
"05/30/2019", "05/22/2019", "05/25/2019", "06/06/2019", "06/05/2019",
"06/09/2019", "05/23/2019", "05/21/2019", "06/15/2019", "06/14/2019",
"05/20/2019", "06/15/2019", "05/24/2019", "05/22/2019", "05/20/2019",
"05/23/2019", "05/21/2019", "05/24/2019", "05/22/2019", "06/22/2019",
"06/06/2019", "05/20/2019", "05/20/2019", "05/28/2019", "05/23/2019",
"05/20/2019", "06/04/2019", "06/21/2019", "06/26/2019", "05/24/2019",
"05/22/2019", "06/05/2019", "06/06/2019", "05/23/2019", "05/26/2019",
"05/26/2019", "06/04/2019", "07/02/2019", "05/20/2019", "05/20/2019",
"05/20/2019", "06/04/2019", "05/20/2019", "05/23/2019", "06/05/2019",
"05/20/2019", "05/27/2019", "05/24/2019", "06/13/2019", "06/21/2019",
"06/10/2019", "05/20/2019", "06/13/2019", "06/05/2019", "05/23/2019",
"06/04/2019", "05/20/2019", "05/20/2019", "05/28/2019", "06/29/2019",
"06/17/2019", "06/03/2019", "06/03/2019", "06/02/2019", "06/04/2019",
"05/26/2019", "06/03/2019", "06/22/2019", "05/20/2019", "06/03/2019",
"05/22/2019", "05/23/2019", "06/03/2019", "05/29/2019", "05/28/2019",
"07/02/2019", "06/07/2019", "06/21/2019", "06/01/2019", "06/03/2019",
"05/21/2019", "05/30/2019", "06/07/2019", "06/04/2019", "05/20/2019",
"06/17/2019", "06/06/2019", "05/20/2019", "05/26/2019", "05/21/2019",
"06/03/2019", "06/03/2019", "06/04/2019", "06/16/2019", "05/22/2019",
"07/03/2019", "05/20/2019", "05/21/2019")), row.names = c("16573",
"2114", "17632", "14249", "5169", "20505", "22268", "11519",
"8500", "14597", "15830", "5964", "24088", "6259", "1345", "24484",
"10225", "24985", "12584", "19627", "24278", "10814", "10559",
"18905", "23512", "17167", "11760", "23947", "1003", "16229",
"15348", "14627", "24806", "12419", "18681", "13595", "9228",
"16354", "18363", "20331", "21097", "23457", "22440", "9176",
"9862", "3682", "13423", "25536", "20187", "8217", "6137", "8648",
"6552", "4859", "18014", "9015", "2624", "12437", "23884", "20404",
"3365", "3291", "23731", "15372", "24447", "11009", "16533",
"22990", "22145", "25122", "8335", "7527", "16011", "16865",
"12429", "4709", "3269", "19721", "5001", "11731", "7933", "18174",
"16398", "3432", "2890", "5792", "4057", "15877", "20939", "15928",
"15896", "23313", "9982", "20427", "4510", "14587", "17223",
"7665", "14281", "594", "7076", "23310", "609", "20217", "8099",
"18773", "4546", "10367", "18237", "22110", "9658", "6909", "12047",
"24545", "5082", "13545", "8783", "10961", "20754", "7086", "15179",
"4822", "20599", "23359", "23749", "22878", "11783", "11278",
"3232", "19277", "18045", "1862", "1503", "19801", "18922", "22789",
"3673", "12472", "14335", "17846", "1269", "9050", "15449", "20114",
"9692", "5020", "24011", "10208", "411", "2741", "10972", "23409",
"17518", "12153", "16689", "22623", "13604", "16199", "12978",
"24524", "20858", "21581", "20166", "18741", "7929", "22840",
"14782", "16208", "11057", "5126", "9278", "10843", "19346",
"8898", "15046", "12816", "1714", "375", "23216", "8672", "11015",
"2847", "5564", "23642", "5673", "20655", "4787", "9709", "4399",
"11853", "6448", "7210", "2195", "1176", "4342", "5421", "12508",
"23105", "1505", "17312"), class = "data.frame")
Thanks in advance
CodePudding user response:
If I understood correctly
library(reshape2)
df2=dcast(df,startday~sf_sex,length)
df2[,c("FemaleC","MaleC")]=cumsum(df2[,c("Female","Male")])
df2[,c("FemaleC","MaleC")]/rowSums(df2[,c("FemaleC","MaleC")])
which would look like this (first 10 rows)
startday Female Male FemaleC MaleC FemaleP MaleP
1 05/20/2019 22 19 22 19 0.5365854 0.4634146
2 05/21/2019 10 5 32 24 0.5714286 0.4285714
3 05/22/2019 7 2 39 26 0.6000000 0.4000000
4 05/23/2019 5 3 44 29 0.6027397 0.3972603
5 05/24/2019 3 3 47 32 0.5949367 0.4050633
6 05/25/2019 2 3 49 35 0.5833333 0.4166667
7 05/26/2019 5 0 54 35 0.6067416 0.3932584
8 05/27/2019 3 4 57 39 0.5937500 0.4062500
9 05/28/2019 5 3 62 42 0.5961538 0.4038462
10 05/29/2019 1 0 63 42 0.6000000 0.4000000