I would like to check whether two measurements are exactly the same. The measurements are recorded on the same day by two different individuals, at 10-minute intervals. The day starts at 4:00am (act1_1) and ends with the slot that begins at 3:50am (act1_144). At each time step, one measurement is recorded for each of the 2 individuals. People are identified by the serial (household) and pnum (person) numbers. If the two measurements for a time step on the same day (DiaryDate_Act) are identical, I would like to replace them with 10; otherwise, I'd like to replace them with 0.
Then, for each day, I would like to add up these values to get the total number of minutes during which the two measurements are identical.
Such as the desired output:
serial(11011202)_DiaryDateAct(14/12/2014)_110 = act1_1 - act1_15 = 150
.
serial(11011202)_DiaryDateAct(14/12/2014)_7330 = act1_79 - act1_86 = 80
... and so on
How can I do this for any number of ids (e.g. serial(11011202)_DiaryDateAct(14/12/2014)) and any number of distinct measurement values (e.g. 110, 7330)?
For example:
Data structure
Data sample (`dput()` output; the answer below refers to this data frame as `df`):
structure(list(serial = structure(c(11011202, 11011202, 11011202,
11011202), label = "Household number", format.stata = ".0g"),
pnum = structure(c(1, 4, 4, 1), label = "Person number", format.stata = "%8.0g"),
daynum = structure(c(1, 1, 2, 2), label = "Diary number", format.stata = "%8.0g"),
DiaryDate_Act = structure(c(16415, 16415, 16418, 16418), label = "Diary Day - ACTUAL DATE", class = "Date", format.stata = "%tdD_m_Y"),
act1_1 = structure(c(110, 110, 110, 110), label = "Primary activity: 04:00-04:10", format.stata = "%8.0g"),
act1_2 = structure(c(110, 110, 110, 110), label = "Primary activity: 04:10-04:20", format.stata = "%8.0g"),
act1_3 = structure(c(110, 110, 110, 110), label = "Primary activity: 04:20-04:30", format.stata = "%8.0g"),
act1_4 = structure(c(110, 110, 110, 110), label = "Primary activity: 04:30-04:40", format.stata = "%8.0g"),
act1_5 = structure(c(110, 110, 110, 110), label = "Primary activity: 04:40-04:50", format.stata = "%8.0g"),
act1_6 = structure(c(110, 110, 110, 110), label = "Primary activity: 04:50-05:00", format.stata = "%8.0g"),
act1_7 = structure(c(110, 110, 110, 110), label = "Primary activity: 05:00-05:10", format.stata = "%8.0g"),
act1_8 = structure(c(110, 110, 110, 110), label = "Primary activity: 05:10-05:20", format.stata = "%8.0g"),
act1_9 = structure(c(110, 110, 110, 110), label = "Primary activity: 05:20-05:30", format.stata = "%8.0g"),
act1_10 = structure(c(110, 110, 110, 110), label = "Primary activity: 05:30-05:40", format.stata = "%8.0g"),
act1_11 = structure(c(110, 110, 110, 110), label = "Primary activity: 05:40-05:50", format.stata = "%8.0g"),
act1_12 = structure(c(8219, 110, 110, 110), label = "Primary activity: 05:50-06:00", format.stata = "%8.0g"),
act1_13 = structure(c(310, 110, 110, 110), label = "Primary activity: 06:00-06:10", format.stata = "%8.0g"),
act1_14 = structure(c(3210, 110, 110, 110), label = "Primary activity: 06:10-06:20", format.stata = "%8.0g"),
act1_15 = structure(c(3110, 110, 110, 110), label = "Primary activity: 06:20-06:30", format.stata = "%8.0g"),
act1_16 = structure(c(7241, 110, 110, 111), label = "Primary activity: 06:30-06:40", format.stata = "%8.0g"),
act1_17 = structure(c(210, 110, 110, 310), label = "Primary activity: 06:40-06:50", format.stata = "%8.0g"),
act1_18 = structure(c(3819, 110, 110, 3110), label = "Primary activity: 06:50-07:00", format.stata = "%8.0g"),
act1_19 = structure(c(210, 110, 110, 3310), label = "Primary activity: 07:00-07:10", format.stata = "%8.0g"),
act1_20 = structure(c(210, 310, 110, 3310), label = "Primary activity: 07:10-07:20", format.stata = "%8.0g"),
act1_21 = structure(c(210, 210, 110, 210), label = "Primary activity: 07:20-07:30", format.stata = "%8.0g"),
act1_22 = structure(c(3310, 310, 110, 210), label = "Primary activity: 07:30-07:40", format.stata = "%8.0g"),
act1_23 = structure(c(3210, 310, 110, 210), label = "Primary activity: 07:40-07:50", format.stata = "%8.0g"),
act1_24 = structure(c(3210, 9210, 110, 210), label = "Primary activity: 07:50-08:00", format.stata = "%8.0g"),
act1_25 = structure(c(3210, 9210, 110, 3310), label = "Primary activity: 08:00-08:10", format.stata = "%8.0g"),
act1_26 = structure(c(3210, 9210, 110, 3310), label = "Primary activity: 08:10-08:20", format.stata = "%8.0g"),
act1_27 = structure(c(3110, 9210, 110, 7241), label = "Primary activity: 08:20-08:30", format.stata = "%8.0g"),
act1_28 = structure(c(3110, 9210, 110, 7241), label = "Primary activity: 08:30-08:40", format.stata = "%8.0g"),
act1_29 = structure(c(3110, 9210, 110, 7241), label = "Primary activity: 08:40-08:50", format.stata = "%8.0g"),
act1_30 = structure(c(3110, 9210, 110, 7241), label = "Primary activity: 08:50-09:00", format.stata = "%8.0g"),
act1_31 = structure(c(7259, 2110, 111, 3430), label = "Primary activity: 09:00-09:10", format.stata = "%8.0g"),
act1_32 = structure(c(5140, 2110, 111, 3430), label = "Primary activity: 09:10-09:20", format.stata = "%8.0g"),
act1_33 = structure(c(5140, 2110, 111, 3210), label = "Primary activity: 09:20-09:30", format.stata = "%8.0g"),
act1_34 = structure(c(5140, 2110, 3110, 3210), label = "Primary activity: 09:30-09:40", format.stata = "%8.0g"),
act1_35 = structure(c(5140, 2110, 7330, 3210), label = "Primary activity: 09:40-09:50", format.stata = "%8.0g"),
act1_36 = structure(c(5140, 2110, 7330, 3210), label = "Primary activity: 09:50-10:00", format.stata = "%8.0g"),
act1_37 = structure(c(7259, 2110, 7330, 210), label = "Primary activity: 10:00-10:10", format.stata = "%8.0g"),
act1_38 = structure(c(7259, 2110, 7330, 5110), label = "Primary activity: 10:10-10:20", format.stata = "%8.0g"),
act1_39 = structure(c(7259, 2110, 7330, 3210), label = "Primary activity: 10:20-10:30", format.stata = "%8.0g"),
act1_40 = structure(c(7259, 2110, 7330, 3210), label = "Primary activity: 10:30-10:40", format.stata = "%8.0g"),
act1_41 = structure(c(7259, 2110, 7330, 5110), label = "Primary activity: 10:40-10:50", format.stata = "%8.0g"),
act1_42 = structure(c(7259, 2110, 7330, 5110), label = "Primary activity: 10:50-11:00", format.stata = "%8.0g"),
act1_43 = structure(c(3110, 2110, 8212, 5110), label = "Primary activity: 11:00-11:10", format.stata = "%8.0g"),
act1_44 = structure(c(5140, 2110, 8212, 5120), label = "Primary activity: 11:10-11:20", format.stata = "%8.0g"),
act1_45 = structure(c(5140, 2110, 8212, 5120), label = "Primary activity: 11:20-11:30", format.stata = "%8.0g"),
act1_46 = structure(c(8120, 2110, 8212, 5120), label = "Primary activity: 11:30-11:40", format.stata = "%8.0g"),
act1_47 = structure(c(8120, 2110, 8212, 5120), label = "Primary activity: 11:40-11:50", format.stata = "%8.0g"),
act1_48 = structure(c(8120, 2110, 8212, 9360), label = "Primary activity: 11:50-12:00", format.stata = "%8.0g"),
act1_49 = structure(c(8120, 2110, 8212, 3610), label = "Primary activity: 12:00-12:10", format.stata = "%8.0g"),
act1_50 = structure(c(8120, 2110, 8212, 3611), label = "Primary activity: 12:10-12:20", format.stata = "%8.0g"),
act1_51 = structure(c(8120, 2110, 8212, 3611), label = "Primary activity: 12:20-12:30", format.stata = "%8.0g"),
act1_52 = structure(c(8120, 2110, 8212, 3611), label = "Primary activity: 12:30-12:40", format.stata = "%8.0g"),
act1_53 = structure(c(8120, 2110, 8212, 3611), label = "Primary activity: 12:40-12:50", format.stata = "%8.0g"),
act1_54 = structure(c(8120, 2110, 8212, 3611), label = "Primary activity: 12:50-13:00", format.stata = "%8.0g"),
act1_55 = structure(c(210, 2110, 3610, 3611), label = "Primary activity: 13:00-13:10", format.stata = "%8.0g"),
act1_56 = structure(c(210, 2110, 3610, 210), label = "Primary activity: 13:10-13:20", format.stata = "%8.0g"),
act1_57 = structure(c(210, 210, 3610, 210), label = "Primary activity: 13:20-13:30", format.stata = "%8.0g"),
act1_58 = structure(c(210, 210, 3610, 210), label = "Primary activity: 13:30-13:40", format.stata = "%8.0g"),
act1_59 = structure(c(210, 210, 3610, 210), label = "Primary activity: 13:40-13:50", format.stata = "%8.0g"),
act1_60 = structure(c(210, 210, 3610, 210), label = "Primary activity: 13:50-14:00", format.stata = "%8.0g"),
act1_61 = structure(c(9360, 210, 3610, 3610), label = "Primary activity: 14:00-14:10", format.stata = "%8.0g"),
act1_62 = structure(c(9360, 210, 3610, 3610), label = "Primary activity: 14:10-14:20", format.stata = "%8.0g"),
act1_63 = structure(c(9360, 210, 3610, 9360), label = "Primary activity: 14:20-14:30", format.stata = "%8.0g"),
act1_64 = structure(c(9360, 2110, 3610, 9360), label = "Primary activity: 14:30-14:40", format.stata = "%8.0g"),
act1_65 = structure(c(9360, 2110, 3610, 3240), label = "Primary activity: 14:40-14:50", format.stata = "%8.0g"),
act1_66 = structure(c(9360, 2110, 3610, 3240), label = "Primary activity: 14:50-15:00", format.stata = "%8.0g"),
act1_67 = structure(c(3610, 2110, 3610, 3240), label = "Primary activity: 15:00-15:10", format.stata = "%8.0g"),
act1_68 = structure(c(3610, 2110, 3610, 3110), label = "Primary activity: 15:10-15:20", format.stata = "%8.0g"),
act1_69 = structure(c(3610, 2110, 3610, 3110), label = "Primary activity: 15:20-15:30", format.stata = "%8.0g"),
act1_70 = structure(c(3610, 9210, 3610, 3110), label = "Primary activity: 15:30-15:40", format.stata = "%8.0g"),
act1_71 = structure(c(3610, 9210, 3610, 3110), label = "Primary activity: 15:40-15:50", format.stata = "%8.0g"),
act1_72 = structure(c(3610, 9210, 3610, 3110), label = "Primary activity: 15:50-16:00", format.stata = "%8.0g"),
act1_73 = structure(c(3240, 9210, 3610, 3110), label = "Primary activity: 16:00-16:10", format.stata = "%8.0g"),
act1_74 = structure(c(3240, 9210, 3610, 3110), label = "Primary activity: 16:10-16:20", format.stata = "%8.0g"),
act1_75 = structure(c(3110, 9210, 3610, 3110), label = "Primary activity: 16:20-16:30", format.stata = "%8.0g"),
act1_76 = structure(c(3110, 9210, 3240, 3110), label = "Primary activity: 16:30-16:40", format.stata = "%8.0g"),
act1_77 = structure(c(3110, 9210, 3240, 3110), label = "Primary activity: 16:40-16:50", format.stata = "%8.0g"),
act1_78 = structure(c(3110, 9210, 3240, 3110), label = "Primary activity: 16:50-17:00", format.stata = "%8.0g"),
act1_79 = structure(c(7220, 7330, 7330, 3110), label = "Primary activity: 17:00-17:10", format.stata = "%8.0g"),
act1_80 = structure(c(7220, 7330, 7330, 5310), label = "Primary activity: 17:10-17:20", format.stata = "%8.0g"),
act1_81 = structure(c(7220, 7330, 7330, 5310), label = "Primary activity: 17:20-17:30", format.stata = "%8.0g"),
act1_82 = structure(c(7220, 7330, 7330, 5310), label = "Primary activity: 17:30-17:40", format.stata = "%8.0g"),
act1_83 = structure(c(7220, 7330, 7330, 5310), label = "Primary activity: 17:40-17:50", format.stata = "%8.0g"),
act1_84 = structure(c(7220, 7330, 7330, 5310), label = "Primary activity: 17:50-18:00", format.stata = "%8.0g"),
act1_85 = structure(c(210, 7330, 7330, 3110), label = "Primary activity: 18:00-18:10", format.stata = "%8.0g"),
act1_86 = structure(c(210, 7330, 7330, 3110), label = "Primary activity: 18:10-18:20", format.stata = "%8.0g"),
act1_87 = structure(c(210, 2120, 7330, 210), label = "Primary activity: 18:20-18:30", format.stata = "%8.0g"),
act1_88 = structure(c(210, 2120, 7330, 210), label = "Primary activity: 18:30-18:40", format.stata = "%8.0g"),
act1_89 = structure(c(210, 2120, 2120, 210), label = "Primary activity: 18:40-18:50", format.stata = "%8.0g"),
act1_90 = structure(c(8210, 2120, 2120, 210), label = "Primary activity: 18:50-19:00", format.stata = "%8.0g"),
act1_91 = structure(c(8210, 210, 310, 3240), label = "Primary activity: 19:00-19:10", format.stata = "%8.0g"),
act1_92 = structure(c(8210, 210, 8212, 3240), label = "Primary activity: 19:10-19:20", format.stata = "%8.0g"),
act1_93 = structure(c(8210, 210, 8212, 3240), label = "Primary activity: 19:20-19:30", format.stata = "%8.0g"),
act1_94 = structure(c(8210, 8210, 8212, 3240), label = "Primary activity: 19:30-19:40", format.stata = "%8.0g"),
act1_95 = structure(c(8210, 8210, 8212, 3240), label = "Primary activity: 19:40-19:50", format.stata = "%8.0g"),
act1_96 = structure(c(8210, 8210, 8212, 3240), label = "Primary activity: 19:50-20:00", format.stata = "%8.0g"),
act1_97 = structure(c(3130, 8210, 8212, 3130), label = "Primary activity: 20:00-20:10", format.stata = "%8.0g"),
act1_98 = structure(c(3130, 8210, 8212, 3130), label = "Primary activity: 20:10-20:20", format.stata = "%8.0g"),
act1_99 = structure(c(3130, 8210, 8212, 3130), label = "Primary activity: 20:20-20:30", format.stata = "%8.0g"),
act1_100 = structure(c(3130, 3110, 8212, 8210), label = "Primary activity: 20:30-20:40", format.stata = "%8.0g"),
act1_101 = structure(c(3130, 210, 8212, 8210), label = "Primary activity: 20:40-20:50", format.stata = "%8.0g"),
act1_102 = structure(c(5110, 2120, 8212, 8210), label = "Primary activity: 20:50-21:00", format.stata = "%8.0g"),
act1_103 = structure(c(7220, 2120, 8212, 8210), label = "Primary activity: 21:00-21:10", format.stata = "%8.0g"),
act1_104 = structure(c(7220, 2120, 5140, 8210), label = "Primary activity: 21:10-21:20", format.stata = "%8.0g"),
act1_105 = structure(c(7220, 2120, 5140, 8210), label = "Primary activity: 21:20-21:30", format.stata = "%8.0g"),
act1_106 = structure(c(7220, 310, 5140, 8210), label = "Primary activity: 21:30-21:40", format.stata = "%8.0g"),
act1_107 = structure(c(7220, 8210, 5140, 8210), label = "Primary activity: 21:40-21:50", format.stata = "%8.0g"),
act1_108 = structure(c(7220, 8210, 5140, 8210), label = "Primary activity: 21:50-22:00", format.stata = "%8.0g"),
act1_109 = structure(c(5110, 310, 110, 310), label = "Primary activity: 22:00-22:10", format.stata = "%8.0g"),
act1_110 = structure(c(5110, 110, 110, 310), label = "Primary activity: 22:10-22:20", format.stata = "%8.0g"),
act1_111 = structure(c(5110, 110, 110, 310), label = "Primary activity: 22:20-22:30", format.stata = "%8.0g"),
act1_112 = structure(c(5110, 110, 110, 310), label = "Primary activity: 22:30-22:40", format.stata = "%8.0g"),
act1_113 = structure(c(5110, 110, 110, 310), label = "Primary activity: 22:40-22:50", format.stata = "%8.0g"),
act1_114 = structure(c(5110, 110, 110, 310), label = "Primary activity: 22:50-23:00", format.stata = "%8.0g"),
act1_115 = structure(c(5110, 110, 110, 110), label = "Primary activity: 23:00-23:10", format.stata = "%8.0g"),
act1_116 = structure(c(5110, 110, 110, 110), label = "Primary activity: 23:10-23:20", format.stata = "%8.0g"),
act1_117 = structure(c(5110, 110, 110, 110), label = "Primary activity: 23:20-23:30", format.stata = "%8.0g"),
act1_118 = structure(c(5110, 110, 110, 110), label = "Primary activity: 23:30-23:40", format.stata = "%8.0g"),
act1_119 = structure(c(7220, 110, 110, 110), label = "Primary activity: 23:40-23:50", format.stata = "%8.0g"),
act1_120 = structure(c(7220, 110, 110, 110), label = "Primary activity: 23:50-00:00", format.stata = "%8.0g"),
act1_121 = structure(c(7220, 110, 110, 110), label = "Primary activity: 00:00-00:10", format.stata = "%8.0g"),
act1_122 = structure(c(7220, 110, 110, 110), label = "Primary activity: 00:10-00:20", format.stata = "%8.0g"),
act1_123 = structure(c(7220, 110, 110, 110), label = "Primary activity: 00:20-00:30", format.stata = "%8.0g"),
act1_124 = structure(c(7220, 110, 110, 110), label = "Primary activity: 00:30-00:40", format.stata = "%8.0g"),
act1_125 = structure(c(7220, 110, 110, 110), label = "Primary activity: 00:40-00:50", format.stata = "%8.0g"),
act1_126 = structure(c(7220, 110, 110, 110), label = "Primary activity: 00:50-01:00", format.stata = "%8.0g"),
act1_127 = structure(c(110, 110, 110, 110), label = "Primary activity: 01:00-01:10", format.stata = "%8.0g"),
act1_128 = structure(c(110, 110, 110, 110), label = "Primary activity: 01:10-01:20", format.stata = "%8.0g"),
act1_129 = structure(c(110, 110, 110, 110), label = "Primary activity: 01:20-01:30", format.stata = "%8.0g"),
act1_130 = structure(c(110, 110, 110, 110), label = "Primary activity: 01:30-01:40", format.stata = "%8.0g"),
act1_131 = structure(c(110, 110, 110, 110), label = "Primary activity: 01:40-01:50", format.stata = "%8.0g"),
act1_132 = structure(c(110, 110, 110, 110), label = "Primary activity: 01:50-02:00", format.stata = "%8.0g"),
act1_133 = structure(c(110, 110, 110, 110), label = "Primary activity: 02:00-02:10", format.stata = "%8.0g"),
act1_134 = structure(c(110, 110, 110, 110), label = "Primary activity: 02:10-02:20", format.stata = "%8.0g"),
act1_135 = structure(c(110, 110, 110, 110), label = "Primary activity: 02:20-02:30", format.stata = "%8.0g"),
act1_136 = structure(c(110, 110, 110, 110), label = "Primary activity: 02:30-02:40", format.stata = "%8.0g"),
act1_137 = structure(c(110, 110, 110, 110), label = "Primary activity: 02:40-02:50", format.stata = "%8.0g"),
act1_138 = structure(c(110, 110, 110, 110), label = "Primary activity: 02:50-03:00", format.stata = "%8.0g"),
act1_139 = structure(c(110, 110, 110, 110), label = "Primary activity: 03:00-03:10", format.stata = "%8.0g"),
act1_140 = structure(c(110, 110, 110, 110), label = "Primary activity: 03:10-03:20", format.stata = "%8.0g"),
act1_141 = structure(c(110, 110, 110, 110), label = "Primary activity: 03:20-03:30", format.stata = "%8.0g"),
act1_142 = structure(c(110, 110, 110, 110), label = "Primary activity: 03:30-03:40", format.stata = "%8.0g"),
act1_143 = structure(c(110, 110, 110, 110), label = "Primary activity: 03:40-03:50", format.stata = "%8.0g"),
act1_144 = structure(c(110, 110, 110, 110), label = "Primary activity: 03:50-04:00", format.stata = "%8.0g")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L))
CodePudding user response:
You can `group_by` the household and date, `summarize` each time slot to 10 or 0 depending on whether the `diff` of the two rows is zero (e.g. with `ifelse`), then sum the resulting columns:
library(tidyverse)
# Matched minutes per household (serial) and diary date.
#
# Each `act*` column is one 10-minute slot. Within a household-day the slot
# scores 10 when every diary row holds the same activity code and 0
# otherwise; the slot scores are then added up into `sum`.
#
# Returns one row per (serial, DiaryDate_Act) with the columns
# serial, DiaryDate_Act and sum.
df %>%
  group_by(serial, DiaryDate_Act) %>%
  summarize(
    across(
      starts_with("act"),
      # Always yields exactly one value per group: a day with a single diary
      # scores 0 (nothing to compare), and an NA code propagates as NA just
      # like a plain `diff(.x) == 0` comparison would.
      ~ 10 * (n() > 1 && all(diff(.x) == 0))
    ),
    # Drop all grouping here; otherwise the result stays grouped by `serial`
    # and the row-wise total below is evaluated per group.
    .groups = "drop"
  ) %>%
  # rowSums() over the slot columns of the current data; avoids `.` (the
  # whole piped tibble), which breaks as soon as there are several households.
  mutate(sum = rowSums(across(starts_with("act")))) %>%
  select(-starts_with("act"))
#> # A tibble: 2 x 3
#> serial DiaryDate_Act sum
#> <dbl> <date> <dbl>
#> 1 11011202 2014-12-11 370
#> 2 11011202 2014-12-14 470