I have the following data set:
structure(list(ID = structure(c(1L, 2L, 9L, 10L, 11L, 12L, 14L,
15L, 16L, 19L, 20L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L,
31L, 32L, 33L, 35L, 36L, 37L, 38L, 39L, 40L, 42L, 43L, 46L, 47L,
48L, 49L, 50L, 51L, 52L, 53L, 55L, 56L, 57L, 58L, 60L, 61L, 62L,
66L, 67L, 68L, 71L, 72L, 73L, 74L, 75L, 81L, 84L, 85L, 93L, 95L,
96L, 104L, 107L, 109L, 116L, 126L, 133L, 135L, 138L, 139L, 146L,
150L, 154L, 164L, 165L, 1L, 2L, 9L, 10L, 11L, 12L, 14L, 15L,
16L, 19L, 20L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 35L, 36L, 37L, 38L, 39L, 40L, 42L, 43L, 46L, 47L, 48L,
49L, 50L, 51L, 52L, 53L, 55L, 56L, 57L, 58L, 60L, 61L, 62L, 66L,
67L, 68L, 71L, 72L, 73L, 74L, 75L, 81L, 84L, 85L, 93L, 95L, 96L,
104L, 107L, 109L, 116L, 126L, 133L, 135L, 138L, 139L, 146L, 150L,
154L, 164L, 165L, 1L, 2L, 9L, 10L, 11L, 12L, 14L, 15L, 16L, 19L,
20L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
35L, 36L, 37L, 38L, 39L, 40L, 42L, 43L, 46L, 47L, 48L, 49L, 50L,
51L, 52L, 53L, 55L, 56L, 57L, 58L, 60L, 61L, 62L, 66L, 67L, 68L,
71L, 72L, 73L, 74L, 75L, 81L, 84L, 85L, 93L, 95L, 96L, 104L,
107L, 109L, 116L, 126L, 133L, 135L, 138L, 139L, 146L, 150L, 154L,
164L, 165L), .Label = c("33714", "35377", "38623", "38806", "39593",
"39820", "39951", "40286", "40556", "40798", "40800", "40815",
"43762", "50848", "52183", "52461", "52577", "53202", "53320",
"53873", "54153", "54206", "54581", "55122", "55267", "55462",
"55612", "55920", "56022", "56307", "56420", "56679", "57405",
"57445", "57480", "57725", "57809", "58004", "58215", "58229",
"58503", "59326", "59327", "59344", "59361", "59865", "60099",
"60100", "60280", "60384", "60429", "60493", "60503", "60575",
"60603", "60664", "60846", "61415", "61656", "61749", "61883",
"62081", "62210", "62285", "62937", "62983", "63327", "63329",
"64081", "64328", "64418", "64507", "64596", "65178", "65250",
"65302", "65478", "65480", "65487", "65572", "65802", "65935",
"65974", "65975", "65978", "65991", "65995", "66013", "66154",
"66237", "66245", "66389", "66396", "66460", "66572", "66589",
"67174", "73230", "73525", "73539", "73677", "73942", "73953",
"74034", "74113", "74114", "74427", "74439", "74607", "74641",
"74657", "74794", "74800", "74836", "74942", "74952", "74962",
"74969", "74977", "74985", "74989", "75220", "75229", "75407",
"75653", "75732", "75735", "75757", "75895", "75898", "76381",
"76559", "76574", "76594", "76595", "76746", "76751", "76755",
"76759", "76775", "77088", "77091", "77099", "77134", "77188",
"77203", "77252", "77304", "77413", "77453", "77528", "77556",
"77585", "77668", "78262", "79724", "79730", "79850", "79977",
"80052", "80819", "80901", "80932", "81064", "81065", "81071",
"81098", "81142", "81175"), class = "factor"), Timepoint_yrs = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 2L, 2L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 2L,
3L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L), Sex = c("Male",
"Female", "Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Female",
"Female", "Male", "Female", "Male", "Female", "Female", "Male",
"Female", "Female", "Male", "Female", "Male", "Male", "Female",
"Female", "Male", "Male", "Male", "Female", "Female", "Female",
"Male", "Male", "Female", "Female", "Male", "Female", "Male",
"Male", "Male", "Female", "Female", "Female", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Female", "Female",
"Female", "Female", "Female", "Male", "Male", "Female", "Male",
"Male", "Male", "Female", "Male", "Female", "Male", "Male", "Male",
"Female", "Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Female",
"Female", "Male", "Female", "Male", "Female", "Female", "Male",
"Female", "Female", "Male", "Female", "Male", "Male", "Female",
"Female", "Male", "Male", "Male", "Female", "Female", "Female",
"Male", "Male", "Female", "Female", "Male", "Female", "Male",
"Male", "Male", "Female", "Female", "Female", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Female", "Female",
"Female", "Female", "Female", "Male", "Male", "Female", "Male",
"Male", "Male", "Female", "Male", "Female", "Male", "Male", "Male",
"Female", "Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Female",
"Female", "Male", "Female", "Male", "Female", "Female", "Male",
"Female", "Female", "Male", "Female", "Male", "Male", "Female",
"Female", "Male", "Male", "Male", "Female", "Female", "Female",
"Male", "Male", "Female", "Female", "Male", "Female", "Male",
"Male", "Male", "Female", "Female", "Female", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Female", "Female",
"Female", "Female", "Female", "Male", "Male", "Female", "Male",
"Male", "Male", "Female", "Male", "Female", "Male", "Male"),
Timepoint = c("bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl",
"bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl",
"bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl",
"bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl",
"bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl",
"bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl",
"bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl", "bl",
"bl", "bl", "bl", "bl", "bl", "bl", "flu1", "flu1", "flu1",
"flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1",
"flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1",
"flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1",
"flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1",
"flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1",
"flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1",
"flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1",
"flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1",
"flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu1", "flu2",
"flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2",
"flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2",
"flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2",
"flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2",
"flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2",
"flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2",
"flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2",
"flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2",
"flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2", "flu2",
"flu2"), mean_AD = c(0.0010333, 0.00105981, 0.00113736, 0.001108715,
0.00104864, 0.00110772, 0.00109096, 0.00109855, 0.00104169,
0.00112465, 0.001096525, 0.000985059, 0.001098955, 0.001069465,
0.00105376, 0.00106878, 0.00110388, 0.00108702, 0.001162835,
0.001070955, 0.0010971, 0.00111695, 0.001060525, 0.00108797,
0.00103262, 0.001117605, 0.001061707, 0.001156365, 0.00104431,
0.00109114, 0.001053765, 0.001045395, 0.00106441, 0.00108481,
0.0011145, 0.001095115, 0.001099075, 0.001057, 0.001096125,
0.00109696, 0.001064795, 0.00108024, 0.00102137, 0.001103185,
0.00111948, 0.001110965, 0.00103784, 0.00104419, 0.00101302,
0.00108785, 0.001098765, 0.001052415, 0.0010976, 0.001064385,
0.001129705, 0.001076575, 0.001049785, 0.00103181, 0.001078155,
0.001129015, 0.001024814, 0.00109171, 0.001007862, 0.001099885,
0.00109162, 0.001060665, 0.00106572, 0.00106803, 0.00113409,
0.001052505, 0.001138575, 0.00108723, 0.001046765, 0.001090765,
0.001060205, 0.0010143, 0.001106365, 0.00113328, 0.001045545,
0.001081185, 0.001102495, 0.00108775, 0.001055425, 0.001130705,
0.00115777, 0.00105974, 0.001089395, 0.001069105, 0.001054605,
0.00107229, 0.001128765, 0.001111455, 0.001111155, 0.001095955,
0.001108905, 0.001077275, 0.00104525, 0.00111458, 0.001110655,
0.001114555, 0.00110557, 0.001167755, 0.0010552, 0.00110002,
0.00104991, 0.001069695, 0.001101185, 0.001066815, 0.00109388,
0.001079455, 0.001097655, 0.001106855, 0.00108804, 0.001102495,
0.001077445, 0.00104224, 0.001064015, 0.00110557, 0.00112483,
0.001093865, 0.00105965, 0.001048305, 0.0010719, 0.001098505,
0.001073105, 0.001049325, 0.001097175, 0.00111584, 0.001122035,
0.001036422, 0.001016249, 0.001015195, 0.00110143, 0.001121325,
0.001047755, 0.001063235, 0.001079355, 0.001086495, 0.001074005,
0.00109106, 0.00103907, 0.0010907, 0.001101685, 0.001017475,
0.00113138, 0.001086615, 0.001076375, 0.001088265, 0.00106309,
0.00106822, 0.001052205, 0.001106125, 0.001060125, 0.00106945,
0.001092695, 0.00107696, 0.001063425, 0.001116725, 0.00105891,
0.001054205, 0.00109295, 0.00109387, 0.00101807, 0.001066195,
0.001120295, 0.001111565, 0.001088595, 0.00102183, 0.0010934,
0.00111935, 0.00105371, 0.00108314, 0.0011006, 0.001079585,
0.001127775, 0.001140825, 0.00106203, 0.001118035, 0.00103535,
0.00099512, 0.001078955, 0.00108867, 0.0010789, 0.001030445,
0.00106243, 0.001028545, 0.00108679, 0.00105624, 0.001110145,
0.00107318, 0.00106523, 0.001103515, 0.00112404, 0.001064455,
0.001040425, 0.001059305, 0.00106362, 0.001079395, 0.00107183,
0.0010652, 0.00106983, 0.00111722, 0.00114111, 0.001059649,
0.001029902, 0.001062825, 0.001102155, 0.001122135, 0.00103623,
0.00108648, 0.001081035, 0.001110075, 0.001039397, 0.001057715,
0.0010338, 0.001071455, 0.001072065, 0.001032233, 0.00111996,
0.00106407, 0.0010693, 0.001104395)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -222L), groups = structure(list(
ID = structure(c(1L, 2L, 9L, 10L, 11L, 12L, 14L, 15L, 16L,
19L, 20L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 35L, 36L, 37L, 38L, 39L, 40L, 42L, 43L, 46L, 47L,
48L, 49L, 50L, 51L, 52L, 53L, 55L, 56L, 57L, 58L, 60L, 61L,
62L, 66L, 67L, 68L, 71L, 72L, 73L, 74L, 75L, 81L, 84L, 85L,
93L, 95L, 96L, 104L, 107L, 109L, 116L, 126L, 133L, 135L,
138L, 139L, 146L, 150L, 154L, 164L, 165L), .Label = c("33714",
"35377", "38623", "38806", "39593", "39820", "39951", "40286",
"40556", "40798", "40800", "40815", "43762", "50848", "52183",
"52461", "52577", "53202", "53320", "53873", "54153", "54206",
"54581", "55122", "55267", "55462", "55612", "55920", "56022",
"56307", "56420", "56679", "57405", "57445", "57480", "57725",
"57809", "58004", "58215", "58229", "58503", "59326", "59327",
"59344", "59361", "59865", "60099", "60100", "60280", "60384",
"60429", "60493", "60503", "60575", "60603", "60664", "60846",
"61415", "61656", "61749", "61883", "62081", "62210", "62285",
"62937", "62983", "63327", "63329", "64081", "64328", "64418",
"64507", "64596", "65178", "65250", "65302", "65478", "65480",
"65487", "65572", "65802", "65935", "65974", "65975", "65978",
"65991", "65995", "66013", "66154", "66237", "66245", "66389",
"66396", "66460", "66572", "66589", "67174", "73230", "73525",
"73539", "73677", "73942", "73953", "74034", "74113", "74114",
"74427", "74439", "74607", "74641", "74657", "74794", "74800",
"74836", "74942", "74952", "74962", "74969", "74977", "74985",
"74989", "75220", "75229", "75407", "75653", "75732", "75735",
"75757", "75895", "75898", "76381", "76559", "76574", "76594",
"76595", "76746", "76751", "76755", "76759", "76775", "77088",
"77091", "77099", "77134", "77188", "77203", "77252", "77304",
"77413", "77453", "77528", "77556", "77585", "77668", "78262",
"79724", "79730", "79850", "79977", "80052", "80819", "80901",
"80932", "81064", "81065", "81071", "81098", "81142", "81175"
), class = "factor"), .rows = structure(list(c(1L, 75L, 149L
), c(2L, 76L, 150L), c(3L, 77L, 151L), c(4L, 78L, 152L),
c(5L, 79L, 153L), c(6L, 80L, 154L), c(7L, 81L, 155L),
c(8L, 82L, 156L), c(9L, 83L, 157L), c(10L, 84L, 158L),
c(11L, 85L, 159L), c(12L, 86L, 160L), c(13L, 87L, 161L
), c(14L, 88L, 162L), c(15L, 89L, 163L), c(16L, 90L,
164L), c(17L, 91L, 165L), c(18L, 92L, 166L), c(19L, 93L,
167L), c(20L, 94L, 168L), c(21L, 95L, 169L), c(22L, 96L,
170L), c(23L, 97L, 171L), c(24L, 98L, 172L), c(25L, 99L,
173L), c(26L, 100L, 174L), c(27L, 101L, 175L), c(28L,
102L, 176L), c(29L, 103L, 177L), c(30L, 104L, 178L),
c(31L, 105L, 179L), c(32L, 106L, 180L), c(33L, 107L,
181L), c(34L, 108L, 182L), c(35L, 109L, 183L), c(36L,
110L, 184L), c(37L, 111L, 185L), c(38L, 112L, 186L),
c(39L, 113L, 187L), c(40L, 114L, 188L), c(41L, 115L,
189L), c(42L, 116L, 190L), c(43L, 117L, 191L), c(44L,
118L, 192L), c(45L, 119L, 193L), c(46L, 120L, 194L),
c(47L, 121L, 195L), c(48L, 122L, 196L), c(49L, 123L,
197L), c(50L, 124L, 198L), c(51L, 125L, 199L), c(52L,
126L, 200L), c(53L, 127L, 201L), c(54L, 128L, 202L),
c(55L, 129L, 203L), c(56L, 130L, 204L), c(57L, 131L,
205L), c(58L, 132L, 206L), c(59L, 133L, 207L), c(60L,
134L, 208L), c(61L, 135L, 209L), c(62L, 136L, 210L),
c(63L, 137L, 211L), c(64L, 138L, 212L), c(65L, 139L,
213L), c(66L, 140L, 214L), c(67L, 141L, 215L), c(68L,
142L, 216L), c(69L, 143L, 217L), c(70L, 144L, 218L),
c(71L, 145L, 219L), c(72L, 146L, 220L), c(73L, 147L,
221L), c(74L, 148L, 222L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -74L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
Which looks like:
# A tibble: 222 x 5
# Groups: ID [74]
ID Timepoint_yrs Sex Timepoint mean_AD
<fct> <int> <chr> <chr> <dbl>
1 33714 0 Male bl 0.00103
2 35377 0 Female bl 0.00106
3 40556 0 Male bl 0.00114
4 40798 0 Male bl 0.00111
5 40800 0 Male bl 0.00105
6 40815 0 Male bl 0.00111
7 50848 0 Male bl 0.00109
8 52183 0 Male bl 0.00110
9 52461 0 Male bl 0.00104
10 53320 0 Male bl 0.00112
# ... with 212 more rows
I want to pivot mean_AD
wider across Timepoint
. However, I get this odd result of NA
values in some columns:
DF_wide<- DF %>%
pivot_wider(names_from = Timepoint, values_from = mean_AD)
# A tibble: 221 x 6
# Groups: ID [74]
ID Timepoint_yrs Sex bl flu1 flu2
<fct> <int> <chr> <dbl> <dbl> <dbl>
1 33714 0 Male 0.00103 NA NA
2 35377 0 Female 0.00106 NA NA
3 40556 0 Male 0.00114 NA NA
4 40798 0 Male 0.00111 NA NA
5 40800 0 Male 0.00105 NA NA
6 40815 0 Male 0.00111 NA NA
7 50848 0 Male 0.00109 NA NA
8 52183 0 Male 0.00110 NA NA
9 52461 0 Male 0.00104 NA NA
10 53320 0 Male 0.00112 NA NA
# ... with 211 more rows
The original DF posted above was created with:
DF %>%
group_by(ID) %>%
mutate(Timepoint_yrs = Age - min(Age)) %>%
filter(Age < 35) %>%
dplyr::select(ID, Timepoint_yrs, Sex, Timepoint, mean_AD)
Expected output missing the Timepoint_yrs
: (If I remove the group_by
and mutate
code lines, the pivot_wider
function works properly.)
# A tibble: 74 x 5
ID Sex bl flu1 flu2
<fct> <chr> <dbl> <dbl> <dbl>
1 33714 Male 0.00103 0.00106 0.00106
2 35377 Female 0.00106 0.00101 0.00107
3 40556 Male 0.00114 0.00111 0.00105
4 40798 Male 0.00111 0.00113 0.00111
5 40800 Male 0.00105 0.00105 0.00106
6 40815 Male 0.00111 0.00108 0.00107
7 50848 Male 0.00109 0.00110 0.00109
8 52183 Male 0.00110 0.00109 0.00108
9 52461 Male 0.00104 0.00106 0.00106
10 53320 Male 0.00112 0.00113 0.00112
# ... with 64 more rows
I'm not sure why this is not working?
CodePudding user response:
There was a column that was not needed - Timepoint_yrs
, select
out the column and then it should work. When we use the 'Timepoint_yrs' also in the code, it will look for the match for the value corresponding to that column as well
library(dplyr)
library(tidyr)
DF %>%
ungroup %>%
select(-Timepoint_yrs) %>%
pivot_wider(names_from = Timepoint, values_from = mean_AD)
-output
# A tibble: 74 × 5
ID Sex bl flu1 flu2
<fct> <chr> <dbl> <dbl> <dbl>
1 33714 Male 0.00103 0.00106 0.00106
2 35377 Female 0.00106 0.00101 0.00107
3 40556 Male 0.00114 0.00111 0.00105
4 40798 Male 0.00111 0.00113 0.00111
5 40800 Male 0.00105 0.00105 0.00106
6 40815 Male 0.00111 0.00108 0.00107
7 50848 Male 0.00109 0.00110 0.00109
8 52183 Male 0.00110 0.00109 0.00108
9 52461 Male 0.00104 0.00106 0.00106
10 53320 Male 0.00112 0.00113 0.00112
# … with 64 more rows
CodePudding user response:
Maybe something like this?
library(dplyr)
library(tidyr)
DF %>%
pivot_wider(
id_cols = c(ID, Sex),
names_from = Timepoint,
values_from = mean_AD,
) %>%
right_join(DF, by="ID") %>%
select(ID, Sex=Sex.x, Timepoint_yrs, bl, flu1, flu2)
ID Sex Timepoint_yrs bl flu1 flu2
<fct> <chr> <int> <dbl> <dbl> <dbl>
1 33714 Male 0 0.00103 0.00106 0.00106
2 33714 Male 1 0.00103 0.00106 0.00106
3 33714 Male 3 0.00103 0.00106 0.00106
4 35377 Female 0 0.00106 0.00101 0.00107
5 35377 Female 1 0.00106 0.00101 0.00107
6 35377 Female 3 0.00106 0.00101 0.00107
7 40556 Male 0 0.00114 0.00111 0.00105
8 40556 Male 2 0.00114 0.00111 0.00105
9 40556 Male 3 0.00114 0.00111 0.00105
10 40798 Male 0 0.00111 0.00113 0.00111
# ... with 212 more rows