Home > OS >  Converting two couple 2 columns into a long format dataset: how to do?
Converting two couple 2 columns into a long format dataset: how to do?

Time:11-27

I have tried to manipulate the original dataset I had as follows.

db%>%  
    mutate(GROUP = fct_recode(GROUP, CONTROL = "CONTROLLO")) %>%
      to_long(keys  = c("tests0", "tests7"),
              values =c("score0", "score7"),
              grep("T0$", names(.), value = TRUE), 
              grep("T7$", names(.), value = TRUE)) %>%
      as_tibble %>%
      mutate(Gender = factor(Gender, levels = c(1, 0), labels = c("M", "F")),
             across(c(tests0, tests7), factor)) 

And I have obtained the following dataset

# A tibble: 678 x 9
      ID GROUP    Gender   Age Education tests0     score0 tests7     score7
   <dbl> <fct>    <fct>  <dbl>     <dbl> <fct>       <dbl> <fct>       <dbl>
 1     1 TRAINING M         74        18 ADAS_CogT0  14.7  ADAS_CogT7  16   
 2     2 TRAINING F         76         4 ADAS_CogT0  15.3  ADAS_CogT7   9.32
 3     3 TRAINING M         81         8 ADAS_CogT0  17.3  ADAS_CogT7  21.3 
 4     4 TRAINING F         74         5 ADAS_CogT0  19    ADAS_CogT7  17   
 5     5 TRAINING M         69         8 ADAS_CogT0   7.66 ADAS_CogT7   8.32
 6     6 TRAINING F         72        11 ADAS_CogT0  12.6  ADAS_CogT7  11   
 7     7 TRAINING F         75         5 ADAS_CogT0  18.7  ADAS_CogT7  15.0 
 8     8 TRAINING M         83         5 ADAS_CogT0  15.0  ADAS_CogT7  11.0 
 9     9 TRAINING F         78         4 ADAS_CogT0  18.0  ADAS_CogT7  17   
10    10 TRAINING F         72         8 ADAS_CogT0  17.3  ADAS_CogT7  18.3 

Can anyone know how could I do to create a unique column for tests with the correspondent score aside?

Here the dataset I've used

dput(head(db, 100))
structure(list(ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 
93, 94, 95, 96, 97, 98, 99, 100), GROUP = c("TRAINING", "TRAINING", 
"TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", 
"TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", 
"TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", 
"TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", 
"TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", 
"TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", 
"TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", 
"TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", 
"TRAINING", "TRAINING", "TRAINING", "TRAINING", "TRAINING", "CONTROLLO", 
"CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", 
"CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", 
"CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", 
"CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", 
"CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", 
"CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", 
"CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", 
"CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO", 
"CONTROLLO", "CONTROLLO", "CONTROLLO", "CONTROLLO"), Gender = c(1, 
0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 
0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 
1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 
0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0), Age = c(74, 76, 
81, 74, 69, 72, 75, 83, 78, 72, 82, 68, 72, 72, 73, 80, 69, 72, 
70, 80, 75, 80, 78, 74, 82, 74, 80, 82, 78, 81, 66, 71, 70, 79, 
78, 73, 72, 77, 77, 71, 83, 74, 70, 71, 77, 69, 67, 64, 79, 71, 
77, 77, 73, 67, 68, 79, 81, 67, 84, 75, 80, 73, 68, 74, 77, 79, 
79, 72, 73, 78, 76, 78, 77, 74, 78, 77, 77, 82, 77, 70, 77, 81, 
79, 75, 74, 78, 69, 77, 73, 77, 70, 79, 70, 72, 77, 72, 71, 71, 
73, 81), Education = c(18, 4, 8, 5, 8, 11, 5, 5, 4, 8, 8, 12, 
5, 18, 13, 5, 13, 13, 5, 5, 13, 5, 3, 8, 17, 5, 8, 5, 5, 8, 17, 
8, 18, 18, 13, 13, 13, 13, 15, 17, 8, 5, 5, 13, 8, 5, 11, 13, 
8, 8, 8, 5, 13, 8, 5, 17, 8, 12, 13, 5, 8, 8, 8, 5, 3, 8, 18, 
5, 8, 13, 8, 5, 17, 8, 5, 17, 5, 8, 11, 8, 8, 5, 12, 3, 8, 8, 
8, 13, 5, 5, 8, 8, 13, 5, 5, 8, 13, 5, 8, 12), ADAS_CogT0 = c(14.66, 
15.33, 17.33, 19, 7.66, 12.6, 18.67, 14.99, 17.99, 17.33, 13.66, 
16.99, 10.66, 9.66, 14.99, 15.66, 13.33, 4.33, 14.33, 15.99, 
16.33, 10.66, 14.66, 10.66, 19.33, 17.66, 15.99, 20.66, 20.6, 
17, 10.33, 6.33, 6.66, 19.99, 13.33, 24.33, 12.33, 10.33, 12.33, 
9.66, 10.99, 13.99, 23, 6.32, 11.32, 13.99, 14.66, 8.99, 14.33, 
9.99, 7.33, 15.66, 14, 7.99, 23.32, 14.66, 9.99, 5.66, 6.99, 
11.66, 10.33, 6.99, 19.32, NA, 10, 17.66, 13.66, 10.32, NA, NA, 
8.66, 9, 6.99, 14.99, 9.66, 13.66, 15.32, 12, 14, 13.66, 11.99, 
15.66, 16, 15, 16.99, 20, 11, 7.99, 8.33, 8.32, 14.99, 18.66, 
10.33, 11.99, 9.32, 17, 14.33, 14.66, 16.6, 9.99), ROCF_CT0 = c(32, 
13.5, 34.7, 26.1, 35.4, 24.6, 28.9, 34.1, 12.1, 28.1, 22.7, 31.6, 
30.4, 34.4, 37.2, 38.1, 18.6, 26.2, 34.8, 37.1, 26.2, 35.1, 37.1, 
36, 11.2, 17.4, 32.7, 28.4, 21.1, 26.7, 31.9, 33.4, 27.4, 32.7, 
33.9, 37.2, 28.2, 34.2, 29.7, 30, 16.2, 37.4, 34.8, 36.6, 33, 
30.8, 34.6, 35.1, 29.7, 36.8, 28, 34.4, 33.7, 28.9, 31.8, 31.7, 
10.2, 27.1, 36.8, 22.4, 35.7, 28, 30.9, 35.4, 26.9, 29.7, 26.7, 
28.8, 32, 37.9, 27, 18.6, 33, 38, 25.1, 31, 27.1, 26.7, 37, 30.4, 
37, 30.1, 36.9, 24.4, 34, 6.7, 20.4, 30.2, 31.1, 33.4, 17.4, 
23.7, 30.6, 28.3, 19.4, 33.4, 31.6, 39.8, 27, 33.9), ROCF_IT0 = c(3.7, 
7.6, 14.9, 13.6, 23.4, 4, 8.1, 23.9, 10.1, 7, 10, 9.2, 15.6, 
10.8, 6.6, 13.6, 4, 15.3, 7.4, 0, 8.6, 22.9, 11.6, 10.1, 6.8, 
12.1, 12.9, 0, 10.6, 5.9, 14.8, 10.9, 10.6, 10.6, 7.5, 7.3, 4.1, 
3.1, 7.6, 13.4, 10.9, 19, 3.4, 11.7, 14.6, 9.4, 16.7, 15.7, 14.5, 
12.5, 15.1, 22.6, 9.1, 15.5, 11, 6.1, 11.5, 5.6, 17.2, 18.1, 
23.5, 15.6, 7.7, 13.5, 14.5, 11.9, 10.3, 11.5, 12.6, 19, 11.9, 
15.4, 17.7, 8.4, 13.4, 5.2, 11.1, 16, 10.9, 8, 22.6, 13.6, 12.2, 
6.5, 13.4, 11.5, 8, 18.1, 10, 11, 5.9, 14.9, 12, 11.4, 17, 5.9, 
15, 10.9, 8.9, 15.7), ROCF_RT0 = c(3.9, 7.8, 16.3, 13.4, 24.1, 
4.7, 7.8, 20.9, 9.8, 5.1, 9.8, 8.2, 16.3, 11.3, 5.9, 14.8, 4.7, 
14.4, 6.7, 0, 4.9, 23.3, 14.8, 11.7, 6.4, 10.3, 14.3, 0, 10.8, 
6.3, 15.4, 10.1, 5.3, 12, 6.9, 5.4, 4.4, 4.4, 7, 10.9, 11.9, 
17.3, 3.7, 12.2, 6.7, 14.7, 16.2, 7.8, 10.8, 10.2, 16.7, 22.8, 
9.1, 15.7, 10.7, 4, 8.8, 6.8, 14.6, 14.5, 24.8, 13.7, 0, 10.3, 
17.3, 11.3, 8, 13.2, 15.2, 18.4, 4.3, 12.8, 18.9, 4.3, 11.8, 
4.9, 11.8, 13.3, 10.3, 7.1, 23.7, 15.8, 16.4, 6.8, 20.3, 11.3, 
7.6, 19.9, 10.8, 12.5, 5.6, 11.3, 11.7, 13.5, 12.3, 5.1, 14.2, 
8.7, 12.3, 16.4), PVF_T0 = c(41.3, 35.4, 16.7, 28.4, 46.9, 33.6, 
39.4, 27.4, 17.4, 37.9, 31.7, 37.6, 33.4, 21.5, 49.5, 40.4, 10.6, 
40.5, 30.6, 28.4, 29.5, 22.4, 45.4, 29.7, 17.3, 39.4, 31.7, 24.4, 
38.4, 22.7, 6.8, 19.9, 56.5, 35.3, 43.5, 32.5, 28.5, 38.5, 22.3, 
32.3, 30.7, 23.4, 18.6, 46.6, 36.7, 35.6, 38.6, 16.9, 38.7, 37.6, 
39.7, 34.4, 27.5, 16.2, 32.6, 48.3, 26.7, 49.9, 40.5, 34.4, 37.7, 
39.7, 23.2, 19.4, 21.4, 33.7, 11.3, 41.6, 23.7, 31.5, 28.7, 34.4, 
33.3, 27.7, 23.4, 32.3, 49.4, 35.7, 26.7, 31.9, 37.7, 35.4, 20.5, 
28.4, 14.7, 30.7, 35.9, 16.5, 27.4, 49.4, 37.9, 27.7, 41.6, 31.6, 
55.4, 42.9, 21.6, 25.6, 26.7, 43.5), SVF_T0 = c(40, 37, 30, 28, 
49, 27, 34, 35, 29, 33, 20, 18, 36, 47, 43, 39, 36, 38, 48, 31, 
25, 34, 31, 33, 24, 30, 45, 20, 33, 34, 29, 42, 40, 37, 37, 25, 
28, 42, 19, 32, 37, 35, 20, 43, 33, 40, 40, 42, 27, 35, 29, 39, 
31, 48, 41, 27, 32, 45, 39, 40, 36, 38, 27, 32, 47, 36, 24, 40, 
38, 29, 43, 47, 38, 33, 44, 39, 46, 36, 32, 38, 38, 35, 26, 43, 
29, 24, 33, 30, 33, 57, 40, 37, 44, 34, 46, 41, 33, 35, 29, 37
), ADAS_CogT7 = c(16, 9.32, 21.33, 17, 8.32, 11, 14.99, 10.99, 
17, 18.33, 13.32, 14.34, 8.99, 7, 11.99, 15.33, 6.99, 5.33, 12.32, 
13, 21.32, 7.99, 13.33, 11.99, 17.32, 16.32, 16.33, 14.66, 18.99, 
17.33, 7.99, 9.33, 10.99, NA, 12.99, 16.33, 21.66, 9, 9.34, 8.66, 
8.33, 13.66, 15.66, 6.66, 10.99, 13.33, 13.33, 7.99, 11.99, 11.32, 
7.33, 9.66, 6.99, NA, 15.99, 15.66, 14.66, 6.32, 7, 11, 14, 10.33, 
24.66, NA, 14.99, NA, 15.99, 9.32, NA, NA, 9.99, 9.33, 7.66, 
17.33, 10.32, 16, 17, 12.99, 15, 14.33, 10, 14.99, 19, 13.99, 
19.33, NA, 10, 6.99, 11.66, 6.66, 14.33, 16, 8.66, 10, NA, 20, 
14.99, 19.66, 26.66, 8.99), ROCF_CT7 = c(33, 23.9, 28.7, 39.1, 
36.4, 29.2, 11.4, 38.1, 18.1, 28.4, 22.2, 18.6, 37, 35, 33.2, 
23.6, 21.1, 30.2, 28.8, 37.1, 32.2, 33.1, 26.1, 38, 23.6, 21.4, 
37.3, 36.4, 15.1, 35.7, 29.9, 31.4, 32.4, NA, 37.9, 28.2, 37.2, 
31.9, 36.7, 24.5, 19.2, 23.4, 32.8, 36.2, 31.7, 24.3, 31.6, 33.1, 
19.2, 30.4, 32.7, 24.4, 36.2, NA, 28.1, 31.7, 19.7, 31.6, 28.3, 
27.4, 37.7, 35, 33.4, 34.4, 32.1, NA, 33.7, 31.8, NA, 34.9, 15, 
20.1, 31, 35, 37.1, 29, 32.6, 31.7, 28.4, 34.4, 35.7, 35.1, 35.9, 
27.4, 34, NA, 17.4, 28.9, 35.4, 33.4, 30.4, 14.7, 33.2, 26.8, 
NA, 20, 33.6, 26.8, 36, 34.9), ROCF_IT7 = c(1.7, 12.1, 5.9, 12.1, 
24.4, 9.8, 7.6, 24.4, 9.6, 7, 11.5, 9.7, 17.1, 14.2, 4.6, 12.1, 
4.5, 18.8, 8.4, 14.6, 6.1, 17.1, 15.1, 13.1, 11.8, 12.1, 15.9, 
7.6, 11.1, 5.9, 23.8, 12.4, 11.1, NA, 11, 7.8, 5.6, 6.5, 10.1, 
9.9, 14.4, 17, 6.4, 27.3, 16, 14.4, 19.7, 16.2, 8.5, 8, 20.5, 
15.1, 15.6, NA, 9, 8.6, 8.5, 13.2, 17.7, 14.1, 11, 9.1, 7, 16, 
20.4, NA, 12.3, 16, NA, 15, 8.4, 17.9, 19.7, 11.9, 18.6, 4.7, 
12.1, 17.5, 9, 5.5, 17.4, 12.1, 9.7, 9.5, 13.4, NA, 7.5, 14.5, 
11, 19.5, 4.9, 11.9, 26, 12, NA, 3.9, 5.5, 6.4, 4.9, 16.2), ROCF_RT7 = c(2.4, 
9.3, 6.3, 12.4, 22.6, 8.9, 7.8, 21.9, 9.8, 5.1, 10.8, 4.2, 12.3, 
18.9, 2.9, 13.8, 1.2, 19.9, 4.2, 9.8, 5.9, 21.3, 12.3, 10.2, 
12.1, 7.8, 6.3, 7.8, 9.8, 6.3, 16.9, 11.6, 11.3, NA, 8.9, 16.9, 
3.9, 6.4, 13, 9.9, 14.9, 18.4, 3.7, 15.4, 8.8, 14.2, 14.7, 14.3, 
8.8, 5.1, 21.8, 14.3, 11.4, NA, 8.7, 9.5, 8.8, 14.2, 17, 13.8, 
12.8, 9.7, 5.1, 15.3, 22.8, NA, 6.4, 12.7, NA, 20.9, 7.8, 11.8, 
22.4, 4.3, 17.8, 4.9, 13.8, 16.4, 7.4, 5.6, 24.8, 11.8, 9.4, 
11.8, 20.3, NA, 7.1, 13.9, 12.3, 23.5, 2.6, 14.8, 15.2, 11.7, 
NA, 4.3, 6.7, 5.2, 7.3, 18.4), PVF_T7 = c(38.3, 49.4, 16.7, 31.4, 
60.9, 29.5, 48.4, 28.4, 17.4, 32.9, 40.7, 42.6, 42.4, 32.3, 38.5, 
40.4, 20.4, 40.5, 42.6, 28.4, 27.5, 24.4, 40.4, 28.7, 5.3, 38.4, 
46.7, 26.4, 37.4, 23.7, 9.8, 34.9, 55.5, NA, 38.5, 38.5, 25.5, 
43.5, 36.3, 30.3, 32.7, 15.4, 21.6, 40.5, 38.7, 48.6, 47.6, 26.9, 
35.7, 31.9, 47.7, 35.4, 29.5, NA, 33.6, 48.3, 27.7, 55.6, 39.5, 
30.4, 36.7, 38.7, 23.9, 26.4, 28.4, NA, 18.3, 47.6, NA, 35.5, 
29.7, 39.4, 44.3, 26.7, 29.4, 31.3, 44.4, 30.7, 11.5, 40.9, 37.7, 
40.4, 14.5, 21.4, 14.7, NA, 29.9, 20.5, 34.4, 55.4, 32.9, 20.7, 
27.6, 31.6, NA, 7.7, 25.6, 22.6, 22.7, 34.5), SVF_T7 = c(26, 
48, 30, 29, 43, 25, 42, 36, 31, 27, 22, 21, 40, 46, 32, 29, 32, 
38, 46, 31, 20, 36, 35, 32, 12, 28, 47, 20, 33, 30, 22, 44, 35, 
NA, 36, 21, 24, 45, 25, 25, 30, 27, 34, 50, 31, 42, 34, 40, 25, 
31, 41, 37, 38, NA, 38, 19, 29, 46, 40, 38, 36, 34, 31, 40, 39, 
NA, 19, 40, NA, 29, 44, 43, 43, 31, 45, 28, 35, 31, 33, 36, 28, 
35, 34, 34, 29, NA, 26, 30, 34, 57, 40, 36, 41, 36, NA, 22, 31, 
29, 26, 31)), row.names = c(NA, -100L), class = c("tbl_df", "tbl", 
"data.frame"))
> 

I've preferred to make two separated columns for the two diffrent time points observations but I think it would more confrotable to have just a unique one

CodePudding user response:

Not really sure what you are aiming for but it sounds as if you want the data in long format by test?


library(stringr)
library(tidyr)
library(dplyr)

# extract variables associated with tests
nms_test <- names(db)[6:17]

# create new names which will be helpful when changing to long format
nms_new <- paste0(str_remove(str_extract(nms_test, "[A-z_] (?=T\\d$)"), "_$"),
                  "-",
                  paste0(str_extract(nms_test, "T\\d$")))
       
# create a named vector for the new names
names(nms_test) <- nms_new


  db %>% 
  rename(nms_test) %>%
  pivot_longer(-c(1:5),
               names_to = c(".value", "test"),
               names_sep = "-")

#> # A tibble: 6 x 12
#>      ID GROUP   Gender   Age Education test  ADAS_Cog ROCF_C ROCF_I ROCF_R   PVF
#>   <dbl> <chr>    <dbl> <dbl>     <dbl> <chr>    <dbl>  <dbl>  <dbl>  <dbl> <dbl>
#> 1     1 TRAINI~      1    74        18 T0       14.7    32      3.7    3.9  41.3
#> 2     1 TRAINI~      1    74        18 T7       16      33      1.7    2.4  38.3
#> 3     2 TRAINI~      0    76         4 T0       15.3    13.5    7.6    7.8  35.4
#> 4     2 TRAINI~      0    76         4 T7        9.32   23.9   12.1    9.3  49.4
#> 5     3 TRAINI~      1    81         8 T0       17.3    34.7   14.9   16.3  16.7
#> 6     3 TRAINI~      1    81         8 T7       21.3    28.7    5.9    6.3  16.7
#> # ... with 1 more variable: SVF <dbl>

Alternatively if I understand your later comment correctly you want all tests and test results in two columns, which is simpler!


db1 <-
  db %>% 
  pivot_longer(-c(1:5),
               names_to = "test",
               values_to = "value")

head(db1, 10)
#> # A tibble: 10 x 7
#>       ID GROUP    Gender   Age Education test       value
#>    <dbl> <chr>     <dbl> <dbl>     <dbl> <chr>      <dbl>
#>  1     1 TRAINING      1    74        18 ADAS_CogT0  14.7
#>  2     1 TRAINING      1    74        18 ROCF_CT0    32  
#>  3     1 TRAINING      1    74        18 ROCF_IT0     3.7
#>  4     1 TRAINING      1    74        18 ROCF_RT0     3.9
#>  5     1 TRAINING      1    74        18 PVF_T0      41.3
#>  6     1 TRAINING      1    74        18 SVF_T0      40  
#>  7     1 TRAINING      1    74        18 ADAS_CogT7  16  
#>  8     1 TRAINING      1    74        18 ROCF_CT7    33  
#>  9     1 TRAINING      1    74        18 ROCF_IT7     1.7
#> 10     1 TRAINING      1    74        18 ROCF_RT7     2.4

Created on 2021-11-26 by the reprex package (v2.0.1)

  • Related