I have a data where I would like to compare the measurements two-by-two by taking the difference and say if the difference is more than 0.2 add another column as Yes, otherwise No. Like the example data in the image. And, at the end for each individual if there is at least one Yes, the final is Yes. I have filled the first row manually, but I would like to do the same for all my individuals(1000 individuals).enter image description here
library(readxl)
dd <- read_excel("dd.xlsx")
dd <- dput(dd)
structure(list(ID = c(1, 2, 3, 4, 5, 6), m1 = c("2.1", "1.4","NA", "4.0", "2.5", "NA"),
m2 = c("2.8", "1.5", "NA", "4.0", "3.8", "1.1"),
m3 = c("3.5", "1.5", "NA", "4.0", "NA", "1.3"),
m4 = c("NA", "1.8", "1.8", "4.0", "NA", "1.3"),
m5 = c("NA","1.5", "2.9", "NA", "NA", "1.5"),
m6 = c("NA", "NA", "3.5", "NA", "NA", "1.2")),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L))
CodePudding user response:
This approach uses a pivot longer to estimate your columns, and then pivot wider with rename will give you what you want (See second solution). However, if you just want the Final
column, by ID, just do this:
- Simple approach to just getting
Final
by ID
dd %>%
pivot_longer(cols = !ID) %>%
mutate(value=as.numeric(value)) %>%
group_by(ID) %>%
summarize(Final = any(value-lag(value)>0.2))
- Extended approach if you really want all those intermediate variables
dd %>%
pivot_longer(cols = !ID) %>%
mutate(value=as.numeric(value)) %>%
group_by(ID) %>%
mutate(diff=value-lag(value),
gt=if_else(diff>0.2, "Yes", "No")) %>%
pivot_wider(id_cols = ID,names_from = name,values_from = value:gt) %>%
rowwise() %>%
mutate(Final = any(c_across(starts_with("gt"))=="Yes")) %>%
select(!c(diff_m1,gt_m1)) %>%
rename_with(~c("ID", "m1","m2","m3","m4","m5","m6",
"m2-m1", "m3-m2", "m4-m3", "m5-m4", "m6-m5",
"r(m2-m1)", "r(m3-m2)", "r(m4-m3)", "r(m5-m4)", "r(m6-m5)",
"Final"), .cols = everything()
)
Output:
ID m1 m2 m3 m4 m5 m6 `m2-m1` `m3-m2` `m4-m3` `m5-m4` `m6-m5` `r(m2-m1)` `r(m3-m2)` `r(m4-m3)`
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
1 1 2.1 2.8 3.5 NA NA NA 0.7 0.7 NA NA NA Yes Yes NA
2 2 1.4 1.5 1.5 1.8 1.5 NA 0.100 0 0.3 -0.3 NA No No Yes
3 3 NA NA NA 1.8 2.9 3.5 NA NA NA 1.1 0.6 NA NA NA
4 4 4 4 4 4 NA NA 0 0 0 NA NA No No No
5 5 2.5 3.8 NA NA NA NA 1.3 NA NA NA NA Yes NA NA
6 6 NA 1.1 1.3 1.3 1.5 1.2 NA 0.2 0 0.2 -0.3 NA No No
# … with 3 more variables: `r(m5-m4)` <chr>, `r(m6-m5)` <chr>, Final <lgl>
CodePudding user response:
Here is another tidyverse
approach:
library(dplyr)
library(tibble)
dd %>%
type.convert(as.is =TRUE) %>%
select(-1) %>%
mutate(across(-1, .names = "{.col}-{names(.)[match(.col,
names(.))-1]}") - across(-last_col())) %>%
mutate(across(contains("-"), ~ifelse(is.na(.), NA, "YES"), .names = "r({.col})")) %>%
mutate(Final = ifelse(rowSums(dd == "YES") > 0, "No", "Yes")) %>%
add_column(ID = dd$ID, .before = "m1")
ID m1 m2 m3 m4 m5 m6 `m2-m1` `m3-m2` `m4-m3` `m5-m4` `m6-m5` `r(m2-m1)` `r(m3-m2)` `r(m4-m3)` `r(m5-m4)` `r(m6-m5)` Final
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 2.1 2.8 3.5 NA NA NA 0.7 0.7 NA NA NA YES YES NA NA NA Yes
2 2 1.4 1.5 1.5 1.8 1.5 NA 0.100 0 0.3 -0.3 NA YES YES YES YES NA Yes
3 3 NA NA NA 1.8 2.9 3.5 NA NA NA 1.1 0.6 NA NA NA YES YES Yes
4 4 4 4 4 4 NA NA 0 0 0 NA NA YES YES YES NA NA Yes
5 5 2.5 3.8 NA NA NA NA 1.3 NA NA NA NA YES NA NA NA NA Yes
6 6 NA 1.1 1.3 1.3 1.5 1.2 NA 0.2 0 0.2 -0.3 NA YES YES YES YES Yes