I have a list (my.list) that looks like this:
$S1
Study_ID B C D
1 100 NA C1 0.9124000
2 100 1.5 PTA NA
3 200 1.8 C1 0.5571429
4 200 2.1 PTA 0.7849462
5 300 3.2 C1 0.3271900
6 300 1.4 PTA NA
7 400 NA C1 0.8248200
8 400 9.3 PTA 0.2847020
$S2
Study_ID B C D
1 100 NA C1 0.9124000
2 100 0.70 PTA NA
3 200 NA C1 0.5571429
4 200 0.45 PTA 0.7849462
5 300 0.91 C1 0.3271900
6 300 0.78 PTA 0.6492000
7 400 0.65 C1 0.8248200
8 400 NA PTA NA
If a patient has 'NA' in column D, I would like to remove the entire patient from the list - that is, remove them based on Study_ID.
In other words, if there is an NA in Column D, I would like to remove the two rows that have the same Study_ID.
My desired output would look like this:
$S1
Study_ID B C D
1 200 1.8 C1 0.5571429
2 200 2.1 PTA 0.7849462
3 400 NA C1 0.8248200
4 400 9.3 PTA 0.2847020
$S2
Study_ID B C D
1 200 NA C1 0.5571429
2 200 0.45 PTA 0.7849462
3 300 0.91 C1 0.3271900
4 300 0.78 PTA 0.6492000
How can I go about doing this?
Reproducible Data:
# Reproducible input: a named list of two data frames (S1, S2). Each data
# frame holds two rows per Study_ID (one "C1" row and one "PTA" row).
my.list <- list(
  S1 = structure(
    list(
      Study_ID = rep(c(100, 200, 300, 400), each = 2),
      B = c(NA, 1.5, 1.8, 2.1, 3.2, 1.4, NA, 9.3),
      C = rep(c("C1", "PTA"), times = 4),
      D = c(0.9124, NA, 0.5571429, 0.7849462, 0.32719, NA, 0.82482, 0.284702)
    ),
    class = "data.frame",
    row.names = as.character(1:8)
  ),
  S2 = structure(
    list(
      Study_ID = rep(c(100, 200, 300, 400), each = 2),
      B = c(NA, 0.7, NA, 0.45, 0.91, 0.78, 0.65, NA),
      C = rep(c("C1", "PTA"), times = 4),
      D = c(0.9124, NA, 0.5571429, 0.7849462, 0.32719, 0.6492, 0.82482, NA)
    ),
    class = "data.frame",
    row.names = as.character(1:8)
  )
)
CodePudding user response:
tidyverse
library(tidyverse)
# For every data frame in the list, keep only the patients (Study_ID groups)
# that have no missing value in column D.
# The grouping is dropped again with ungroup() so the returned tibbles are
# plain (ungrouped) and later verbs do not silently operate per Study_ID.
my.list %>%
  map(function(df) {
    df %>%
      group_by(Study_ID) %>%
      filter(!any(is.na(D))) %>%
      ungroup()
  })
#> $S1
#> # A tibble: 4 × 4
#> Study_ID B C D
#> <dbl> <dbl> <chr> <dbl>
#> 1 200 1.8 C1 0.557
#> 2 200 2.1 PTA 0.785
#> 3 400 NA C1 0.825
#> 4 400 9.3 PTA 0.285
#>
#> $S2
#> # A tibble: 4 × 4
#> Study_ID B C D
#> <dbl> <dbl> <chr> <dbl>
#> 1 200 NA C1 0.557
#> 2 200 0.45 PTA 0.785
#> 3 300 0.91 C1 0.327
#> 4 300 0.78 PTA 0.649
data.table
library(magrittr)
library(data.table)
# Convert each data frame to a data.table with as.data.table(), which makes a
# copy. setDT() would instead convert the elements of my.list by reference,
# leaving the caller's list silently changed to data.tables.
# Then, per Study_ID, keep the group's rows (.SD) only when no D is missing.
my.list %>%
  lapply(as.data.table) %>%
  lapply(function(dt) dt[, .SD[!any(is.na(D))], by = Study_ID])
#> $S1
#> Study_ID B C D
#> 1: 200 1.8 C1 0.5571429
#> 2: 200 2.1 PTA 0.7849462
#> 3: 400 NA C1 0.8248200
#> 4: 400 9.3 PTA 0.2847020
#>
#> $S2
#> Study_ID B C D
#> 1: 200 NA C1 0.5571429
#> 2: 200 0.45 PTA 0.7849462
#> 3: 300 0.91 C1 0.3271900
#> 4: 300 0.78 PTA 0.6492000
data
# Reproducible input: a named list of two data frames (S1, S2).
# Study_ID and C are the same in both studies, so only B and D are listed
# per study and the shared columns are filled in while building each frame.
my.list <- lapply(
  list(
    S1 = list(
      B = c(NA, 1.5, 1.8, 2.1, 3.2, 1.4, NA, 9.3),
      D = c(0.9124, NA, 0.5571429, 0.7849462, 0.32719, NA, 0.82482, 0.284702)
    ),
    S2 = list(
      B = c(NA, 0.7, NA, 0.45, 0.91, 0.78, 0.65, NA),
      D = c(0.9124, NA, 0.5571429, 0.7849462, 0.32719, 0.6492, 0.82482, NA)
    )
  ),
  function(cols) {
    structure(
      list(
        Study_ID = rep(c(100, 200, 300, 400), each = 2),
        B = cols$B,
        C = rep(c("C1", "PTA"), times = 4),
        D = cols$D
      ),
      class = "data.frame",
      row.names = as.character(1:8)
    )
  }
)
CodePudding user response:
A small alternative to @Yuriy's answer:
library(dplyr)
library(purrr)
# Apply the same patient-level filter to every data frame in the list:
# a Study_ID is kept only if none of its rows has a missing D.
map(my.list, function(df) {
  by_patient <- group_by(df, Study_ID)
  complete_patients <- filter(by_patient, !any(is.na(D)))
  # Return an ungrouped tibble.
  ungroup(complete_patients)
})
In base R:
# Base R: for every data frame in the list, drop all rows belonging to a
# Study_ID that has at least one missing value in column D.
lapply(my.list, function(x) {
  # Pull the IDs out as a plain vector. Indexing the table with
  # x[rows, "Study_ID"] only yields a vector for a base data.frame; a tibble
  # or data.table returns a one-column table, and %in% against that list
  # can fail to match, so no rows would be removed.
  to_remove <- unique(x$Study_ID[is.na(x$D)])
  x[!(x$Study_ID %in% to_remove), ]
})