How can I subset the following dataset based on the fact that Numeric_variable
includes 2 for some units (e.g. A,C), and 3 for some other units (e.g. B,D)? I would like to keep all units that have 1,1,1,1,1,1 (e.g. E,F) in each group. Data provided below.
Unit | Numeric_variable
A | 1
A | 1
A | 2
A | 2
A | 2
A | 2
C | 1
C | 1
C | 2
C | 2
C | 2
C | 2
B | 1
B | 1
B | 3
B | 3
B | 3
B | 3
D | 1
D | 1
D | 3
D | 3
D | 3
D | 3
E | 1
E | 1
E | 1
E | 1
E | 1
E | 1
F | 1
F | 1
F | 1
F | 1
F | 1
F | 1
Into:
df1:
Unit | Numeric_variable
A | 1
A | 1
A | 2
A | 2
A | 2
A | 2
C | 1
C | 1
C | 2
C | 2
C | 2
C | 2
E | 1
E | 1
E | 1
E | 1
E | 1
E | 1
F | 1
F | 1
F | 1
F | 1
F | 1
F | 1
df2:
Unit | Numeric_variable
B | 1
B | 1
B | 3
B | 3
B | 3
B | 3
D | 1
D | 1
D | 3
D | 3
D | 3
D | 3
E | 1
E | 1
E | 1
E | 1
E | 1
E | 1
F | 1
F | 1
F | 1
F | 1
F | 1
F | 1
data
Unit <- c("A", "A", "A", "A", "A", "A", "C", "C",
"C", "C", "C", "C", "B", "B", "B", "B", "B", "B", "D", "D", "D",
"D", "D", "D","E", "E", "E", "E", "E", "E","F", "F", "F", "F", "F", "F")
Numeric_variable <- c(1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 1, 3, 3, 3, 3L, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
df <- data.frame(Unit, Numeric_variable)
CodePudding user response:
We could this almost with the same code. Just and | is.na(x)
to filter
:
library(dplyr)
library(tidyr)
df_all <- df %>%
mutate(x = case_when(Numeric_variable==2 ~ 2,
Numeric_variable==3 ~ 3,
TRUE ~ NA_real_)) %>%
fill(x, .direction = "up")
df_2 <- df_all %>%
filter(x == 2 | is.na(x)) %>%
select(-x)
df_3 <- df_all %>%
filter(x == 3 | is.na(x)) %>%
select(-x)
df_2
Unit Numeric_variable
1 A 1
2 A 1
3 A 2
4 A 2
5 A 2
6 A 2
7 C 1
8 C 1
9 C 2
10 C 2
11 C 2
12 C 2
13 E 1
14 E 1
15 E 1
16 E 1
17 E 1
18 E 1
19 F 1
20 F 1
21 F 1
22 F 1
23 F 1
24 F 1
> df_3
Unit Numeric_variable
1 B 1
2 B 1
3 B 3
4 B 3
5 B 3
6 B 3
7 D 1
8 D 1
9 D 3
10 D 3
11 D 3
12 D 3
13 E 1
14 E 1
15 E 1
16 E 1
17 E 1
18 E 1
19 F 1
20 F 1
21 F 1
22 F 1
23 F 1
24 F 1
CodePudding user response:
We could use subset
- create a logical vector where the 'Numeric_variable' is 2 (or 3 for df2), subset the 'Unit' based on the logical vector, concatenate with 'E', 'F' and create another logical vector with %in%
on the original 'Unit' column
df1 <- subset(df, Unit %in% c(Unit[Numeric_variable == 2], c("E", "F")))
df2 <- subset(df, Unit %in% c(Unit[Numeric_variable == 3], c("E", "F")))
-output
> df1
Unit Numeric_variable
1 A 1
2 A 1
3 A 2
4 A 2
5 A 2
6 A 2
7 C 1
8 C 1
9 C 2
10 C 2
11 C 2
12 C 2
25 E 1
26 E 1
27 E 1
28 E 1
29 E 1
30 E 1
31 F 1
32 F 1
33 F 1
34 F 1
35 F 1
36 F 1
> df2
Unit Numeric_variable
13 B 1
14 B 1
15 B 3
16 B 3
17 B 3
18 B 3
19 D 1
20 D 1
21 D 3
22 D 3
23 D 3
24 D 3
25 E 1
26 E 1
27 E 1
28 E 1
29 E 1
30 E 1
31 F 1
32 F 1
33 F 1
34 F 1
35 F 1
36 F 1
If the 'E', 'F' cases needs to be determined programmatically
df1 <- subset(df, !Unit %in% Unit[Numeric_variable != 1]|
Unit %in% Unit[Numeric_variable == 2])
df2 <- subset(df, !Unit %in% Unit[Numeric_variable != 1]|
Unit %in% Unit[Numeric_variable == 3])
-output
> df1
Unit Numeric_variable
1 A 1
2 A 1
3 A 2
4 A 2
5 A 2
6 A 2
7 C 1
8 C 1
9 C 2
10 C 2
11 C 2
12 C 2
25 E 1
26 E 1
27 E 1
28 E 1
29 E 1
30 E 1
31 F 1
32 F 1
33 F 1
34 F 1
35 F 1
36 F 1
> df2
Unit Numeric_variable
13 B 1
14 B 1
15 B 3
16 B 3
17 B 3
18 B 3
19 D 1
20 D 1
21 D 3
22 D 3
23 D 3
24 D 3
25 E 1
26 E 1
27 E 1
28 E 1
29 E 1
30 E 1
31 F 1
32 F 1
33 F 1
34 F 1
35 F 1
36 F 1
CodePudding user response:
With data.table
:
library(data.table)
dt <- data.table(
Unit = rep(LETTERS[c(1,3,2,4:6)], each = 6),
Numeric_variable = c(1,1,2,2,2,2,1,1,2,2,2,2,1,1,3,3,3,3,1,1,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1)
)
dt1 <- dt[, .(Numeric_variable = if(any(Numeric_variable == 3)) Numeric_variable[0] else Numeric_variable), Unit]
dt2 <- dt[, .(Numeric_variable = if(any(Numeric_variable == 2)) Numeric_variable[0] else Numeric_variable), Unit]
dt1
#> Unit Numeric_variable
#> 1: A 1
#> 2: A 1
#> 3: A 2
#> 4: A 2
#> 5: A 2
#> 6: A 2
#> 7: C 1
#> 8: C 1
#> 9: C 2
#> 10: C 2
#> 11: C 2
#> 12: C 2
#> 13: E 1
#> 14: E 1
#> 15: E 1
#> 16: E 1
#> 17: E 1
#> 18: E 1
#> 19: F 1
#> 20: F 1
#> 21: F 1
#> 22: F 1
#> 23: F 1
#> 24: F 1
#> Unit Numeric_variable
dt2
#> Unit Numeric_variable
#> 1: B 1
#> 2: B 1
#> 3: B 3
#> 4: B 3
#> 5: B 3
#> 6: B 3
#> 7: D 1
#> 8: D 1
#> 9: D 3
#> 10: D 3
#> 11: D 3
#> 12: D 3
#> 13: E 1
#> 14: E 1
#> 15: E 1
#> 16: E 1
#> 17: E 1
#> 18: E 1
#> 19: F 1
#> 20: F 1
#> 21: F 1
#> 22: F 1
#> 23: F 1
#> 24: F 1