I have been trying to sort the rows of my dataset(A2) to match multiple columns of another dataset (A1). I used a code sometime back and it worked perfectly. Today I modified the code to use on a new data set and I am getting NA's which I shouldn't be getting.
This is the code I am using
A4 <- A2[match(paste(A1$Subject,A1$Condition,A1$test, A1$Replication), paste(A2$Subject,A2$Condition,A2$test,A2$Replication)),]
Data:
A1 <- structure(list(Subject = c(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
2, 2, 2, 2), Condition = c(1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2,
1, 1, 2, 2), test = c(1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2,
2, 2, 2), Replication = c(101, 108, 103, 105, 101, 108, 103,
105, 101, 108, 103, 105, 101, 108, 103, 105), Movement = c(43,
56, 45, 43, 35, 34, 34, 3, 4, 67, 45, 34, 65, 34, 345, 23)), row.names = c(NA,
-16L), class = c("tbl_df", "tbl", "data.frame"))
A2 <-structure(list(Subject = c(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
1, 1, 1, 1, 1), Condition = c(1, 1, 2, 2, 1, 1, 2, 2, 1, 1,
2, 2, 2, 1, 1, 2), test = c(1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1,
1, 2, 2, 2, 2), Replication = c(108, 101, 103, 105, 101, 108,
103, 105, 108, 101, 103, 105, 101, 108, 103, 105), Movement = c(43,
56, 45, 43, 35, 34, 34, 3, 4, 67, 45, 34, 65, 34, 345, 23)), row.names = c(NA,
-16L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
We may use slice
in dplyr
library(dplyr)
library(purrr)
library(stringr)
A2 %>%
slice(match(invoke(str_c, across(Subject:Replication, ~
A1[[cur_column()]])), invoke(str_c, across(Subject:Replication))))
-output
# A tibble: 14 × 5
Subject Condition test Replication Movement
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 101 67
2 1 1 1 108 4
3 1 2 1 103 45
4 1 2 1 105 34
5 1 1 2 108 34
6 1 2 2 105 23
7 2 1 1 101 56
8 2 1 1 108 43
9 2 2 1 103 45
10 2 2 1 105 43
11 2 1 2 101 35
12 2 1 2 108 34
13 2 2 2 103 34
14 2 2 2 105 3
CodePudding user response:
You can use a merge
join_vars <- c('Subject', 'Condition', 'test', 'Replication')
merge(A2, A1[, join_vars], by = join_vars, all.x = FALSE)
#> Subject Condition test Replication Movement
#> 1 1 1 1 101 67
#> 2 1 1 1 108 4
#> 3 1 1 2 108 34
#> 4 1 2 1 103 45
#> 5 1 2 1 105 34
#> 6 1 2 2 105 23
#> 7 2 1 1 101 56
#> 8 2 1 1 108 43
#> 9 2 1 2 101 35
#> 10 2 1 2 108 34
#> 11 2 2 1 103 45
#> 12 2 2 1 105 43
#> 13 2 2 2 103 34
#> 14 2 2 2 105 3
Created on 2021-10-17 by the reprex package (v2.0.1)
CodePudding user response:
You may use na.omit
, also Reduce
makes life easier.
A2[na.omit(match(Reduce(paste, A1[-5]), Reduce(paste, A2[-5]))), ]
# Subject Condition test Replication Movement
# 10 1 1 1 101 67
# 9 1 1 1 108 4
# 11 1 2 1 103 45
# 12 1 2 1 105 34
# 14 1 1 2 108 34
# 16 1 2 2 105 23
# 2 2 1 1 101 56
# 1 2 1 1 108 43
# 3 2 2 1 103 45
# 4 2 2 1 105 43
# 5 2 1 2 101 35
# 6 2 1 2 108 34
# 7 2 2 2 103 34
# 8 2 2 2 105 3