Home > Mobile >  R - use apply to transfer one value from one data frame to another by match of two columns
R - use apply to transfer one value from one data frame to another by match of two columns

Time:12-09

I have two data frames. I want to transfer the Value from df B to df A according to the matching values T and Sample.

Data frame A
Person T   Sample Value
1      A   1
1      A   2
1      A   3
1      B   1
1      B   2
1      B   3
2      A   1
2      A   2
2      A   3
2      B   1
2      B   2
2      B   3
2      C   1
2      C   2
2      C   3
...

Data frame B
T   Sample Value
A   1      30.2
A   2      12.5
A   3      77.5
B   1      22.2
B   2      11.0
B   3      23.6

My solution is currently two nested for loops which are way too slow because the data frames are quite large. How can I do this more efficiently, e.g. with the apply family?

for (i in 1:nrow(A)) {
  for (p in 1:nrow(B)) {
    
    if(A$T[i] == B$T[p] & A$Sample[i] == B$Sample[p]){
      A$Value[i] <- B$Value[p]
    }
  }
}

CodePudding user response:

There is no need to use any apply function, just merge A and B like this.

A <- structure(list(Person = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L), T = c("A", "A", "A", "B", "B", "B", 
"A", "A", "A", "B", "B", "B", "C", "C", "C"), Sample = c(1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)), row.names = c(NA, 
-15L), class = c("data.frame"))

B <- structure(list(T = c("A", "A", "A", "B", "B", "B"), Sample = c(1L, 
2L, 3L, 1L, 2L, 3L), Value = c(30.2, 12.5, 77.5, 22.2, 11, 23.6
)), row.names = c(NA, -6L), class = c("data.frame"))

# all = T if you wish to keep all in A without values in B

merge(A, B, by = c("T", "Sample"), all = T)

   T Sample Person Value
1  A      1      1  30.2
2  A      1      2  30.2
3  A      2      2  12.5
4  A      2      1  12.5
5  A      3      2  77.5
6  A      3      1  77.5
7  B      1      1  22.2
8  B      1      2  22.2
9  B      2      1  11.0
10 B      2      2  11.0
11 B      3      2  23.6
12 B      3      1  23.6
13 C      1      2    NA
14 C      2      2    NA
15 C      3      2    NA

CodePudding user response:

I would favour an existing join function for this. Such as:

left_join(
  select(dfA, -Value),
  dfB, 
  by = c('T', 'Sample')
)

Data:


dfA <- structure(list(Person = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), T = c("A", "A", "A", "B", "B", "B", "A", "A", "A", "B", "B", "B", "C", "C", "C"), Sample = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), Value = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA, -15L))

dfB <- structure(list(T = c("A", "A", "A", "B", "B", "B"), Sample = c(1L, 2L, 3L, 1L, 2L, 3L), Value = c(30.2, 12.5, 77.5, 22.2, 11, 23.6 )), class = "data.frame", row.names = c(NA, -6L)) 

Output


   Person T Sample Value
1       1 A      1  30.2
2       1 A      2  12.5
3       1 A      3  77.5
4       1 B      1  22.2
5       1 B      2  11.0
6       1 B      3  23.6
7       2 A      1  30.2
8       2 A      2  12.5
9       2 A      3  77.5
10      2 B      1  22.2
11      2 B      2  11.0
12      2 B      3  23.6
13      2 C      1    NA
14      2 C      2    NA
15      2 C      3    NA
  •  Tags:  
  • r
  • Related