Home > Enterprise >  Referencing a data frame by a given matrix
Referencing a data frame by a given matrix

Time:10-04

I want to manipulate (e.g. delete, override, ...) specific values of a data frame. The references to these values are given in a matrix.

MRE:

data <- data.frame(anotherCol  = 1:5,
                   anotherCol2 = 6:10,
                   Name        = c('A', 'B', 'A', 'C', 'C'),
                   Value1      = sample(1:99, 5, TRUE),
                   Value2      = sample(1:99, 5, TRUE),
                   Value3      = sample(1:99, 5, TRUE),
                   Value4      = sample(1:99, 5, TRUE))

del <- matrix(sample(c(TRUE,FALSE), 12, TRUE),
              nrow = 3,
              dimnames = list(name  = c('A', 'B', 'C'),
                              value = paste0('Value', 1:4)))

Giving

> data
  anotherCol anotherCol2 Name Value1 Value2 Value3 Value4
1          1           6    A     29     53     49     39
2          2           7    B     15     83     54      6
3          3           8    A     46     33     16     59
4          4           9    C     47     99     22     12
5          5          10    C     58     91     24     49

and

> del
    value
name Value1 Value2 Value3 Value4
   A   TRUE  FALSE   TRUE   TRUE
   B  FALSE   TRUE   TRUE  FALSE
   C   TRUE   TRUE  FALSE  FALSE

Expected output:

> data
  anotherCol anotherCol2 Name Value1 Value2 Value3 Value4
1          1           6    A      -     53      -      -
2          2           7    B     15      -      -      6
3          3           8    A      -     33      -      -
4          4           9    C      -      -     22     12
5          5          10    C      -      -     24     49

CodePudding user response:

Try this base R approach:

newdata <- transform(data.frame(del), Name = rownames(del)) |>
  merge(data, by = "Name", all.y = TRUE)
newdata
#   Name Value1.x Value2.x Value3.x Value4.x anotherCol anotherCol2 Value1.y Value2.y Value3.y Value4.y
# 1    A    FALSE     TRUE    FALSE     TRUE          1           6        2       44       90       20
# 2    A    FALSE     TRUE    FALSE     TRUE          3           8       93        9       48       55
# 3    B    FALSE    FALSE     TRUE     TRUE          2           7       28       66       30       13
# 4    C    FALSE    FALSE     TRUE    FALSE          4           9       27       42       85        6
# 5    C    FALSE    FALSE     TRUE    FALSE          5          10       87       80       36       53

nms.x <- paste0(grep("^Value", names(data), value = TRUE), ".x")
nms.y <- sub("x$", "y", nms.x)
nms.x <- nms.x[nms.y %in% names(newdata)]
nms.y <- intersect(nms.y, names(newdata))
newdata[,nms.y] <- Map(function(a, b) ifelse(b, NA, a), newdata[,nms.y], newdata[,nms.x])
newdata
#   Name Value1.x Value2.x Value3.x Value4.x anotherCol anotherCol2 Value1.y Value2.y Value3.y Value4.y
# 1    A    FALSE     TRUE    FALSE     TRUE          1           6        2       NA       90       NA
# 2    A    FALSE     TRUE    FALSE     TRUE          3           8       93       NA       48       NA
# 3    B    FALSE    FALSE     TRUE     TRUE          2           7       28       66       NA       NA
# 4    C    FALSE    FALSE     TRUE    FALSE          4           9       27       42       NA        6
# 5    C    FALSE    FALSE     TRUE    FALSE          5          10       87       80       NA       53
newdata[,nms.x] <- NULL
names(newdata)[match(nms.y, names(newdata))] <- sub("\\.y$", "", nms.y)
newdata
#   Name anotherCol anotherCol2 Value1 Value2 Value3 Value4
# 1    A          1           6      2     NA     90     NA
# 2    A          3           8     93     NA     48     NA
# 3    B          2           7     28     66     NA     NA
# 4    C          4           9     27     42     NA      6
# 5    C          5          10     87     80     NA     53

CodePudding user response:

You can set the FALSE values to NA, and then use data.table to join the two. Finish by column binding the first three columns to the product of Value columns:

library(data.table)

del[del ==FALSE] <- NA
data = setDT(data)[data.table(del)[,Name:=rownames(del)], on=.(Name)]
cbind(data[,c(1:3)],data[,c(4:7)] * data[,c(8:11)])

Output:

   anotherCol anotherCol2   Name Value1 Value2 Value3 Value4
        <int>       <int> <char>  <int>  <int>  <int>  <int>
1:          1           6      A     NA     42     NA     92
2:          3           8      A     NA     43     NA     93
3:          2           7      B     79     NA     NA      9
4:          4           9      C     NA     14     91     99
5:          5          10      C     NA     25     57     72

Input:

set.seed(123)
data <- data.frame(anotherCol  = 1:5,
                   anotherCol2 = 6:10,
                   Name        = c('A', 'B', 'A', 'C', 'C'),
                   Value1      = sample(1:99, 5, TRUE),
                   Value2      = sample(1:99, 5, TRUE),
                   Value3      = sample(1:99, 5, TRUE),
                   Value4      = sample(1:99, 5, TRUE))

del <- matrix(sample(c(TRUE,FALSE), 12, TRUE),
              nrow = 3,
              dimnames = list(name  = c('A', 'B', 'C'),
                              value = paste0('Value', 1:4)))

  • Related