Home > OS >  R: Add new column by specific patterns in another column of the dataframe
R: Add new column by specific patterns in another column of the dataframe

Time:02-19

my dataframe A looks like this:

**Group**    **Pattern**
One         Black & White 
Two         Black OR Pink
Three           Red
Four            Pink
Five        White & Green
Six         Green & Orange
Seven           Orange
Eight        Pink & Red
Nine        Black OR White
Ten             Green
.                 .
.                 .
.                 .

I have then dataframe B which looks like this:

**Color**    **Value**
Orange         12
Pink            2
Red             4
Green          22
Black          84
White         100

I would like to add a new column, called Value, in dataframe A, based on its Pattern column. I want it to be in a way that if there is any (&), the values are summed up (for example if it is Black & White, I want it to become 184) and if there is any (OR), I want to have the higher number (in the same example, it will be 100).

I can join them with dplyr inner_join, but rows with &/OR are excluded.Is there any other way?

Cheers!

CodePudding user response:

dfA <- data.frame(group=seq(1,4), pattern=c("Black & White", "Black OR Pink", "Red", "Pink"), stringsAsFactors=F)
dfB <- data.frame(color=c("Pink", "Red", "Black", "White"), value=c(2,4,84,100), stringsAsFactors=F)
    
getVal2return <- function(i, dfA, dfB){
  
  andv <- unlist(strsplit(dfA$pattern[i], split=" & "))
  orv <- unlist(strsplit(dfA$pattern[i], split=" OR "))
  if (length(andv) > 1) {
    val <- sum(dfB$value[match(andv, dfB$color)])
  } else if (length(orv)> 1){
    val <- max(dfB$value[match(orv, dfB$color)])
  } else {
  val <- dfB$value[match(dfA$pattern[i], dfB$color)]
  }
  return(val)
}
    
dfA$newVal <- sapply(1:nrow(dfA), function(x) { getVal2return(x, dfA, dfB) })

> dfA
      group       pattern newVal
    1     1 Black & White    184
    2     2 Black OR Pink     84
    3     3           Red      4
    4     4          Pink      2

CodePudding user response:

This is a fairly pedestrian method, but effective:

A$Value <- A$Pattern
for(i in seq(nrow(B))) A$Value <- gsub(B$Color[i], B$Value[i], A$Value)
A$Value <- sub("&", " ", A$Value)
A$Value <- sub("^(\\d ) OR (\\d )$", "max(\\1, \\2)", A$Value)
A$Value <- vapply(A$Value, function(x) eval(parse(text = x)), numeric(1))
A
#>    Group        Pattern Value
#> 1    One  Black & White   184
#> 2    Two  Black OR Pink    84
#> 3  Three            Red     4
#> 4   Four           Pink     2
#> 5   Five  White & Green   122
#> 6    Six Green & Orange    34
#> 7  Seven         Orange    12
#> 8  Eight     Pink & Red     6
#> 9   Nine Black OR White   100
#> 10   Ten          Green    22

Created on 2022-02-18 by the reprex package (v2.0.1)


DATA

A <- structure(list(Group = c("One", "Two", "Three", "Four", "Five", 
"Six", "Seven", "Eight", "Nine", "Ten"), Pattern = c("Black & White", 
"Black OR Pink", "Red", "Pink", "White & Green", "Green & Orange", 
"Orange", "Pink & Red", "Black OR White", "Green")), class = "data.frame", 
row.names = c(NA, -10L))

B <- structure(list(Color = c("Orange", "Pink", "Red", "Green", "Black", 
"White"), Value = c(12L, 2L, 4L, 22L, 84L, 100L)), class = "data.frame", 
row.names = c(NA, -6L))

CodePudding user response:

I would try something like this, I like R base better Assuming df2 as the second dataframe

df['Value'] = apply(df['Pattern'], 1, function(Pattern){
  s = strsplit(Pattern, ' & ')[[1]]
  if (length(s) == 2) {
     return(with(df2, Value[Color == s[1]]   Value[Color == s[2]]))
  }
  s = strsplit(Pattern, ' OR ')[[1]]
  if (length(s) == 2) {
     return(with(df2, max(Value[Color == s[1]], Value[Color == s[2]])))
  }
  return(df2[df2$Color == Pattern,]$Value)
})

df
#>    Group        Pattern Value
#> 1    One  Black & White   184
#> 2    Two  Black OR Pink    84
#> 3  Three            Red     4
#> 4   Four           Pink     2
#> 5   Five  White & Green   122
#> 6    Six Green & Orange    34
#> 7  Seven         Orange    12
#> 8  Eight     Pink & Red     6
#> 9   Nine Black OR White   100
#> 10   Ten          Green    22
  •  Tags:  
  • r
  • Related