I have the following data.frame, in which I am trying to create a new field according to the rules below.

tibble [212 × 9] (S3: tbl_df/tbl/data.frame)

$ Observation : num [1:212] 1 2 3 4 5 6 7 8 9 10 ...

 $ Gender      : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 2 2 1 1 ...

 $ Education   : Factor w/ 3 levels "Bachelors","Masters",..: 2 2 3 1 2 3 3 1 2 2 ...

 $ Salary      : num [1:212] 64233855 7955556 97531875 89785395 6956943 ...

 $ Graduation  : Date[1:212], format: "2015-09-22" "2020-06-15" "2008-05-07" ...

 $ License     : logi [1:212] TRUE FALSE TRUE FALSE FALSE TRUE ...

 $ Expenses    : num [1:212] 3356768 247988 274816 2447352 4069344 ...

$ Satisfaction: Factor w/ 5 levels "1","2","3","4",..: 3 1 4 3 5 2 3 2 4 3 ...

$ Stress : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 2 1 2 1 1 ...

Rules

a. Increase the current Salary by 15% for females whose Satisfaction score is either 2 or 3 and whose Stress is 'Yes'.
b. Increase the current Salary by 7.5% for males whose Satisfaction score is either 1 or 2 and whose Stress is 'No'.
c. For the rest of the rows, do nothing.

I have tried to write an if statement to solve it, but I get an error message:

Error: unexpected '&' in: " if (df$Gender[j]=='female') &"

# Attempt to fill df$SalaryNew row by row. This snippet does not parse; the
# problems, in the order they appear:
#   * The `if (...)` condition is closed right after the Gender comparison, so
#     the `&` that starts the next line is outside the parentheses. This is
#     what triggers the "unexpected '&'" error.
#   * `df$Satisfaction[j] 2 | 3` has no comparison operator between the column
#     and the values.
#   * The `else if (...)` line has a stray `,` after the closing parenthesis,
#     and again leaves most of the condition outside the parentheses.
#   * The `else if` branch tests Satisfaction 2 | 3, whereas the rule for males
#     stated above is 1 or 2.
#   * Gender is compared with 'female'/'male', but the factor shown above has
#     the levels "0" and "1".
# NOTE(review): `i` is not defined in the code shown; presumably it holds
# dim(df), so that i[1] is the number of rows -- confirm.
for (j in 1:i[1]) {
  
  if (df$Gender[j]=='female') 
    & df$Satisfaction[j] 2 | 3 & df$Stress[j] == 'Yes' 
    df$SalaryNew[j] <- df$Salary[j]*1.15
  
    else if (df$Gender[j]=='male'), & df$Satisfaction[j]  2 | 3 & df$Stress[j] == 'No' 
    df$SalaryNew[j] <- df$Salary[j]*1.075
  else
    df$SalaryNew[j] <- df$Salary[j]
    
}

Structure

structure(list(Observation = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 
    
Gender = structure(c(2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 
1L), levels = c("0", "1"), class = "factor"), Education = 
structure(c(2L, 2L, 3L, 1L, 2L, 3L, 3L, 1L, 2L, 2L), levels = c("Bachelors", "Masters", "PhD"), class = "factor"), Salary = c(64233855,7955556, 97531875, 89785395, 6956943, 12445419, 54293295, 
109647195, 113335215, 8171793), Graduation = structure(c(16700, 18428, 14006, 11782, 15333, 13879, 18873, 19085, 13067, 13529), class = "Date"), License = c(TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE), Expenses = c(3356768, 247988, 274816, 2447352, 4069344, 244264, 3398872, 2901072, 3346736, 2358584), Satisfaction = structure(c(3L, 1L, 4L,3L, 5L, 2L, 3L, 2L, 4L, 3L), levels = c("1", "2", "3", "4","5"), class = "factor"), Stress = structure(c(2L, 2L, 1L,1L, 2L, 2L, 1L, 2L, 1L, 1L), levels = c("No", "Yes"), class = "factor")), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"))

CodePudding user response：

There are some problems with your syntax, such as unclosed parentheses and the misuse of some operators, so I rewrote it in a way that makes it easier to see where each parenthesis closes and how each operator is used:

# Add a SalaryNew column according to the three rules, working on whole
# columns instead of looping over the rows.
#
# NOTE(review): the sample data codes Gender as "0"/"1", while the labels
# "female"/"male" are used here as in the question. Replace them with the
# codes that mean female/male in your data, otherwise no row will match.
#
# `&` (not `&&`) is used because each comparison yields one TRUE/FALSE per
# row. `%in%` never returns NA, so rows with missing values simply fall
# through to rule (c) instead of raising an error.

# Rule (a): female, Satisfaction 2 or 3, Stress "Yes" -> raise of 15%.
raise_15 <- (df$Gender %in% "female") &
  (df$Satisfaction %in% c("2", "3")) &
  (df$Stress %in% "Yes")

# Rule (b): male, Satisfaction 1 or 2, Stress "No" -> raise of 7.5%.
raise_7_5 <- (df$Gender %in% "male") &
  (df$Satisfaction %in% c("1", "2")) &
  (df$Stress %in% "No")

# Rule (c): every other row keeps its current Salary. Creating the whole
# column first also avoids assigning element-wise into a missing column.
df$SalaryNew <- df$Salary
df$SalaryNew[raise_15] <- df$Salary[raise_15] * 1.15
df$SalaryNew[raise_7_5] <- df$Salary[raise_7_5] * 1.075