Home > Mobile >  How to use one dataframe as a key for value limits in another dataframe?
How to use one dataframe as a key for value limits in another dataframe?

Time:01-20

I am trying to figure out a simple way to use one table of values for specific analyte and matrix combinations to evaluate test results in another table for Water Quality testing purposes.

I have created an example that has a "key" table showing maximum Water Quality values for three analytes in two different matrices.

  • Analytes: As (Arsenic), Cd (Cadmium), Cr (Chromium)
  • Matrices: Fish (fish tissue), Floc (flocculent)

The key table is produced with this code:

limits= matrix(c(30,33,9.79,
                 0.5,4.98,0.99,
                 0.88,111,43.4), nrow=3, ncol=3, byrow=TRUE)
colnames(limits) = c("wet_fish","dry_floc_PEC","dry_floc_TEC")
rownames(limits) = c("As","Cd","Cr")
limits=data.frame(limits)
> limits
   wet_fish dry_floc_PEC dry_floc_TEC
As    30.00        33.00         9.79
Cd     0.50         4.98         0.99
Cr     0.88       111.00        43.40

Note that there is only one limit value for the Fish matrix, which uses the analyte concentration by wet weight. The Floc matrix has two limits, the lower TEC (Threshold Effect Concentration) and the higher PEC (Probable Effect Concentration). These two limits use the analyte concentration by dry weight.

The data would normally be imported from an excel csv file, but is replicated using this code:

data = matrix(c("Floc","As","31","1",
                "Floc","Cd","4.99","0.1",
                "Floc","Cr","112","0.1",
                "Fish","As","3","34",
                "Fish","Cd","1","4.99",
                "Fish","Cr","1","50",
                "Floc","As","1","1",
                "Floc","Cd","0.04","0.002",
                "Floc","Cr","0.08","0.008",
                "Fish","As","0.002","0.2",
                "Fish","Cd","0.0005","0.05",
                "Fish","Cr","0.001","5"), ncol=4, byrow=T)

colnames(data) = c("Matrix","Analyte","ResultDry","ResultWet")
data = data.frame(data)
> data
   Matrix Analyte ResultDry ResultWet
1    Floc      As        31         1
2    Floc      Cd      4.99       0.1
3    Floc      Cr       112       0.1
4    Fish      As         3        34
5    Fish      Cd         1      4.99
6    Fish      Cr         1        50
7    Floc      As         1         1
8    Floc      Cd      0.04     0.002
9    Floc      Cr      0.08     0.008
10   Fish      As     0.002       0.2
11   Fish      Cd    0.0005      0.05
12   Fish      Cr     0.001         5

Joining the tables so that the analytes and matrices match up across both tables would result in new columns in the data table for the limit values and whether or not the data value in each row exceeds it. That would result in a final table that looks something like this:

> data
   Matrix Analyte ResultDry ResultWet LimitWet TECDry PECDry Exceed
1    Floc      As        31         1       NA   9.79     33    TEC
2    Floc      Cd      4.99       0.1       NA   0.99   4.98    PEC
3    Floc      Cr       112       0.1       NA   43.4    111    PEC
4    Fish      As         3        34       30     NA     NA   Fish
5    Fish      Cd         1      4.99      0.5     NA     NA   Fish
6    Fish      Cr         1        50     0.88     NA     NA   Fish
7    Floc      As         1         1       NA   9.79     33   None
8    Floc      Cd      0.04     0.002       NA   0.99   4.98   None
9    Floc      Cr      0.08     0.008       NA   43.4    111   None
10   Fish      As     0.002       0.2       30     NA     NA   None
11   Fish      Cd    0.0005      0.05      0.5     NA     NA   None
12   Fish      Cr     0.001         5     0.88     NA     NA   None

The closest I can get to this is to have 3 columns, each testing for the matrix and if it the result is higher than the limit:

Data_final = limits %>%
  full_join(data, by=c("Analyte"="Analyte")) %>%
  mutate(ResultDry = as.numeric(ResultDry),
         ResultWet = as.numeric(ResultWet),
         wet_fish = as.numeric(wet_fish),
         dry_floc_TEC = as.numeric(dry_floc_TEC),
         dry_floc_PEC = as.numeric(dry_floc_PEC)) %>%
  mutate(Exceed_Fish = ifelse(Matrix=="Fish",ResultWet>wet_fish,NA)) %>%
  mutate(Exceed_Floc_TEC = ifelse(Matrix=="Floc",ResultDry>dry_floc_TEC,NA)) %>%
  mutate(Exceed_Floc_PEC = ifelse(Matrix=="Floc",ResultDry>dry_floc_PEC,NA))
> Data_final
   Analyte wet_fish dry_floc_PEC dry_floc_TEC Matrix ResultDry ResultWet Exceed_Fish Exceed_Floc_TEC Exceed_Floc_PEC
1       As    30.00        33.00         9.79   Floc   31.0000     1.000          NA            TRUE           FALSE
2       As    30.00        33.00         9.79   Fish    3.0000    34.000        TRUE              NA              NA
3       As    30.00        33.00         9.79   Floc    1.0000     1.000          NA           FALSE           FALSE
4       As    30.00        33.00         9.79   Fish    0.0020     0.200       FALSE              NA              NA
5       Cd     0.50         4.98         0.99   Floc    4.9900     0.100          NA            TRUE            TRUE
6       Cd     0.50         4.98         0.99   Fish    1.0000     4.990        TRUE              NA              NA
7       Cd     0.50         4.98         0.99   Floc    0.0400     0.002          NA           FALSE           FALSE
8       Cd     0.50         4.98         0.99   Fish    0.0005     0.050       FALSE              NA              NA
9       Cr     0.88       111.00        43.40   Floc  112.0000     0.100          NA            TRUE            TRUE
10      Cr     0.88       111.00        43.40   Fish    1.0000    50.000        TRUE              NA              NA
11      Cr     0.88       111.00        43.40   Floc    0.0800     0.008          NA           FALSE           FALSE
12      Cr     0.88       111.00        43.40   Fish    0.0010     5.000        TRUE              NA              NA

This is on the right track, but when I try to nest ifelse functions within a mutate to combine the three columns, it doesn't work correctly:

Data_combined = Data_final %>%
  mutate(Exceed = ifelse(Exceed_Fish==TRUE,"Yes - Fish",
                         ifelse(Exceed_Floc_TEC==TRUE&Exceed_Floc_PEC==FALSE, "Yes - Floc TEC",
                                ifelse(Exceed_Floc_PEC==TRUE, "Yes - Floc PEC", "No"))))
> Data_combined
   Analyte wet_fish dry_floc_PEC dry_floc_TEC Matrix ResultDry ResultWet Exceed_Fish Exceed_Floc_TEC Exceed_Floc_PEC     Exceed
1       As    30.00        33.00         9.79   Floc   31.0000     1.000          NA            TRUE           FALSE       <NA>
2       As    30.00        33.00         9.79   Fish    3.0000    34.000        TRUE              NA              NA Yes - Fish
3       As    30.00        33.00         9.79   Floc    1.0000     1.000          NA           FALSE           FALSE       <NA>
4       As    30.00        33.00         9.79   Fish    0.0020     0.200       FALSE              NA              NA       <NA>
5       Cd     0.50         4.98         0.99   Floc    4.9900     0.100          NA            TRUE            TRUE       <NA>
6       Cd     0.50         4.98         0.99   Fish    1.0000     4.990        TRUE              NA              NA Yes - Fish
7       Cd     0.50         4.98         0.99   Floc    0.0400     0.002          NA           FALSE           FALSE       <NA>
8       Cd     0.50         4.98         0.99   Fish    0.0005     0.050       FALSE              NA              NA       <NA>
9       Cr     0.88       111.00        43.40   Floc  112.0000     0.100          NA            TRUE            TRUE       <NA>
10      Cr     0.88       111.00        43.40   Fish    1.0000    50.000        TRUE              NA              NA Yes - Fish
11      Cr     0.88       111.00        43.40   Floc    0.0800     0.008          NA           FALSE           FALSE       <NA>
12      Cr     0.88       111.00        43.40   Fish    0.0010     5.000        TRUE              NA              NA Yes - Fish

CodePudding user response:

I think this is what you want. I changed the Limits data frame a little and then used case_when instead of ifelse

limits= as.data.frame(matrix(c(30,33,9.79,
                 0.5,4.98,0.99,
                 0.88,111,43.4), nrow=3, ncol=3, byrow=TRUE))
colnames(limits) = c("wet_fish","dry_floc_PEC","dry_floc_TEC")
#rownames(limits) = c("As","Cd","Cr")
limits$Analyte <- c("As","Cd","Cr")

limits

data = matrix(c("Floc","As","31","1",
                "Floc","Cd","4.99","0.1",
                "Floc","Cr","112","0.1",
                "Fish","As","3","34",
                "Fish","Cd","1","4.99",
                "Fish","Cr","1","50",
                "Floc","As","1","1",
                "Floc","Cd","0.04","0.002",
                "Floc","Cr","0.08","0.008",
                "Fish","As","0.002","0.2",
                "Fish","Cd","0.0005","0.05",
                "Fish","Cr","0.001","5"), ncol=4, byrow=T)

colnames(data) = c("Matrix","Analyte","ResultDry","ResultWet")
data = data.frame(data)
data

Data_final <-  
  limits %>%
  full_join(data, by=c("Analyte"="Analyte")) %>%
  mutate(ResultDry = as.numeric(ResultDry),
         ResultWet = as.numeric(ResultWet),
         wet_fish = as.numeric(wet_fish),
         dry_floc_TEC = as.numeric(dry_floc_TEC),
         dry_floc_PEC = as.numeric(dry_floc_PEC)) %>%
  mutate(Exceed_Fish = ifelse(Matrix=="Fish",ResultWet>wet_fish,NA)) %>%
  mutate(Exceed_Floc_TEC = ifelse(Matrix=="Floc",ResultDry>dry_floc_TEC,NA)) %>%
  mutate(Exceed_Floc_PEC = ifelse(Matrix=="Floc",ResultDry>dry_floc_PEC,NA))

Data_combined <-  
  Data_final %>%
  mutate(Exceed = case_when(Exceed_Fish==TRUE ~"Yes - Fish",
      Exceed_Floc_TEC == TRUE & Exceed_Floc_PEC == FALSE ~ "Yes - Floc TEC",
          Exceed_Floc_PEC==TRUE ~ "Yes - Floc PEC", 
          TRUE ~ "No"))

CodePudding user response:

I would calculate Exceed using case_when instead of nested ifelse. case_when runs tests in order, so if the first condition is TRUE, it never goes to the next steps, which simplifies the conditionals (ie. for the second case, we can assume that ResultWet is not greater than LimitWet, and so don't have to test for that). In addition, by wrapping the tests in isTRUE, we can automatically coerce any operation involving an NA to FALSE

data %>%
    left_join(rownames_to_column(limits, 'Analyte'), by='Analyte') %>%
    mutate_at(vars(starts_with('Result')), ~as.numeric(.)) %>%
    mutate(LimitWet=if_else(ResultWet>ResultDry, wet_fish, NA_real_),
           TECDry=if_else(ResultDry>ResultWet, dry_floc_TEC, NA_real_),
           PECDry=if_else(ResultDry>ResultWet, dry_floc_PEC, NA_real_)) %>%
    select(-wet_fish, -starts_with('dry_floc')) %>%
    rowwise() %>%
    mutate(Exceed=case_when(isTRUE(ResultWet>=LimitWet) ~ 'Fish',
                            isTRUE(ResultDry>=PECDry) ~ 'PEC',
                            isTRUE(ResultDry>=TECDry) ~ 'TEC',
                            TRUE ~ 'None'))

   Matrix Analyte ResultDry ResultWet LimitWet TECDry PECDry Exceed
 1 Floc   As        31          1        NA      9.79  33    TEC   
 2 Floc   Cd         4.99       0.1      NA      0.99   4.98 PEC   
 3 Floc   Cr       112          0.1      NA     43.4  111    PEC   
 4 Fish   As         3         34        30     NA     NA    Fish  
 5 Fish   Cd         1          4.99      0.5   NA     NA    Fish  
 6 Fish   Cr         1         50         0.88  NA     NA    Fish  
 7 Floc   As         1          1        NA     NA     NA    None  
 8 Floc   Cd         0.04       0.002    NA      0.99   4.98 None  
 9 Floc   Cr         0.08       0.008    NA     43.4  111    None  
10 Fish   As         0.002      0.2      30     NA     NA    None  
11 Fish   Cd         0.0005     0.05      0.5   NA     NA    None  
12 Fish   Cr         0.001      5         0.88  NA     NA    Fish  

CodePudding user response:

An approach using dplyr. Not quite sure though whats the logic behind Exceed ...

full_join(data, pivot_longer(limits, contains("_")) %>% 
    mutate(Matrix = str_to_title(gsub("^.{3}_|_.*", "", name)))) %>% 
  pivot_wider(names_from=name, values_from=value) %>% 
  rename(LimitWet = wet_fish, PECDry = dry_floc_PEC, TECDry = dry_floc_TEC)
Joining, by = c("Matrix", "Analyte")
# A tibble: 12 × 7
   Matrix Analyte ResultDry ResultWet PECDry TECDry LimitWet
   <chr>  <chr>       <dbl>     <dbl>  <dbl>  <dbl>    <dbl>
 1 Floc   As        31          1      33      9.79    NA   
 2 Floc   Cd         4.99       0.1     4.98   0.99    NA   
 3 Floc   Cr       112          0.1   111     43.4     NA   
 4 Fish   As         3         34      NA     NA       30   
 5 Fish   Cd         1          4.99   NA     NA        0.5 
 6 Fish   Cr         1         50      NA     NA        0.88
 7 Floc   As         1          1      33      9.79    NA   
 8 Floc   Cd         0.04       0.002   4.98   0.99    NA   
 9 Floc   Cr         0.08       0.008 111     43.4     NA   
10 Fish   As         0.002      0.2    NA     NA       30   
11 Fish   Cd         0.0005     0.05   NA     NA        0.5 
12 Fish   Cr         0.001      5      NA     NA        0.88

with Exceed if its just checking against ResultDry and ResultWet

full_join(data, pivot_longer(limits, contains("_")) %>% 
    mutate(Matrix = str_to_title(gsub("^.{3}_|_.*", "", name)))) %>% 
  pivot_wider(names_from=name, values_from=value) %>% 
  rename(LimitWet = wet_fish, PECDry = dry_floc_PEC, TECDry = dry_floc_TEC) %>% 
  mutate(Exceed = case_when(
    ResultWet >= LimitWet ~ "Fish", 
    ResultDry >= PECDry ~ "PEC", 
    ResultDry >= TECDry ~ "TEC", TRUE ~ "None"))
Joining, by = c("Matrix", "Analyte")
# A tibble: 12 × 8
   Matrix Analyte ResultDry ResultWet PECDry TECDry LimitWet Exceed
   <chr>  <chr>       <dbl>     <dbl>  <dbl>  <dbl>    <dbl> <chr> 
 1 Floc   As        31          1      33      9.79    NA    TEC   
 2 Floc   Cd         4.99       0.1     4.98   0.99    NA    PEC   
 3 Floc   Cr       112          0.1   111     43.4     NA    PEC   
 4 Fish   As         3         34      NA     NA       30    Fish  
 5 Fish   Cd         1          4.99   NA     NA        0.5  Fish  
 6 Fish   Cr         1         50      NA     NA        0.88 Fish  
 7 Floc   As         1          1      33      9.79    NA    None  
 8 Floc   Cd         0.04       0.002   4.98   0.99    NA    None  
 9 Floc   Cr         0.08       0.008 111     43.4     NA    None  
10 Fish   As         0.002      0.2    NA     NA       30    None  
11 Fish   Cd         0.0005     0.05   NA     NA        0.5  None  
12 Fish   Cr         0.001      5      NA     NA        0.88 Fish

Data

data <- structure(list(Matrix = c("Floc", "Floc", "Floc", "Fish", "Fish", 
"Fish", "Floc", "Floc", "Floc", "Fish", "Fish", "Fish"), Analyte = c("As", 
"Cd", "Cr", "As", "Cd", "Cr", "As", "Cd", "Cr", "As", "Cd", "Cr"
), ResultDry = c(31, 4.99, 112, 3, 1, 1, 1, 0.04, 0.08, 0.002, 
5e-04, 0.001), ResultWet = c(1, 0.1, 0.1, 34, 4.99, 50, 1, 0.002, 
0.008, 0.2, 0.05, 5)), row.names = c(NA, -12L), class = "data.frame")

limits <- structure(list(wet_fish = c(30, 0.5, 0.88), dry_floc_PEC = c(33, 
4.98, 111), dry_floc_TEC = c(9.79, 0.99, 43.4), Analyte = c("As", 
"Cd", "Cr")), class = "data.frame", row.names = c("As", "Cd", 
"Cr"))
  • Related