I am trying to figure out a simple way to use one table of values for specific analyte and matrix combinations to evaluate test results in another table for Water Quality testing purposes.
I have created an example that has a "key" table showing maximum Water Quality values for three analytes in two different matrices.
- Analytes: As (Arsenic), Cd (Cadmium), Cr (Chromium)
- Matrices: Fish (fish tissue), Floc (flocculent)
The key table is produced with this code:
limits= matrix(c(30,33,9.79,
0.5,4.98,0.99,
0.88,111,43.4), nrow=3, ncol=3, byrow=TRUE)
colnames(limits) = c("wet_fish","dry_floc_PEC","dry_floc_TEC")
rownames(limits) = c("As","Cd","Cr")
limits=data.frame(limits)
> limits
wet_fish dry_floc_PEC dry_floc_TEC
As 30.00 33.00 9.79
Cd 0.50 4.98 0.99
Cr 0.88 111.00 43.40
Note that there is only one limit value for the Fish matrix, which uses the analyte concentration by wet weight. The Floc matrix has two limits, the lower TEC (Threshold Effect Concentration) and the higher PEC (Probable Effect Concentration). These two limits use the analyte concentration by dry weight.
The data would normally be imported from an excel csv file, but is replicated using this code:
data = matrix(c("Floc","As","31","1",
"Floc","Cd","4.99","0.1",
"Floc","Cr","112","0.1",
"Fish","As","3","34",
"Fish","Cd","1","4.99",
"Fish","Cr","1","50",
"Floc","As","1","1",
"Floc","Cd","0.04","0.002",
"Floc","Cr","0.08","0.008",
"Fish","As","0.002","0.2",
"Fish","Cd","0.0005","0.05",
"Fish","Cr","0.001","5"), ncol=4, byrow=T)
colnames(data) = c("Matrix","Analyte","ResultDry","ResultWet")
data = data.frame(data)
> data
Matrix Analyte ResultDry ResultWet
1 Floc As 31 1
2 Floc Cd 4.99 0.1
3 Floc Cr 112 0.1
4 Fish As 3 34
5 Fish Cd 1 4.99
6 Fish Cr 1 50
7 Floc As 1 1
8 Floc Cd 0.04 0.002
9 Floc Cr 0.08 0.008
10 Fish As 0.002 0.2
11 Fish Cd 0.0005 0.05
12 Fish Cr 0.001 5
Joining the tables so that the analytes and matrices match up across both tables would result in new columns in the data table for the limit values and whether or not the data value in each row exceeds it. That would result in a final table that looks something like this:
> data
Matrix Analyte ResultDry ResultWet LimitWet TECDry PECDry Exceed
1 Floc As 31 1 NA 9.79 33 TEC
2 Floc Cd 4.99 0.1 NA 0.99 4.98 PEC
3 Floc Cr 112 0.1 NA 43.4 111 PEC
4 Fish As 3 34 30 NA NA Fish
5 Fish Cd 1 4.99 0.5 NA NA Fish
6 Fish Cr 1 50 0.88 NA NA Fish
7 Floc As 1 1 NA 9.79 33 None
8 Floc Cd 0.04 0.002 NA 0.99 4.98 None
9 Floc Cr 0.08 0.008 NA 43.4 111 None
10 Fish As 0.002 0.2 30 NA NA None
11 Fish Cd 0.0005 0.05 0.5 NA NA None
12 Fish Cr 0.001 5 0.88 NA NA None
The closest I can get to this is to have 3 columns, each testing for the matrix and if it the result is higher than the limit:
Data_final = limits %>%
full_join(data, by=c("Analyte"="Analyte")) %>%
mutate(ResultDry = as.numeric(ResultDry),
ResultWet = as.numeric(ResultWet),
wet_fish = as.numeric(wet_fish),
dry_floc_TEC = as.numeric(dry_floc_TEC),
dry_floc_PEC = as.numeric(dry_floc_PEC)) %>%
mutate(Exceed_Fish = ifelse(Matrix=="Fish",ResultWet>wet_fish,NA)) %>%
mutate(Exceed_Floc_TEC = ifelse(Matrix=="Floc",ResultDry>dry_floc_TEC,NA)) %>%
mutate(Exceed_Floc_PEC = ifelse(Matrix=="Floc",ResultDry>dry_floc_PEC,NA))
> Data_final
Analyte wet_fish dry_floc_PEC dry_floc_TEC Matrix ResultDry ResultWet Exceed_Fish Exceed_Floc_TEC Exceed_Floc_PEC
1 As 30.00 33.00 9.79 Floc 31.0000 1.000 NA TRUE FALSE
2 As 30.00 33.00 9.79 Fish 3.0000 34.000 TRUE NA NA
3 As 30.00 33.00 9.79 Floc 1.0000 1.000 NA FALSE FALSE
4 As 30.00 33.00 9.79 Fish 0.0020 0.200 FALSE NA NA
5 Cd 0.50 4.98 0.99 Floc 4.9900 0.100 NA TRUE TRUE
6 Cd 0.50 4.98 0.99 Fish 1.0000 4.990 TRUE NA NA
7 Cd 0.50 4.98 0.99 Floc 0.0400 0.002 NA FALSE FALSE
8 Cd 0.50 4.98 0.99 Fish 0.0005 0.050 FALSE NA NA
9 Cr 0.88 111.00 43.40 Floc 112.0000 0.100 NA TRUE TRUE
10 Cr 0.88 111.00 43.40 Fish 1.0000 50.000 TRUE NA NA
11 Cr 0.88 111.00 43.40 Floc 0.0800 0.008 NA FALSE FALSE
12 Cr 0.88 111.00 43.40 Fish 0.0010 5.000 TRUE NA NA
This is on the right track, but when I try to nest ifelse functions within a mutate to combine the three columns, it doesn't work correctly:
Data_combined = Data_final %>%
mutate(Exceed = ifelse(Exceed_Fish==TRUE,"Yes - Fish",
ifelse(Exceed_Floc_TEC==TRUE&Exceed_Floc_PEC==FALSE, "Yes - Floc TEC",
ifelse(Exceed_Floc_PEC==TRUE, "Yes - Floc PEC", "No"))))
> Data_combined
Analyte wet_fish dry_floc_PEC dry_floc_TEC Matrix ResultDry ResultWet Exceed_Fish Exceed_Floc_TEC Exceed_Floc_PEC Exceed
1 As 30.00 33.00 9.79 Floc 31.0000 1.000 NA TRUE FALSE <NA>
2 As 30.00 33.00 9.79 Fish 3.0000 34.000 TRUE NA NA Yes - Fish
3 As 30.00 33.00 9.79 Floc 1.0000 1.000 NA FALSE FALSE <NA>
4 As 30.00 33.00 9.79 Fish 0.0020 0.200 FALSE NA NA <NA>
5 Cd 0.50 4.98 0.99 Floc 4.9900 0.100 NA TRUE TRUE <NA>
6 Cd 0.50 4.98 0.99 Fish 1.0000 4.990 TRUE NA NA Yes - Fish
7 Cd 0.50 4.98 0.99 Floc 0.0400 0.002 NA FALSE FALSE <NA>
8 Cd 0.50 4.98 0.99 Fish 0.0005 0.050 FALSE NA NA <NA>
9 Cr 0.88 111.00 43.40 Floc 112.0000 0.100 NA TRUE TRUE <NA>
10 Cr 0.88 111.00 43.40 Fish 1.0000 50.000 TRUE NA NA Yes - Fish
11 Cr 0.88 111.00 43.40 Floc 0.0800 0.008 NA FALSE FALSE <NA>
12 Cr 0.88 111.00 43.40 Fish 0.0010 5.000 TRUE NA NA Yes - Fish
CodePudding user response:
I think this is what you want. I changed the Limits data frame a little and then used case_when
instead of ifelse
limits= as.data.frame(matrix(c(30,33,9.79,
0.5,4.98,0.99,
0.88,111,43.4), nrow=3, ncol=3, byrow=TRUE))
colnames(limits) = c("wet_fish","dry_floc_PEC","dry_floc_TEC")
#rownames(limits) = c("As","Cd","Cr")
limits$Analyte <- c("As","Cd","Cr")
limits
data = matrix(c("Floc","As","31","1",
"Floc","Cd","4.99","0.1",
"Floc","Cr","112","0.1",
"Fish","As","3","34",
"Fish","Cd","1","4.99",
"Fish","Cr","1","50",
"Floc","As","1","1",
"Floc","Cd","0.04","0.002",
"Floc","Cr","0.08","0.008",
"Fish","As","0.002","0.2",
"Fish","Cd","0.0005","0.05",
"Fish","Cr","0.001","5"), ncol=4, byrow=T)
colnames(data) = c("Matrix","Analyte","ResultDry","ResultWet")
data = data.frame(data)
data
Data_final <-
limits %>%
full_join(data, by=c("Analyte"="Analyte")) %>%
mutate(ResultDry = as.numeric(ResultDry),
ResultWet = as.numeric(ResultWet),
wet_fish = as.numeric(wet_fish),
dry_floc_TEC = as.numeric(dry_floc_TEC),
dry_floc_PEC = as.numeric(dry_floc_PEC)) %>%
mutate(Exceed_Fish = ifelse(Matrix=="Fish",ResultWet>wet_fish,NA)) %>%
mutate(Exceed_Floc_TEC = ifelse(Matrix=="Floc",ResultDry>dry_floc_TEC,NA)) %>%
mutate(Exceed_Floc_PEC = ifelse(Matrix=="Floc",ResultDry>dry_floc_PEC,NA))
Data_combined <-
Data_final %>%
mutate(Exceed = case_when(Exceed_Fish==TRUE ~"Yes - Fish",
Exceed_Floc_TEC == TRUE & Exceed_Floc_PEC == FALSE ~ "Yes - Floc TEC",
Exceed_Floc_PEC==TRUE ~ "Yes - Floc PEC",
TRUE ~ "No"))
CodePudding user response:
I would calculate Exceed using case_when
instead of nested ifelse. case_when
runs tests in order, so if the first condition is TRUE, it never goes to the next steps, which simplifies the conditionals (ie. for the second case, we can assume that ResultWet is not greater than LimitWet, and so don't have to test for that). In addition, by wrapping the tests in isTRUE
, we can automatically coerce any operation involving an NA
to FALSE
data %>%
left_join(rownames_to_column(limits, 'Analyte'), by='Analyte') %>%
mutate_at(vars(starts_with('Result')), ~as.numeric(.)) %>%
mutate(LimitWet=if_else(ResultWet>ResultDry, wet_fish, NA_real_),
TECDry=if_else(ResultDry>ResultWet, dry_floc_TEC, NA_real_),
PECDry=if_else(ResultDry>ResultWet, dry_floc_PEC, NA_real_)) %>%
select(-wet_fish, -starts_with('dry_floc')) %>%
rowwise() %>%
mutate(Exceed=case_when(isTRUE(ResultWet>=LimitWet) ~ 'Fish',
isTRUE(ResultDry>=PECDry) ~ 'PEC',
isTRUE(ResultDry>=TECDry) ~ 'TEC',
TRUE ~ 'None'))
Matrix Analyte ResultDry ResultWet LimitWet TECDry PECDry Exceed
1 Floc As 31 1 NA 9.79 33 TEC
2 Floc Cd 4.99 0.1 NA 0.99 4.98 PEC
3 Floc Cr 112 0.1 NA 43.4 111 PEC
4 Fish As 3 34 30 NA NA Fish
5 Fish Cd 1 4.99 0.5 NA NA Fish
6 Fish Cr 1 50 0.88 NA NA Fish
7 Floc As 1 1 NA NA NA None
8 Floc Cd 0.04 0.002 NA 0.99 4.98 None
9 Floc Cr 0.08 0.008 NA 43.4 111 None
10 Fish As 0.002 0.2 30 NA NA None
11 Fish Cd 0.0005 0.05 0.5 NA NA None
12 Fish Cr 0.001 5 0.88 NA NA Fish
CodePudding user response:
An approach using dplyr
. Not quite sure though whats the logic behind Exceed ...
full_join(data, pivot_longer(limits, contains("_")) %>%
mutate(Matrix = str_to_title(gsub("^.{3}_|_.*", "", name)))) %>%
pivot_wider(names_from=name, values_from=value) %>%
rename(LimitWet = wet_fish, PECDry = dry_floc_PEC, TECDry = dry_floc_TEC)
Joining, by = c("Matrix", "Analyte")
# A tibble: 12 × 7
Matrix Analyte ResultDry ResultWet PECDry TECDry LimitWet
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Floc As 31 1 33 9.79 NA
2 Floc Cd 4.99 0.1 4.98 0.99 NA
3 Floc Cr 112 0.1 111 43.4 NA
4 Fish As 3 34 NA NA 30
5 Fish Cd 1 4.99 NA NA 0.5
6 Fish Cr 1 50 NA NA 0.88
7 Floc As 1 1 33 9.79 NA
8 Floc Cd 0.04 0.002 4.98 0.99 NA
9 Floc Cr 0.08 0.008 111 43.4 NA
10 Fish As 0.002 0.2 NA NA 30
11 Fish Cd 0.0005 0.05 NA NA 0.5
12 Fish Cr 0.001 5 NA NA 0.88
with Exceed if its just checking against ResultDry and ResultWet
full_join(data, pivot_longer(limits, contains("_")) %>%
mutate(Matrix = str_to_title(gsub("^.{3}_|_.*", "", name)))) %>%
pivot_wider(names_from=name, values_from=value) %>%
rename(LimitWet = wet_fish, PECDry = dry_floc_PEC, TECDry = dry_floc_TEC) %>%
mutate(Exceed = case_when(
ResultWet >= LimitWet ~ "Fish",
ResultDry >= PECDry ~ "PEC",
ResultDry >= TECDry ~ "TEC", TRUE ~ "None"))
Joining, by = c("Matrix", "Analyte")
# A tibble: 12 × 8
Matrix Analyte ResultDry ResultWet PECDry TECDry LimitWet Exceed
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 Floc As 31 1 33 9.79 NA TEC
2 Floc Cd 4.99 0.1 4.98 0.99 NA PEC
3 Floc Cr 112 0.1 111 43.4 NA PEC
4 Fish As 3 34 NA NA 30 Fish
5 Fish Cd 1 4.99 NA NA 0.5 Fish
6 Fish Cr 1 50 NA NA 0.88 Fish
7 Floc As 1 1 33 9.79 NA None
8 Floc Cd 0.04 0.002 4.98 0.99 NA None
9 Floc Cr 0.08 0.008 111 43.4 NA None
10 Fish As 0.002 0.2 NA NA 30 None
11 Fish Cd 0.0005 0.05 NA NA 0.5 None
12 Fish Cr 0.001 5 NA NA 0.88 Fish
Data
data <- structure(list(Matrix = c("Floc", "Floc", "Floc", "Fish", "Fish",
"Fish", "Floc", "Floc", "Floc", "Fish", "Fish", "Fish"), Analyte = c("As",
"Cd", "Cr", "As", "Cd", "Cr", "As", "Cd", "Cr", "As", "Cd", "Cr"
), ResultDry = c(31, 4.99, 112, 3, 1, 1, 1, 0.04, 0.08, 0.002,
5e-04, 0.001), ResultWet = c(1, 0.1, 0.1, 34, 4.99, 50, 1, 0.002,
0.008, 0.2, 0.05, 5)), row.names = c(NA, -12L), class = "data.frame")
limits <- structure(list(wet_fish = c(30, 0.5, 0.88), dry_floc_PEC = c(33,
4.98, 111), dry_floc_TEC = c(9.79, 0.99, 43.4), Analyte = c("As",
"Cd", "Cr")), class = "data.frame", row.names = c("As", "Cd",
"Cr"))