I am trying to create a Partial Dependence Plot for a tidymodels workflow
using the R package DALEXtra, but an error occurs:
Error in `scream()`: Can't convert from `data$sqft` <double> to `sqft` <integer>
due to loss of precision.
# Reproducible example: tune a ranger random forest on the Sacramento housing
# data with tidymodels, then request a partial dependence profile for `sqft`
# (grouped by `type`) through DALEXtra.
library(tidymodels)

data(Sacramento, package = "modeldata")

# Turn any character columns into factors. `across()` + `where()` replaces the
# superseded `mutate_if()`.
Sacramento <- Sacramento %>%
  mutate(across(where(is.character), as.factor))

# Train/test split, stratified on the outcome `price`.
set.seed(123)
data_split <- initial_split(Sacramento, prop = 0.75, strata = price)
Sac_train <- training(data_split)
Sac_test <- testing(data_split)

# Random forest specification; `mtry` and `min_n` are left to be tuned.
rf_mod <- rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>%
  set_engine("ranger", importance = "permutation", keep.inbag = TRUE) %>%
  set_mode("regression")

# Preprocessing: drop the location columns, normalize numeric predictors,
# then dummy-encode the nominal predictors.
Sac_recipe <- recipe(price ~ ., data = Sac_train) %>%
  step_rm(zip, latitude, longitude) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_dummy(all_nominal_predictors())

rf_workflow <- workflow() %>%
  add_model(rf_mod) %>%
  add_recipe(Sac_recipe)

# 10-fold cross-validation, repeated twice, stratified on `price`.
set.seed(123)
Sac_folds <- vfold_cv(Sac_train, v = 10, repeats = 2, strata = price)

# Tune over 3 candidate parameter combinations, scoring by RMSE.
set.seed(123)
rf_res <- rf_workflow %>%
  tune_grid(
    resamples = Sac_folds,
    grid = 3,
    control = control_grid(save_pred = TRUE),
    metrics = metric_set(rmse)
  )

rf_best <- rf_res %>%
  select_best(metric = "rmse")

last_wf <- rf_workflow %>%
  finalize_workflow(rf_best)

# Stored as `final_fit` so the result does not shadow the `last_fit()`
# function it was created with.
final_fit <- last_wf %>%
  last_fit(data_split)

final_model <- extract_workflow(final_fit)

library(DALEXtra)

# Wrap the fitted workflow in a DALEX explainer; the outcome column is removed
# from `data` and supplied separately through `y`.
rf_explainer <- explain_tidymodels(
  model = final_model,
  data = select(Sac_train, -price),
  y = Sac_train$price
)

# Partial dependence profile for `sqft`, split by `type`.
# `N = NULL` uses all observations instead of a sample.
pdp_sqft <- model_profile(
  explainer = rf_explainer,
  variables = "sqft",
  N = NULL,
  groups = "type"
)
# Error
# Error in `scream()`: Can't convert from `data$sqft` <double> to `sqft` <integer> due to loss of precision.
CodePudding user response:
Your problem appears to come from using out-of-date packages. Updating the tidymodels packages should fix your problem.
# Reprex of the same analysis run with up-to-date packages: tune a ranger
# random forest on the Sacramento housing data, then compute a partial
# dependence profile for `sqft` (grouped by `type`) through DALEXtra.
# Lines starting with `#>` are the recorded output of the reprex.
library(tidymodels)

data(Sacramento, package = "modeldata")

# Turn any character columns into factors. `across()` + `where()` replaces the
# superseded `mutate_if()`.
Sacramento <- Sacramento %>%
  mutate(across(where(is.character), as.factor))

# Train/test split, stratified on the outcome `price`.
set.seed(123)
data_split <- initial_split(Sacramento, prop = 0.75, strata = price)
Sac_train <- training(data_split)
Sac_test <- testing(data_split)

# Random forest specification; `mtry` and `min_n` are left to be tuned.
rf_mod <- rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>%
  set_engine("ranger", importance = "permutation", keep.inbag = TRUE) %>%
  set_mode("regression")

# Preprocessing: drop the location columns, normalize numeric predictors,
# then dummy-encode the nominal predictors.
Sac_recipe <- recipe(price ~ ., data = Sac_train) %>%
  step_rm(zip, latitude, longitude) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_dummy(all_nominal_predictors())

rf_workflow <- workflow() %>%
  add_model(rf_mod) %>%
  add_recipe(Sac_recipe)

# 10-fold cross-validation, repeated twice, stratified on `price`.
set.seed(123)
Sac_folds <- vfold_cv(Sac_train, v = 10, repeats = 2, strata = price)

# Tune over 3 candidate parameter combinations, scoring by RMSE.
set.seed(123)
rf_res <- rf_workflow %>%
  tune_grid(
    resamples = Sac_folds,
    grid = 3,
    control = control_grid(save_pred = TRUE),
    metrics = metric_set(rmse)
  )
#> i Creating pre-processing data to finalize unknown parameter: mtry

rf_best <- rf_res %>%
  select_best(metric = "rmse")

last_wf <- rf_workflow %>%
  finalize_workflow(rf_best)

# Stored as `final_fit` so the result does not shadow the `last_fit()`
# function it was created with.
final_fit <- last_wf %>%
  last_fit(data_split)

final_model <- extract_workflow(final_fit)

library(DALEXtra)

# Wrap the fitted workflow in a DALEX explainer; the outcome column is removed
# from `data` and supplied separately through `y`.
rf_explainer <- explain_tidymodels(
  model = final_model,
  data = select(Sac_train, -price),
  y = Sac_train$price
)
#> Preparation of a new explainer is initiated
#> -> model label : workflow ( default )
#> -> data : 698 rows 8 cols
#> -> data : tibble converted into a data.frame
#> -> target variable : 698 values
#> -> predict function : yhat.workflow will be used ( default )
#> -> predicted values : No value for predict function target column. ( default )
#> -> model_info : package tidymodels , ver. 1.0.0 , task regression ( default )
#> -> predicted values : numerical, min = 100683.5 , mean = 245393.1 , max = 692722.4
#> -> residual function : difference between y and yhat ( default )
#> -> residuals : numerical, min = -133229.2 , mean = 834.6177 , max = 300979.8
#> A new explainer has been created!

# Partial dependence profile for `sqft`, split by `type`.
# `N = NULL` uses all observations instead of a sample.
pdp_sqft <- model_profile(
  explainer = rf_explainer,
  variables = "sqft",
  N = NULL,
  groups = "type"
)
#> Warning in FUN(X[[i]], ...): Variable: < sqft > has more than 201 unique
#> values and all of them will be used as variable splits in calculating
#> variable profiles. Use the `variable_splits` parameter to mannualy change this
#> behaviour. If you believe this warning to be a false positive, raise issue at
#> <https://github.com/ModelOriented/ingredients/issues>.

pdp_sqft
#> Top profiles :
#> _vname_ _label_ _x_ _groups_ _yhat_ _ids_
#> 1 sqft workflow_Condo 484 Condo 146246.5 0
#> 2 sqft workflow_Condo 539 Condo 148405.1 0
#> 3 sqft workflow_Condo 610 Condo 121844.8 0
#> 4 sqft workflow_Condo 611 Condo 121768.2 0
#> 5 sqft workflow_Condo 623 Condo 122501.8 0
#> 6 sqft workflow_Condo 625 Condo 123659.1 0

# Record the package versions used for this run.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.2.1 (2022-06-23)
#> os macOS Monterey 12.6
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/Los_Angeles
#> date 2023-01-16
#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.2.0)
#> backports 1.4.1 2021-12-13 [2] CRAN (R 4.2.0)
#> broom * 1.0.2 2022-12-15 [1] CRAN (R 4.2.0)
#> class 7.3-20 2022-01-16 [2] CRAN (R 4.2.1)
#> cli 3.6.0 2023-01-09 [1] CRAN (R 4.2.1)
#> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.1)
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0)
#> DALEX * 2.4.3 2023-01-15 [1] CRAN (R 4.2.0)
#> DALEXtra * 2.2.1 2022-06-14 [1] CRAN (R 4.2.0)
#> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.0)
#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.1)
#> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.2.0)
#> digest 0.6.31 2022-12-11 [1] CRAN (R 4.2.0)
#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0)
#> evaluate 0.19 2022-12-13 [1] CRAN (R 4.2.0)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0)
#> fastmap 1.1.0 2021-01-25 [2] CRAN (R 4.2.0)
#> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.0)
#> fs 1.5.2 2021-12-08 [2] CRAN (R 4.2.0)
#> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.0)
#> future 1.30.0 2022-12-16 [1] CRAN (R 4.2.0)
#> future.apply 1.10.0 2022-11-05 [1] CRAN (R 4.2.1)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0)
#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.1)
#> globals 0.16.2 2022-11-21 [1] CRAN (R 4.2.0)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0)
#> gower 1.0.1 2022-12-22 [1] CRAN (R 4.2.0)
#> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.2.0)
#> gtable 0.3.1 2022-09-01 [1] CRAN (R 4.2.0)
#> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.0)
#> highr 0.10 2022-12-22 [1] CRAN (R 4.2.0)
#> htmltools 0.5.4 2022-12-07 [1] CRAN (R 4.2.0)
#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1)
#> ingredients 2.3.0 2023-01-15 [1] CRAN (R 4.2.0)
#> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.0)
#> iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.0)
#> jsonlite 1.8.4 2022-12-06 [1] CRAN (R 4.2.0)
#> knitr 1.41 2022-11-18 [1] CRAN (R 4.2.0)
#> lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.1)
#> lava 1.7.1 2023-01-06 [1] CRAN (R 4.2.1)
#> lhs 1.1.6 2022-12-17 [1] CRAN (R 4.2.0)
#> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.0)
#> listenv 0.9.0 2022-12-16 [1] CRAN (R 4.2.0)
#> lubridate 1.9.0 2022-11-06 [1] CRAN (R 4.2.1)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
#> MASS 7.3-57 2022-04-22 [2] CRAN (R 4.2.1)
#> Matrix 1.5-3 2022-11-11 [1] CRAN (R 4.2.0)
#> modeldata * 1.0.1 2022-09-06 [1] CRAN (R 4.2.1)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0)
#> nnet 7.3-17 2022-01-16 [2] CRAN (R 4.2.1)
#> parallelly 1.34.0 2023-01-13 [1] CRAN (R 4.2.0)
#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0)
#> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0)
#> png 0.1-8 2022-11-29 [1] CRAN (R 4.2.0)
#> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.0)
#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.0)
#> R.cache 0.16.0 2022-07-21 [2] CRAN (R 4.2.0)
#> R.methodsS3 1.8.2 2022-06-13 [2] CRAN (R 4.2.0)
#> R.oo 1.25.0 2022-06-12 [2] CRAN (R 4.2.0)
#> R.utils 2.12.2 2022-11-11 [1] CRAN (R 4.2.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0)
#> ranger * 0.14.1 2022-06-18 [1] CRAN (R 4.2.0)
#> Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.2.0)
#> recipes * 1.0.4 2023-01-11 [1] CRAN (R 4.2.0)
#> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.0)
#> reticulate 1.27 2023-01-07 [1] CRAN (R 4.2.1)
#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0)
#> rmarkdown 2.19 2022-12-15 [1] CRAN (R 4.2.0)
#> rpart 4.1.16 2022-01-24 [2] CRAN (R 4.2.1)
#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.0)
#> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.0)
#> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.0)
#> sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.2.0)
#> stringi 1.7.12 2023-01-11 [1] CRAN (R 4.2.0)
#> stringr 1.5.0 2022-12-02 [1] CRAN (R 4.2.1)
#> styler 1.9.0 2023-01-15 [1] CRAN (R 4.2.0)
#> survival 3.3-1 2022-03-03 [2] CRAN (R 4.2.1)
#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0)
#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0)
#> tidyr * 1.2.1 2022-09-08 [1] CRAN (R 4.2.0)
#> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.0)
#> timechange 0.2.0 2023-01-11 [1] CRAN (R 4.2.0)
#> timeDate 4022.108 2023-01-07 [1] CRAN (R 4.2.1)
#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0)
#> vctrs 0.5.1 2022-11-16 [1] CRAN (R 4.2.0)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0)
#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0)
#> workflowsets * 1.0.0 2022-07-12 [1] CRAN (R 4.2.0)
#> xfun 0.36 2022-12-21 [1] CRAN (R 4.2.0)
#> yaml 2.3.6 2022-10-18 [1] CRAN (R 4.2.0)
#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0)
#>
#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.2/library
#> [2] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────
Created on 2023-01-16 with reprex v2.0.2