Home > Mobile >  Error in package "DALEXtra": Can't convert from `data$sqft` <double> to `sqft`
Error in package "DALEXtra": Can't convert from `data$sqft` <double> to `sqft`

Time:01-17

I am trying to create a Partial Dependence Plot for a tidymodels workflow using the R package DALEXtra, but the following error occurs: Error in `scream()`: Can't convert from `data$sqft` <double> to `sqft` <integer> due to loss of precision.

# Reproduction script: tune a ranger random forest on the Sacramento housing
# data with tidymodels, then ask DALEXtra for a profile of `sqft`.

# Load the tidymodels packages and the Sacramento data set from modeldata.
library(tidymodels) 
data(Sacramento, package = "modeldata") 
# Convert every character column to a factor.
# NOTE(review): mutate_if() is superseded in current dplyr; across() is the
# documented replacement.
Sacramento <- Sacramento %>%
          mutate_if(is.character, as.factor)

# Seeded 75/25 train/test split, stratified on the outcome `price`.
set.seed(123)
data_split <- initial_split(Sacramento, prop = 0.75, strata = price)
Sac_train <- training(data_split)
Sac_test <- testing(data_split)

# Random forest specification: `mtry` and `min_n` are tune() placeholders,
# 1000 trees, ranger engine (permutation importance, keep.inbag = TRUE),
# regression mode.
rf_mod <- rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% 
      set_engine("ranger", importance = "permutation", keep.inbag = TRUE) %>% 
      set_mode("regression")

# Preprocessing recipe: model price on all other columns, drop zip, latitude
# and longitude, normalize the numeric predictors, then dummy-encode the
# nominal predictors.
Sac_recipe <- recipe(price ~ ., data = Sac_train) %>% 
          step_rm(zip, latitude, longitude) %>% 
          step_normalize(all_numeric_predictors()) %>%
          step_dummy(all_nominal_predictors())

# Bundle the model specification and the recipe into a single workflow.
rf_workflow <- workflow() %>% 
           add_model(rf_mod) %>% 
           add_recipe(Sac_recipe)

# Resamples for tuning: 10-fold cross-validation repeated twice, stratified
# on price. The seed is reset so the folds are reproducible.
set.seed(123)
Sac_folds <- vfold_cv(Sac_train, v = 10, repeats = 2, strata = price)

# Tune over a grid of 3 candidate parameter sets on the resamples, keeping the
# out-of-sample predictions and scoring with RMSE only.
set.seed(123)
rf_res <- rf_workflow %>% 
          tune_grid(grid = 3,
                    resamples = Sac_folds, 
                    control = control_grid(save_pred = TRUE),
                    metrics = metric_set(rmse))

# Select the candidate with the best RMSE and plug it into the workflow.
rf_best <- rf_res %>%
       select_best(metric = "rmse")
last_wf <- rf_workflow %>% 
       finalize_workflow(rf_best)
# Final fit on the initial split. The result is stored under the name
# `last_fit`, the same name as the function being called; R skips non-function
# bindings when resolving a call, so this works but is easy to misread.
last_fit <- last_wf %>%
        last_fit(data_split)
# Pull the fitted workflow out of the last_fit() result.
final_model <- extract_workflow(last_fit)

library(DALEXtra)
# Build the explainer from the fitted workflow: training predictors (price
# column removed) as `data`, training prices as `y`.
rf_explanier  <- explain_tidymodels(model = final_model, 
                                    data = select(Sac_train, -price), 
                                    y = Sac_train$price)
# Profile of `sqft`, computed separately per level of `type`. This is the call
# that raises the scream() error reported below.
# N = NULL: presumably uses all observations instead of a sample (confirm in
# the DALEX model_profile() documentation).
pdp_sqft <- model_profile(explainer = rf_explanier, variables = "sqft", 
                          N = NULL, groups = "type")

# Error
# Error in `scream()`:  Can't convert from `data$sqft` <double> to `sqft` <integer> due to loss of precision.

CodePudding user response:

Your problem appears to come from using out-of-date packages. Updating the tidymodels packages should fix it.

# Same script as in the question, re-run as a reprex (package versions are in
# the session info at the end).

# Load the tidymodels packages and the Sacramento data set from modeldata.
library(tidymodels) 
data(Sacramento, package = "modeldata") 
# Convert every character column to a factor.
Sacramento <- Sacramento %>%
  mutate_if(is.character, as.factor)

# Seeded 75/25 train/test split, stratified on the outcome `price`.
set.seed(123)
data_split <- initial_split(Sacramento, prop = 0.75, strata = price)
Sac_train <- training(data_split)
Sac_test <- testing(data_split)

# Random forest specification: `mtry` and `min_n` are tune() placeholders,
# 1000 trees, ranger engine (permutation importance, keep.inbag = TRUE),
# regression mode.
rf_mod <- rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% 
  set_engine("ranger", importance = "permutation", keep.inbag = TRUE) %>% 
  set_mode("regression")

# Preprocessing recipe: model price on all other columns, drop zip, latitude
# and longitude, normalize the numeric predictors, then dummy-encode the
# nominal predictors.
Sac_recipe <- recipe(price ~ ., data = Sac_train) %>% 
  step_rm(zip, latitude, longitude) %>% 
  step_normalize(all_numeric_predictors()) %>%
  step_dummy(all_nominal_predictors())

# Bundle the model specification and the recipe into a single workflow.
rf_workflow <- workflow() %>% 
  add_model(rf_mod) %>% 
  add_recipe(Sac_recipe)

# Resamples for tuning: 10-fold cross-validation repeated twice, stratified
# on price. The seed is reset so the folds are reproducible.
set.seed(123)
Sac_folds <- vfold_cv(Sac_train, v = 10, repeats = 2, strata = price)

# Tune over a grid of 3 candidate parameter sets on the resamples, keeping the
# out-of-sample predictions and scoring with RMSE only.
set.seed(123)
rf_res <- rf_workflow %>% 
  tune_grid(grid = 3,
            resamples = Sac_folds, 
            control = control_grid(save_pred = TRUE),
            metrics = metric_set(rmse))
#> i Creating pre-processing data to finalize unknown parameter: mtry

# Select the candidate with the best RMSE and plug it into the workflow.
rf_best <- rf_res %>%
  select_best(metric = "rmse")
last_wf <- rf_workflow %>% 
  finalize_workflow(rf_best)
# Final fit on the initial split. The result is stored under the name
# `last_fit`, the same name as the function being called; R skips non-function
# bindings when resolving a call, so this works but is easy to misread.
last_fit <- last_wf %>%
  last_fit(data_split)
# Pull the fitted workflow out of the last_fit() result.
final_model <- extract_workflow(last_fit)

library(DALEXtra)

# Build the explainer from the fitted workflow: training predictors (price
# column removed) as `data`, training prices as `y`.
rf_explanier  <- explain_tidymodels(model = final_model, 
                                    data = select(Sac_train, -price), 
                                    y = Sac_train$price)
#> Preparation of a new explainer is initiated
#>   -> model label       :  workflow  (  default  )
#>   -> data              :  698  rows  8  cols 
#>   -> data              :  tibble converted into a data.frame 
#>   -> target variable   :  698  values 
#>   -> predict function  :  yhat.workflow  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package tidymodels , ver. 1.0.0 , task regression (  default  ) 
#>   -> predicted values  :  numerical, min =  100683.5 , mean =  245393.1 , max =  692722.4  
#>   -> residual function :  difference between y and yhat (  default  )
#>   -> residuals         :  numerical, min =  -133229.2 , mean =  834.6177 , max =  300979.8  
#>   A new explainer has been created!

# Profile of `sqft`, computed separately per level of `type`. In this run the
# call completes with only the warning recorded below.
# N = NULL: presumably uses all observations instead of a sample (confirm in
# the DALEX model_profile() documentation).
pdp_sqft <- model_profile(explainer = rf_explanier, variables = "sqft", 
                          N = NULL, groups = "type")
#> Warning in FUN(X[[i]], ...): Variable: < sqft > has more than 201 unique
#> values and all of them will be used as variable splits in calculating
#> variable profiles. Use the `variable_splits` parameter to mannualy change this
#> behaviour. If you believe this warning to be a false positive, raise issue at
#> <https://github.com/ModelOriented/ingredients/issues>.

# Print the resulting profile object (first rows shown below).
pdp_sqft
#> Top profiles    : 
#>   _vname_        _label_ _x_ _groups_   _yhat_ _ids_
#> 1    sqft workflow_Condo 484    Condo 146246.5     0
#> 2    sqft workflow_Condo 539    Condo 148405.1     0
#> 3    sqft workflow_Condo 610    Condo 121844.8     0
#> 4    sqft workflow_Condo 611    Condo 121768.2     0
#> 5    sqft workflow_Condo 623    Condo 122501.8     0
#> 6    sqft workflow_Condo 625    Condo 123659.1     0

# Record the R version and package versions used for this run; the listing
# below is the evidence for the "update your packages" advice.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.2.1 (2022-06-23)
#>  os       macOS Monterey 12.6
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       America/Los_Angeles
#>  date     2023-01-16
#>  pandoc   2.19.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package      * version    date (UTC) lib source
#>  assertthat     0.2.1      2019-03-21 [2] CRAN (R 4.2.0)
#>  backports      1.4.1      2021-12-13 [2] CRAN (R 4.2.0)
#>  broom        * 1.0.2      2022-12-15 [1] CRAN (R 4.2.0)
#>  class          7.3-20     2022-01-16 [2] CRAN (R 4.2.1)
#>  cli            3.6.0      2023-01-09 [1] CRAN (R 4.2.1)
#>  codetools      0.2-18     2020-11-04 [2] CRAN (R 4.2.1)
#>  colorspace     2.0-3      2022-02-21 [1] CRAN (R 4.2.0)
#>  DALEX        * 2.4.3      2023-01-15 [1] CRAN (R 4.2.0)
#>  DALEXtra     * 2.2.1      2022-06-14 [1] CRAN (R 4.2.0)
#>  DBI            1.1.3      2022-06-18 [1] CRAN (R 4.2.0)
#>  dials        * 1.1.0      2022-11-04 [1] CRAN (R 4.2.1)
#>  DiceDesign     1.9        2021-02-13 [1] CRAN (R 4.2.0)
#>  digest         0.6.31     2022-12-11 [1] CRAN (R 4.2.0)
#>  dplyr        * 1.0.10     2022-09-01 [1] CRAN (R 4.2.0)
#>  ellipsis       0.3.2      2021-04-29 [1] CRAN (R 4.2.0)
#>  evaluate       0.19       2022-12-13 [1] CRAN (R 4.2.0)
#>  fansi          1.0.3      2022-03-24 [1] CRAN (R 4.2.0)
#>  fastmap        1.1.0      2021-01-25 [2] CRAN (R 4.2.0)
#>  foreach        1.5.2      2022-02-02 [1] CRAN (R 4.2.0)
#>  fs             1.5.2      2021-12-08 [2] CRAN (R 4.2.0)
#>  furrr          0.3.1      2022-08-15 [1] CRAN (R 4.2.0)
#>  future         1.30.0     2022-12-16 [1] CRAN (R 4.2.0)
#>  future.apply   1.10.0     2022-11-05 [1] CRAN (R 4.2.1)
#>  generics       0.1.3      2022-07-05 [1] CRAN (R 4.2.0)
#>  ggplot2      * 3.4.0      2022-11-04 [1] CRAN (R 4.2.1)
#>  globals        0.16.2     2022-11-21 [1] CRAN (R 4.2.0)
#>  glue           1.6.2      2022-02-24 [1] CRAN (R 4.2.0)
#>  gower          1.0.1      2022-12-22 [1] CRAN (R 4.2.0)
#>  GPfit          1.0-8      2019-02-08 [1] CRAN (R 4.2.0)
#>  gtable         0.3.1      2022-09-01 [1] CRAN (R 4.2.0)
#>  hardhat        1.2.0      2022-06-30 [1] CRAN (R 4.2.0)
#>  highr          0.10       2022-12-22 [1] CRAN (R 4.2.0)
#>  htmltools      0.5.4      2022-12-07 [1] CRAN (R 4.2.0)
#>  infer        * 1.0.4      2022-12-02 [1] CRAN (R 4.2.1)
#>  ingredients    2.3.0      2023-01-15 [1] CRAN (R 4.2.0)
#>  ipred          0.9-13     2022-06-02 [1] CRAN (R 4.2.0)
#>  iterators      1.0.14     2022-02-05 [1] CRAN (R 4.2.0)
#>  jsonlite       1.8.4      2022-12-06 [1] CRAN (R 4.2.0)
#>  knitr          1.41       2022-11-18 [1] CRAN (R 4.2.0)
#>  lattice        0.20-45    2021-09-22 [2] CRAN (R 4.2.1)
#>  lava           1.7.1      2023-01-06 [1] CRAN (R 4.2.1)
#>  lhs            1.1.6      2022-12-17 [1] CRAN (R 4.2.0)
#>  lifecycle      1.0.3      2022-10-07 [1] CRAN (R 4.2.0)
#>  listenv        0.9.0      2022-12-16 [1] CRAN (R 4.2.0)
#>  lubridate      1.9.0      2022-11-06 [1] CRAN (R 4.2.1)
#>  magrittr       2.0.3      2022-03-30 [1] CRAN (R 4.2.0)
#>  MASS           7.3-57     2022-04-22 [2] CRAN (R 4.2.1)
#>  Matrix         1.5-3      2022-11-11 [1] CRAN (R 4.2.0)
#>  modeldata    * 1.0.1      2022-09-06 [1] CRAN (R 4.2.1)
#>  munsell        0.5.0      2018-06-12 [1] CRAN (R 4.2.0)
#>  nnet           7.3-17     2022-01-16 [2] CRAN (R 4.2.1)
#>  parallelly     1.34.0     2023-01-13 [1] CRAN (R 4.2.0)
#>  parsnip      * 1.0.3      2022-11-11 [1] CRAN (R 4.2.0)
#>  pillar         1.8.1      2022-08-19 [1] CRAN (R 4.2.0)
#>  pkgconfig      2.0.3      2019-09-22 [1] CRAN (R 4.2.0)
#>  png            0.1-8      2022-11-29 [1] CRAN (R 4.2.0)
#>  prodlim        2019.11.13 2019-11-17 [1] CRAN (R 4.2.0)
#>  purrr        * 1.0.1      2023-01-10 [1] CRAN (R 4.2.0)
#>  R.cache        0.16.0     2022-07-21 [2] CRAN (R 4.2.0)
#>  R.methodsS3    1.8.2      2022-06-13 [2] CRAN (R 4.2.0)
#>  R.oo           1.25.0     2022-06-12 [2] CRAN (R 4.2.0)
#>  R.utils        2.12.2     2022-11-11 [1] CRAN (R 4.2.0)
#>  R6             2.5.1      2021-08-19 [1] CRAN (R 4.2.0)
#>  ranger       * 0.14.1     2022-06-18 [1] CRAN (R 4.2.0)
#>  Rcpp           1.0.9      2022-07-08 [1] CRAN (R 4.2.0)
#>  recipes      * 1.0.4      2023-01-11 [1] CRAN (R 4.2.0)
#>  reprex         2.0.2      2022-08-17 [1] CRAN (R 4.2.0)
#>  reticulate     1.27       2023-01-07 [1] CRAN (R 4.2.1)
#>  rlang          1.0.6      2022-09-24 [1] CRAN (R 4.2.0)
#>  rmarkdown      2.19       2022-12-15 [1] CRAN (R 4.2.0)
#>  rpart          4.1.16     2022-01-24 [2] CRAN (R 4.2.1)
#>  rsample      * 1.1.1      2022-12-07 [1] CRAN (R 4.2.0)
#>  rstudioapi     0.14       2022-08-22 [1] CRAN (R 4.2.0)
#>  scales       * 1.2.1      2022-08-20 [1] CRAN (R 4.2.0)
#>  sessioninfo    1.2.2      2021-12-06 [2] CRAN (R 4.2.0)
#>  stringi        1.7.12     2023-01-11 [1] CRAN (R 4.2.0)
#>  stringr        1.5.0      2022-12-02 [1] CRAN (R 4.2.1)
#>  styler         1.9.0      2023-01-15 [1] CRAN (R 4.2.0)
#>  survival       3.3-1      2022-03-03 [2] CRAN (R 4.2.1)
#>  tibble       * 3.1.8      2022-07-22 [1] CRAN (R 4.2.0)
#>  tidymodels   * 1.0.0      2022-07-13 [1] CRAN (R 4.2.0)
#>  tidyr        * 1.2.1      2022-09-08 [1] CRAN (R 4.2.0)
#>  tidyselect     1.2.0      2022-10-10 [1] CRAN (R 4.2.0)
#>  timechange     0.2.0      2023-01-11 [1] CRAN (R 4.2.0)
#>  timeDate       4022.108   2023-01-07 [1] CRAN (R 4.2.1)
#>  tune         * 1.0.1      2022-10-09 [1] CRAN (R 4.2.0)
#>  utf8           1.2.2      2021-07-24 [1] CRAN (R 4.2.0)
#>  vctrs          0.5.1      2022-11-16 [1] CRAN (R 4.2.0)
#>  withr          2.5.0      2022-03-03 [1] CRAN (R 4.2.0)
#>  workflows    * 1.1.2      2022-11-16 [1] CRAN (R 4.2.0)
#>  workflowsets * 1.0.0      2022-07-12 [1] CRAN (R 4.2.0)
#>  xfun           0.36       2022-12-21 [1] CRAN (R 4.2.0)
#>  yaml           2.3.6      2022-10-18 [1] CRAN (R 4.2.0)
#>  yardstick    * 1.1.0      2022-09-07 [1] CRAN (R 4.2.0)
#> 
#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.2/library
#>  [2] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────

Created on 2023-01-16 with reprex v2.0.2

  • Related