Skip to content

Why is "preproc" designated by the type rather than the name of the preprocessor? #181

@joranE

Description

@joranE

Perhaps I'm missing something obvious, but why is the preproc field in the info column populated with the preprocessor type rather than the name, as passed in when calling workflow_set?

My expectation would have been that autoplot would distinguish between different preprocessors using shape, but that appears to only happen if they are passed in as different types:

library(workflowsets)
library(workflows)
library(modeldata)
library(recipes)
library(parsnip)
library(dplyr)
library(rsample)
library(tune)
library(yardstick)

# ------------------------------------------------------------------------------

# Load the `two_class_dat` example data set from the modeldata package.
data("two_class_dat", package = "modeldata")

# Seed the RNG before resampling so the fold assignment is repeatable, then
# build 5-fold cross-validation resamples shared by every workflow below.
set.seed(1)
folds <- vfold_cv(data = two_class_dat, v = 5)

# ------------------------------------------------------------------------------

# Classification tree (rpart engine); both hyperparameters are marked for
# tuning.
decision_tree_rpart_spec <- set_mode(
  set_engine(
    decision_tree(min_n = tune(), cost_complexity = tune()),
    engine = "rpart"
  ),
  mode = "classification"
)

# Logistic regression (glm engine); nothing is marked for tuning.
logistic_reg_glm_spec <- set_engine(logistic_reg(), engine = "glm")

# MARS classifier (earth engine); the interaction degree is marked for tuning.
mars_earth_spec <- set_mode(
  set_engine(
    mars(prod_degree = tune()),
    engine = "earth"
  ),
  mode = "classification"
)

# ------------------------------------------------------------------------------

# Plain recipe with no steps: same terms as the formula `Class ~ A + B` used
# as a preprocessor in the first workflow set.
def_recipe <- recipe(Class ~ A + B, data = two_class_dat)

# Recipe on all predictors with a Yeo-Johnson step applied to A and B.
yj_recipe <- step_YeoJohnson(
  recipe(Class ~ ., data = two_class_dat),
  A, B
)

# ------------------------------------------------------------------------------

# Set 1: the two preprocessors have different types (a formula and a recipe).
two_class_set1 <- workflow_set(
  preproc = list(none = Class ~ A + B, yj_trans = yj_recipe),
  models = list(
    cart = decision_tree_rpart_spec,
    glm = logistic_reg_glm_spec,
    mars = mars_earth_spec
  )
)

# Set 2: same names and models, but both preprocessors are recipes.
two_class_set2 <- workflow_set(
  preproc = list(none = def_recipe, yj_trans = yj_recipe),
  models = list(
    cart = decision_tree_rpart_spec,
    glm = logistic_reg_glm_spec,
    mars = mars_earth_spec
  )
)



# ------------------------------------------------------------------------------

# Run both workflow sets over the same folds with identical settings, so the
# only difference between the results is how the preprocessors were supplied.
two_class_res1 <- workflow_map(
  two_class_set1,
  resamples = folds,
  grid = 10,
  seed = 2,
  verbose = TRUE,
  control = control_grid(save_workflow = TRUE)
)

two_class_res2 <- workflow_map(
  two_class_set2,
  resamples = folds,
  grid = 10,
  seed = 2,
  verbose = TRUE,
  control = control_grid(save_workflow = TRUE)
)

# Formula + recipe: the plot uses shape to separate the two preprocessors
autoplot(two_class_res1)

# Recipe + recipe: every preprocessor is labelled "recipe", so they cannot be
# told apart
autoplot(two_class_res2)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions