diff --git a/dev/pkgdown.yml b/dev/pkgdown.yml
index 7f828e29..71060b84 100644
--- a/dev/pkgdown.yml
+++ b/dev/pkgdown.yml
@@ -5,7 +5,7 @@ articles:
   forge: forge.html
   mold: mold.html
   package: package.html
-last_built: 2024-10-21T12:29Z
+last_built: 2024-10-22T16:07Z
 urls:
   reference: https://hardhat.tidymodels.org/reference
   article: https://hardhat.tidymodels.org/articles
diff --git a/dev/reference/model_frame.html b/dev/reference/model_frame.html
index d7563948..0384b975 100644
--- a/dev/reference/model_frame.html
+++ b/dev/reference/model_frame.html
@@ -147,7 +147,7 @@

Examples#> attr(,"response") #> [1] 1 #> attr(,".Environment") -#> <environment: 0x55cc08c0ec28> +#> <environment: 0x55db7703ac50> #> attr(,"predvars") #> list(Species, Sepal.Width) #> attr(,"dataClasses") diff --git a/dev/search.json b/dev/search.json index 05e55e5f..ce03c3a0 100644 --- a/dev/search.json +++ b/dev/search.json @@ -1 +1 @@ -[{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement codeofconduct@posit.co. complaints reviewed investigated promptly fairly. 
community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. 
Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to tidymodels","title":"Contributing to tidymodels","text":"detailed information contributing tidymodels packages, see development contributing guide.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CONTRIBUTING.html","id":"documentation","dir":"","previous_headings":"","what":"Documentation","title":"Contributing to tidymodels","text":"Typos grammatical errors documentation may edited directly using GitHub web interface, long changes made source file. YES ✅: edit roxygen comment .R file R/ directory. 🚫: edit .Rd file man/ directory. use roxygen2, Markdown syntax, documentation.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CONTRIBUTING.html","id":"code","dir":"","previous_headings":"","what":"Code","title":"Contributing to tidymodels","text":"submit 🎯 pull request tidymodels package, always file issue confirm tidymodels team agrees idea happy basic proposal. tidymodels packages work together. package contains unit tests, integration tests tests using packages contained extratests. recommend create Git branch pull request (PR). Look build status making changes. README contains badges continuous integration services used package. New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. user-facing changes, add bullet top NEWS.md current development version header describing changes made followed GitHub username, links relevant issue(s)/PR(s). use testthat. Contributions test cases included easier accept. contribution spans use one package, consider building extratests changes check breakages /adding new tests . Let us know PR ran extra tests.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"Code","what":"Code of Conduct","title":"Contributing to tidymodels","text":"project released Contributor Code Conduct. contributing project, agree abide terms.","code":""},{"path":"https://hardhat.tidymodels.org/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 hardhat authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Forging data for predictions","text":"counterpart mold() (can read vignette(\"mold\", \"hardhat\")), forge(). mold() used preprocess training data, forge() used preprocess new data going use generate predictions model. Like mold(), forge() intended used interactively. 
Instead, called predict() method model. learn using forge() modeling package, see vignette(\"package\", \"hardhat\"). rest vignette focused many features forge() offers.","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"connection-with-mold","dir":"Articles","previous_headings":"","what":"Connection with mold()","title":"Forging data for predictions","text":"mold() used, one returned objects blueprint. key preprocessing new data forge(). instance, assume ’ve called mold() like : formula blueprint returned , knows predictors outcomes used training time, knows don’t want expand species dummy variables setting indicators = \"none\". time predict() new data, data passed forge() along blueprint just created. Note predictors, species expanded blueprint knew preprocessing options set mold() called. forge() always returns three things, look familiar used mold(). predictors holds tibble predictors. outcomes returned NULL default, predict() methods assume access new predictors. Alternatively, read moment, can contain tibble new outcomes. extras varies per blueprint, catch-slot hold kind extra objects returned blueprint mold() called.","code":"penguin_train <- penguins[1:300,] penguin_test <- penguins[-(1:300),] penguin_form <- mold( log(body_mass_g) ~ species + bill_length_mm, penguin_train, blueprint = default_formula_blueprint(indicators = \"none\") ) formula_eng <- penguin_form$blueprint formula_eng #> Formula blueprint: #> #> # Predictors: 2 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble #> Indicators: none forge(penguin_test, formula_eng) #> $predictors #> # A tibble: 33 × 2 #> bill_length_mm species #> #> 1 47.5 Chinstrap #> 2 47.6 Chinstrap #> 3 52 Chinstrap #> 4 46.9 Chinstrap #> 5 53.5 Chinstrap #> 6 49 Chinstrap #> 7 46.2 Chinstrap #> 8 50.9 Chinstrap #> 9 45.5 Chinstrap #> 10 50.9 Chinstrap #> # ℹ 23 more rows #> #> $outcomes #> NULL #> #> $extras #> $extras$offset #> NULL"},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"outcomes","dir":"Articles","previous_headings":"","what":"Outcomes","title":"Forging data for predictions","text":"Generally generating predictions need know new predictors. However, performing resampling need processed outcomes well can compute cross validated performance statistics decide multiple models, choose hyperparameters. can easily request outcomes well outcomes = TRUE. Just like predictors, get processed using steps done outcomes fit time.","code":"forge(penguin_test, formula_eng, outcomes = TRUE) #> $predictors #> # A tibble: 33 × 2 #> bill_length_mm species #> #> 1 47.5 Chinstrap #> 2 47.6 Chinstrap #> 3 52 Chinstrap #> 4 46.9 Chinstrap #> 5 53.5 Chinstrap #> 6 49 Chinstrap #> 7 46.2 Chinstrap #> 8 50.9 Chinstrap #> 9 45.5 Chinstrap #> 10 50.9 Chinstrap #> # ℹ 23 more rows #> #> $outcomes #> # A tibble: 33 × 1 #> `log(body_mass_g)` #> #> 1 8.27 #> 2 8.26 #> 3 8.48 #> 4 7.90 #> 5 8.41 #> 6 8.28 #> 7 8.20 #> 8 8.17 #> 9 8.16 #> 10 8.21 #> # ℹ 23 more rows #> #> $extras #> $extras$offset #> NULL"},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"validation","dir":"Articles","previous_headings":"","what":"Validation","title":"Forging data for predictions","text":"One useful things forge() robustness malformed new data. isn’t unreasonable enforce new data user provides prediction time type data used fit time. Type defined vctrs sense, uses essentially means number checks test data pass, including: column names testing data training data must . 
type column testing data must columns found training data. means: classes must (e.g. factor training, must factor testing). attributes must (e.g. levels factors must also ). Almost validation possible use vctrs::vec_cast(), called forge().","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"column-existence","dir":"Articles","previous_headings":"Validation","what":"Column existence","title":"Forging data for predictions","text":"easiest example demonstrate missing columns testing data. forge() won’t let continue required predictors used training also present new data.","code":"test_missing_column <- subset(penguin_test, select = -species) forge(test_missing_column, formula_eng) #> Error in `validate_column_names()`: #> ! The following required columns are missing: 'species'."},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"column-types","dir":"Articles","previous_headings":"Validation","what":"Column types","title":"Forging data for predictions","text":"initial scan column names done, deeper scan column performed, checking type column. instance, happens new species column double, factor? error thrown, indicating double can’t cast factor.","code":"test_species_double <- penguin_test test_species_double$species <- as.double(test_species_double$species) forge(test_species_double, formula_eng) #> Error in `scream()`: #> ! Can't convert `data$species` to match type of `species` >."},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"lossless-conversion","dir":"Articles","previous_headings":"Validation","what":"Lossless conversion","title":"Forging data for predictions","text":"error message suggests cases can automatically cast one type another, fact true! Rather double, species just character? Interesting, case can actually convert factor, class even levels restored. key lossless conversion. lost information converting character species factor unique character values subset original levels. example conversion lossy character species column value level training data. case: lossy warning thrown species column still converted factor right levels novel level removed value set NA","code":"test_species_character <- penguin_test test_species_character$species <- as.character(test_species_character$species) forged_char <- forge(test_species_character, formula_eng) forged_char$predictors #> # A tibble: 33 × 2 #> bill_length_mm species #> #> 1 47.5 Chinstrap #> 2 47.6 Chinstrap #> 3 52 Chinstrap #> 4 46.9 Chinstrap #> 5 53.5 Chinstrap #> 6 49 Chinstrap #> 7 46.2 Chinstrap #> 8 50.9 Chinstrap #> 9 45.5 Chinstrap #> 10 50.9 Chinstrap #> # ℹ 23 more rows class(forged_char$predictors$species) #> [1] \"factor\" levels(forged_char$predictors$species) #> [1] \"Adelie\" \"Chinstrap\" \"Gentoo\" test_species_lossy <- penguin_test test_species_lossy$species <- as.character(test_species_lossy$species) test_species_lossy$species[2] <- \"im new!\" forged_lossy <- forge(test_species_lossy, formula_eng) #> Warning: Novel levels found in column 'species': 'im new!'. The levels #> have been removed, and values have been coerced to 'NA'. 
forged_lossy$predictors #> # A tibble: 33 × 2 #> bill_length_mm species #> #> 1 47.5 Chinstrap #> 2 47.6 NA #> 3 52 Chinstrap #> 4 46.9 Chinstrap #> 5 53.5 Chinstrap #> 6 49 Chinstrap #> 7 46.2 Chinstrap #> 8 50.9 Chinstrap #> 9 45.5 Chinstrap #> 10 50.9 Chinstrap #> # ℹ 23 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"recipes-and-forge","dir":"Articles","previous_headings":"","what":"Recipes and forge()","title":"Forging data for predictions","text":"Just like formula method, recipe can used preprocessor fit prediction time. hardhat handles calling prep(), juice(), bake() right times. instance, say recipe just creates dummy variables species. blueprint recipe blueprint. forge(), can request outcomes predictors outcomes separated like formula method.","code":"library(recipes) rec <- recipe(bill_length_mm ~ body_mass_g + species, penguin_train) %>% step_dummy(species) penguin_recipe <- mold(rec, penguin_train) penguin_recipe$predictors #> # A tibble: 300 × 3 #> body_mass_g species_Chinstrap species_Gentoo #> #> 1 3750 0 0 #> 2 3800 0 0 #> 3 3250 0 0 #> 4 3450 0 0 #> 5 3650 0 0 #> 6 3625 0 0 #> 7 4675 0 0 #> 8 3200 0 0 #> 9 3800 0 0 #> 10 4400 0 0 #> # ℹ 290 more rows recipe_eng <- penguin_recipe$blueprint recipe_eng #> Recipe blueprint: #> #> # Predictors: 2 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble forge(penguin_test, recipe_eng, outcomes = TRUE) #> $predictors #> # A tibble: 33 × 3 #> body_mass_g species_Chinstrap species_Gentoo #> #> 1 3900 1 0 #> 2 3850 1 0 #> 3 4800 1 0 #> 4 2700 1 0 #> 5 4500 1 0 #> 6 3950 1 0 #> 7 3650 1 0 #> 8 3550 1 0 #> 9 3500 1 0 #> 10 3675 1 0 #> # ℹ 23 more rows #> #> $outcomes #> # A tibble: 33 × 1 #> bill_length_mm #> #> 1 47.5 #> 2 47.6 #> 3 52 #> 4 46.9 #> 5 53.5 #> 6 49 #> 7 46.2 #> 8 50.9 #> 9 45.5 #> 10 50.9 #> # ℹ 23 more rows #> #> $extras #> $extras$roles #> NULL"},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"a-note-on-recipes","dir":"Articles","previous_headings":"Recipes and forge()","what":"A note on recipes","title":"Forging data for predictions","text":"One complication recipes , bake() step, processing happens predictors outcomes together. means might run situation outcomes seem required forge(), even aren’t requesting . new_data doesn’t outcome, baking recipe fail even don’t request outcomes returned forge(). way around use built-recipe argument, skip, step containing outcome. skips processing step bake() time. tradeoff need aware . just interested generating predictions completely new data, can safely use skip = TRUE almost never access corresponding true outcomes preprocess compare . know need resampling, likely access outcomes resampling step can cross-validate performance. case, can’t set skip = TRUE outcomes won’t processed, since access , shouldn’t need . example, used penguin_test recipe (outcome), bill_length_mm wouldn’t get centered forge() called. probably skipped step knew test data outcome.","code":"rec2 <- recipe(bill_length_mm ~ body_mass_g + species, penguin_train) %>% step_dummy(species) %>% step_center(bill_length_mm) # Here we modify the outcome penguin_recipe2 <- mold(rec2, penguin_train) recipe_eng_log_outcome <- penguin_recipe2$blueprint penguin_test_no_outcome <- subset(penguin_test, select = -bill_length_mm) forge(penguin_test_no_outcome, recipe_eng_log_outcome) #> Error in `step_center()`: #> ! The following required column is missing from `new_data` in step #> 'center_UUEdL': bill_length_mm. 
rec3 <- recipe(bill_length_mm ~ body_mass_g + species, penguin_train) %>% step_dummy(species) %>% step_center(bill_length_mm, skip = TRUE) penguin_recipe3 <- mold(rec3, penguin_train) recipe_eng_skip_outcome <- penguin_recipe3$blueprint forge(penguin_test_no_outcome, recipe_eng_skip_outcome) #> $predictors #> # A tibble: 33 × 3 #> body_mass_g species_Chinstrap species_Gentoo #> #> 1 3900 1 0 #> 2 3850 1 0 #> 3 4800 1 0 #> 4 2700 1 0 #> 5 4500 1 0 #> 6 3950 1 0 #> 7 3650 1 0 #> 8 3550 1 0 #> 9 3500 1 0 #> 10 3675 1 0 #> # ℹ 23 more rows #> #> $outcomes #> NULL #> #> $extras #> $extras$roles #> NULL forge(penguin_test, recipe_eng_skip_outcome, outcomes = TRUE)$outcomes #> # A tibble: 33 × 1 #> bill_length_mm #> #> 1 47.5 #> 2 47.6 #> 3 52 #> 4 46.9 #> 5 53.5 #> 6 49 #> 7 46.2 #> 8 50.9 #> 9 45.5 #> 10 50.9 #> # ℹ 23 more rows # Notice that the `outcome` values haven't been centered # and are the same as before head(penguin_test$bill_length_mm) #> [1] 47.5 47.6 52.0 46.9 53.5 49.0"},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Molding data for modeling","text":"modeling functions, data must accepted user format outcomes predictors specified. next step often validate preprocess input way prepare actual modeling implementation function. example, formula method used, R provides infrastructure preprocessing user input model.frame() model.matrix() functions. formula method way specify modeling terms. also XY method, x y supplied directly, , recently, recipe implementation can used preprocess data using set sequential steps. developer, likely won’t want care details methods work, (hopefully) still want provide three interfaces shiny new model. mold() makes easy , takes care details preprocessing user input methods. intended use mold() called user facing modeling function. see action, look vignette found : vignette(\"package\", \"hardhat\"). rest vignette focused various different ways use mold(), keep mind generally used interactive function like .","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"a-first-example","dir":"Articles","previous_headings":"","what":"A First Example","title":"Molding data for modeling","text":"familiar interface R users likely formula interface. case, terms specified using formula notation: outcomes ~ predictors. Generally, developer, call model.frame() model.matrix() result coerce right format ingestion model. mold() handles . mold() returns four things. Two immediately useful, almost always applicable modeling implementation created. first predictors, returned tibble. required processing done , just focus modeling implementation. Second outcomes, also returned tibble. used , processing outcome specified formula also done . Beyond two elements, mold() also returns slot extras might generated preprocessing, aren’t specifically predictors outcomes. example, offset() can specified directly formula, isn’t technically predictor. Lastly, mold() returns important object, blueprint. responsible knowing preprocess training data, new data prediction time. developer, attach blueprint model object returning user. 
information , see package development vignette, vignette(\"package\", \"hardhat\").","code":"penguin_form <- mold(body_mass_g ~ log(bill_length_mm), penguins) names(penguin_form) #> [1] \"predictors\" \"outcomes\" \"blueprint\" \"extras\" penguin_form$predictors #> # A tibble: 333 × 1 #> `log(bill_length_mm)` #> #> 1 3.67 #> 2 3.68 #> 3 3.70 #> 4 3.60 #> 5 3.67 #> 6 3.66 #> 7 3.67 #> 8 3.72 #> 9 3.65 #> 10 3.54 #> # ℹ 323 more rows penguin_form$outcomes #> # A tibble: 333 × 1 #> body_mass_g #> #> 1 3750 #> 2 3800 #> 3 3250 #> 4 3450 #> 5 3650 #> 6 3625 #> 7 4675 #> 8 3200 #> 9 3800 #> 10 4400 #> # ℹ 323 more rows mold(body_mass_g ~ log(bill_length_mm) + offset(bill_depth_mm), penguins)$extras #> $offset #> # A tibble: 333 × 1 #> .offset #> #> 1 18.7 #> 2 17.4 #> 3 18 #> 4 19.3 #> 5 20.6 #> 6 17.8 #> 7 19.6 #> 8 17.6 #> 9 21.2 #> 10 21.1 #> # ℹ 323 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"blueprints","dir":"Articles","previous_headings":"","what":"blueprints","title":"Molding data for modeling","text":"mentioned , one objects mold() returns blueprint responsible controlling preprocessing. multiple blueprints available hardhat, call mold() one selected automatically . following two calls generate result, using default formula blueprint. blueprint can tweaked change processing interface occurs, options vary per blueprint. understand ’d ever want , read !","code":"identical( mold(~ body_mass_g, penguins), mold(~ body_mass_g, penguins, blueprint = default_formula_blueprint()) ) #> [1] TRUE"},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"formulas","dir":"Articles","previous_headings":"","what":"Formulas","title":"Molding data for modeling","text":"Now basic idea mold() works, can talk interesting functionality.","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"intercepts","dir":"Articles","previous_headings":"Formulas","what":"Intercepts","title":"Molding data for modeling","text":"One challenge standard formula interface , default, intercepts always implicitly present added data set automatically. works great simple regression case. However, models might either always require never allow intercept, still use formula interface convenience (example, earth). led many ad hoc solutions prevent user removing adding intercept. get around , mold() never add intercept default. Instead, addition intercept completely controlled formula blueprint argument, intercept. error thrown intercept removal term specified:","code":"no_intercept <- mold(~ body_mass_g, penguins) no_intercept$predictors #> # A tibble: 333 × 1 #> body_mass_g #> #> 1 3750 #> 2 3800 #> 3 3250 #> 4 3450 #> 5 3650 #> 6 3625 #> 7 4675 #> 8 3200 #> 9 3800 #> 10 4400 #> # ℹ 323 more rows with_intercept <- mold( ~ body_mass_g, penguins, blueprint = default_formula_blueprint(intercept = TRUE) ) with_intercept$predictors #> # A tibble: 333 × 2 #> `(Intercept)` body_mass_g #> #> 1 1 3750 #> 2 1 3800 #> 3 1 3250 #> 4 1 3450 #> 5 1 3650 #> 6 1 3625 #> 7 1 4675 #> 8 1 3200 #> 9 1 3800 #> 10 1 4400 #> # ℹ 323 more rows mold(~ body_mass_g - 1, penguins) #> Error in `mold_formula_default_clean()`: #> ! `formula` must not contain the intercept removal term: `- 1`. mold(~ body_mass_g + 0, penguins) #> Error in `mold_formula_default_clean()`: #> ! 
`formula` must not contain the intercept removal term: `+ 0` or #> `0 +`."},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"dummy-variables","dir":"Articles","previous_headings":"Formulas","what":"Dummy variables","title":"Molding data for modeling","text":"One nice things formula interface expands factors dummy variable columns . Like intercepts, great…isn’t. example, ranger fits random forest, can take factors directly, still uses formula notation. case, great factor columns specified predictors weren’t expanded. job blueprint argument, indicators. Note: ’s worth mentioning intercept present, base R expands first factor completely K indicator columns corresponding K levels present factor (also known one-hot encoding). Subsequent columns expanded traditional K - 1 columns. intercept present, K - 1 columns generated factor predictors.","code":"expanded_dummies <- mold(~ body_mass_g + species, penguins) expanded_dummies$predictors #> # A tibble: 333 × 4 #> body_mass_g speciesAdelie speciesChinstrap speciesGentoo #> #> 1 3750 1 0 0 #> 2 3800 1 0 0 #> 3 3250 1 0 0 #> 4 3450 1 0 0 #> 5 3650 1 0 0 #> 6 3625 1 0 0 #> 7 4675 1 0 0 #> 8 3200 1 0 0 #> 9 3800 1 0 0 #> 10 4400 1 0 0 #> # ℹ 323 more rows non_expanded_dummies <- mold( ~ body_mass_g + species, penguins, blueprint = default_formula_blueprint(indicators = \"none\") ) non_expanded_dummies$predictors #> # A tibble: 333 × 2 #> body_mass_g species #> #> 1 3750 Adelie #> 2 3800 Adelie #> 3 3250 Adelie #> 4 3450 Adelie #> 5 3650 Adelie #> 6 3625 Adelie #> 7 4675 Adelie #> 8 3200 Adelie #> 9 3800 Adelie #> 10 4400 Adelie #> # ℹ 323 more rows k_cols <- mold(~ species, penguins) k_minus_one_cols <- mold( ~ species, penguins, blueprint = default_formula_blueprint(intercept = TRUE) ) colnames(k_cols$predictors) #> [1] \"speciesAdelie\" \"speciesChinstrap\" \"speciesGentoo\" colnames(k_minus_one_cols$predictors) #> [1] \"(Intercept)\" \"speciesChinstrap\" \"speciesGentoo\""},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"multivariate-outcomes","dir":"Articles","previous_headings":"Formulas","what":"Multivariate outcomes","title":"Molding data for modeling","text":"One frustrating things working formula method multivariate outcomes bit clunky specify. might look like 3 columns, actually 2, first column named cbind(body_mass_g, bill_length_mm), actually matrix 2 columns, body_mass_g bill_length_mm inside . default formula blueprint used mold() allows specify multiple outcomes like specify multiple predictors. can even inline transformations outcome, although much , ’d advise using recipe instead. 
outcomes holds two outcomes columns.","code":".f <- cbind(body_mass_g, bill_length_mm) ~ bill_depth_mm frame <- model.frame(.f, penguins) head(frame) #> cbind(body_mass_g, bill_length_mm).body_mass_g #> 1 3750.0 #> 2 3800.0 #> 3 3250.0 #> 4 3450.0 #> 5 3650.0 #> 6 3625.0 #> cbind(body_mass_g, bill_length_mm).bill_length_mm bill_depth_mm #> 1 39.1 18.7 #> 2 39.5 17.4 #> 3 40.3 18.0 #> 4 36.7 19.3 #> 5 39.3 20.6 #> 6 38.9 17.8 ncol(frame) #> [1] 2 class(frame$`cbind(body_mass_g, bill_length_mm)`) #> [1] \"matrix\" \"array\" head(frame$`cbind(body_mass_g, bill_length_mm)`) #> body_mass_g bill_length_mm #> [1,] 3750 39.1 #> [2,] 3800 39.5 #> [3,] 3250 40.3 #> [4,] 3450 36.7 #> [5,] 3650 39.3 #> [6,] 3625 38.9 multivariate <- mold(body_mass_g + log(bill_length_mm) ~ bill_depth_mm, penguins) multivariate$outcomes #> # A tibble: 333 × 2 #> body_mass_g `log(bill_length_mm)` #> #> 1 3750 3.67 #> 2 3800 3.68 #> 3 3250 3.70 #> 4 3450 3.60 #> 5 3650 3.67 #> 6 3625 3.66 #> 7 4675 3.67 #> 8 3200 3.72 #> 9 3800 3.65 #> 10 4400 3.54 #> # ℹ 323 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"xy","dir":"Articles","previous_headings":"","what":"XY","title":"Molding data for modeling","text":"second interface XY interface, useful predictors outcomes specified separately. interface doesn’t much way preprocessing, let specify intercept blueprint specific arguments. Rather default_formula_blueprint(), uses default_xy_blueprint().","code":"x <- subset(penguins, select = -body_mass_g) y <- subset(penguins, select = body_mass_g) penguin_xy <- mold(x, y) penguin_xy$predictors #> # A tibble: 333 × 6 #> species island bill_length_mm bill_depth_mm flipper_length_mm sex #> #> 1 Adelie Torgersen 39.1 18.7 181 male #> 2 Adelie Torgersen 39.5 17.4 186 female #> 3 Adelie Torgersen 40.3 18 195 female #> 4 Adelie Torgersen 36.7 19.3 193 female #> 5 Adelie Torgersen 39.3 20.6 190 male #> 6 Adelie Torgersen 38.9 17.8 181 female #> 7 Adelie Torgersen 39.2 19.6 195 male #> 8 Adelie Torgersen 41.1 17.6 182 female #> 9 Adelie Torgersen 38.6 21.2 191 male #> 10 Adelie Torgersen 34.6 21.1 198 male #> # ℹ 323 more rows penguin_xy$outcomes #> # A tibble: 333 × 1 #> body_mass_g #> #> 1 3750 #> 2 3800 #> 3 3250 #> 4 3450 #> 5 3650 #> 6 3625 #> 7 4675 #> 8 3200 #> 9 3800 #> 10 4400 #> # ℹ 323 more rows xy_with_intercept <- mold(x, y, blueprint = default_xy_blueprint(intercept = TRUE)) xy_with_intercept$predictors #> # A tibble: 333 × 7 #> `(Intercept)` species island bill_length_mm bill_depth_mm #> #> 1 1 Adelie Torgersen 39.1 18.7 #> 2 1 Adelie Torgersen 39.5 17.4 #> 3 1 Adelie Torgersen 40.3 18 #> 4 1 Adelie Torgersen 36.7 19.3 #> 5 1 Adelie Torgersen 39.3 20.6 #> 6 1 Adelie Torgersen 38.9 17.8 #> 7 1 Adelie Torgersen 39.2 19.6 #> 8 1 Adelie Torgersen 41.1 17.6 #> 9 1 Adelie Torgersen 38.6 21.2 #> 10 1 Adelie Torgersen 34.6 21.1 #> # ℹ 323 more rows #> # ℹ 2 more variables: flipper_length_mm , sex "},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"vector-outcomes","dir":"Articles","previous_headings":"XY","what":"Vector outcomes","title":"Molding data for modeling","text":"y bit special XY interface, univariate case users might expect able pass vector, 1 column data frame, matrix. mold() prepared cases, vector case requires special attention. consistent mold() interfaces, outcomes slot return value tibble. 
achieve y supplied vector, default column name created, \".outcome\".","code":"mold(x, y$body_mass_g)$outcomes #> # A tibble: 333 × 1 #> .outcome #> #> 1 3750 #> 2 3800 #> 3 3250 #> 4 3450 #> 5 3650 #> 6 3625 #> 7 4675 #> 8 3200 #> 9 3800 #> 10 4400 #> # ℹ 323 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"recipe","dir":"Articles","previous_headings":"","what":"Recipe","title":"Molding data for modeling","text":"last three interfaces relatively new recipes interface. default_recipe_blueprint() knows prep() recipe, juice() extract predictors outcomes. far flexible way preprocess data. special thing can tweak recipe blueprint whether intercept added.","code":"library(recipes) rec <- recipe(bill_length_mm ~ species + bill_depth_mm, penguins) %>% step_log(bill_length_mm) %>% step_dummy(species) penguin_recipe <- mold(rec, penguins) penguin_recipe$predictors #> # A tibble: 333 × 3 #> bill_depth_mm species_Chinstrap species_Gentoo #> #> 1 18.7 0 0 #> 2 17.4 0 0 #> 3 18 0 0 #> 4 19.3 0 0 #> 5 20.6 0 0 #> 6 17.8 0 0 #> 7 19.6 0 0 #> 8 17.6 0 0 #> 9 21.2 0 0 #> 10 21.1 0 0 #> # ℹ 323 more rows penguin_recipe$outcomes #> # A tibble: 333 × 1 #> bill_length_mm #> #> 1 3.67 #> 2 3.68 #> 3 3.70 #> 4 3.60 #> 5 3.67 #> 6 3.66 #> 7 3.67 #> 8 3.72 #> 9 3.65 #> 10 3.54 #> # ℹ 323 more rows recipe_with_intercept <- mold( rec, penguins, blueprint = default_recipe_blueprint(intercept = TRUE) ) recipe_with_intercept$predictors #> # A tibble: 333 × 4 #> `(Intercept)` bill_depth_mm species_Chinstrap species_Gentoo #> #> 1 1 18.7 0 0 #> 2 1 17.4 0 0 #> 3 1 18 0 0 #> 4 1 19.3 0 0 #> 5 1 20.6 0 0 #> 6 1 17.8 0 0 #> 7 1 19.6 0 0 #> 8 1 17.6 0 0 #> 9 1 21.2 0 0 #> 10 1 21.1 0 0 #> # ℹ 323 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Creating Modeling Packages With hardhat","text":"goal vignette teach use mold() forge() modeling package. intended use functions, even though can also called interactively. Creating new modeling package two main stages: creating model fitting function, implementing predict method. stages break like : Stage 1 - Model Fitting Create model constructor. Create fitting implementation function. Create common bridge go high level user facing methods lower level constructor implementation function. Create user facing function methods data frame, matrix, formula, recipe inputs. imagine comes together internal pieces power user facing methods model. Stage 2 - Model Prediction Create one prediction implementation functions, varying \"type\" prediction make. Create common bridge high level predict method lower level prediction implementation functions. Create user facing predict method. case, 2 user facing methods. Many models multiple internal implementation functions ’ll switch , depending \"type\". end result single high level modeling function methods multiple different “interfaces”, corresponding predict method make predictions using one models along new data (“interfaces”, just mean different types inputs, : data frame, matrix, formula recipe). obviously things might want modeling package . instance, might implement plot() summary() method. 
two stages described necessary almost every model, involve inputs outputs hardhat helps .","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"whats-our-model","dir":"Articles","previous_headings":"Introduction","what":"What’s Our Model?","title":"Creating Modeling Packages With hardhat","text":"use underlying lm() infrastructure, lm.fit(), create model. Linear regression recognizable many, can focus understanding mold() forge() fit bigger picture, rather trying understand model works. lm.fit() takes x y directly, rather using formula method. serve core part modeling implementation function. generally, easiest core implementation function algorithm takes x y manner, since mold() standardize inputs. call model simple_lm(). won’t features normal linear regression (weights, offsets, etc), serve nice dummy model show features get hardhat.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"model-constructor","dir":"Articles","previous_headings":"Model Fitting","what":"Model Constructor","title":"Creating Modeling Packages With hardhat","text":"first thing need modeling constructor. Constructors simple functions creates new objects model class. arguments constructor supply individual pieces, wraps model object. hardhat function new_model() can help creating . model constructor : name new_(). Take required model elements named arguments, including required blueprint. Validate types new elements. Pass named elements new_model() along setting class \"\". want learn details constructors creating S3 classes, take look S3 section Advanced R. \"simple_lm\" object just enough information make numeric predictions new data, can store things well enable model object work extra post-fitting functionality. can test manually generating model object. Along custom class provided, object also \"hardhat_model\" class. simple print method objects type. Specifically, prints name class top, prints custom elements (.e. blueprint).","code":"new_simple_lm <- function(coefs, coef_names, blueprint) { if (!is.numeric(coefs)) { stop(\"`coefs` should be a numeric vector.\", call. = FALSE) } if (!is.character(coef_names)) { stop(\"`coef_names` should be a character vector.\", call. = FALSE) } if (length(coefs) != length(coef_names)) { stop(\"`coefs` and `coef_names` must have the same length.\") } new_model( coefs = coefs, coef_names = coef_names, blueprint = blueprint, class = \"simple_lm\" ) } manual_model <- new_simple_lm(1, \"my_coef\", default_xy_blueprint()) manual_model #> #> $coefs #> [1] 1 #> #> $coef_names #> [1] \"my_coef\" names(manual_model) #> [1] \"coefs\" \"coef_names\" \"blueprint\" manual_model$blueprint #> XY blueprint: #> #> # Predictors: 0 #> # Outcomes: 0 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble"},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"model-fitting-implementation","dir":"Articles","previous_headings":"Model Fitting","what":"Model Fitting Implementation","title":"Creating Modeling Packages With hardhat","text":"implementation function hard work done. generally recommend naming _impl(). accept predictors outcomes whatever form required algorithm, run algorithm, return named list new elements added model constructor. might also arguments extra options can used tweak internal algorithm. simple linear regression implementation just calls lm.fit() x = predictors y = outcomes. lm.fit() expects matrix predictors vector outcomes (least univariate regression). 
moment discuss create .","code":"simple_lm_impl <- function(predictors, outcomes) { lm_fit <- lm.fit(predictors, outcomes) coefs <- lm_fit$coefficients coef_names <- names(coefs) coefs <- unname(coefs) list( coefs = coefs, coef_names = coef_names ) } predictors <- as.matrix(subset(penguins, select = bill_length_mm)) outcomes <- penguins$body_mass_g simple_lm_impl(predictors, outcomes) #> $coefs #> [1] 95.49649 #> #> $coef_names #> [1] \"bill_length_mm\""},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"model-fitting-bridge","dir":"Articles","previous_headings":"Model Fitting","what":"Model Fitting Bridge","title":"Creating Modeling Packages With hardhat","text":"Now constructor implementation function, can create common function used top level methods (data frames, matrices, formulas, recipes). call implementation function, use information along blueprint create new instance model. argument output call mold(), ’ve called processed. object (minimum) predictors, outcomes, blueprint. might also arguments additional options pass implementation function. bridge function take standardized predictors outcomes convert lower level types implementation function requires. predictors outcomes returned mold() always data frames, case can convert matrices vectors directly use lower level function. also good place use hardhat’s validation functions. case, always expect outcome single column since univariate model, can use validate_outcomes_is_univariate() enforce . point, can simulate user input pass bridge run model. Multiple outcomes error:","code":"simple_lm_bridge <- function(processed) { validate_outcomes_are_univariate(processed$outcomes) predictors <- as.matrix(processed$predictors) outcomes <- processed$outcomes[[1]] fit <- simple_lm_impl(predictors, outcomes) new_simple_lm( coefs = fit$coefs, coef_names = fit$coef_names, blueprint = processed$blueprint ) } # Simulate formula interface processed_1 <- mold(bill_length_mm ~ body_mass_g + species, penguins) # Simulate xy interface processed_2 <- mold(x = penguins[\"body_mass_g\"], y = penguins$bill_length_mm) simple_lm_bridge(processed_1) #> #> $coefs #> [1] 0.003754612 24.908763524 34.817525835 28.447942512 #> #> $coef_names #> [1] \"body_mass_g\" \"speciesAdelie\" \"speciesChinstrap\" #> [4] \"speciesGentoo\" simple_lm_bridge(processed_2) #> #> $coefs #> [1] 0.01022951 #> #> $coef_names #> [1] \"body_mass_g\" multi_outcome <- mold(bill_length_mm + bill_depth_mm ~ body_mass_g + species, penguins) simple_lm_bridge(multi_outcome) #> Error in `validate_outcomes_are_univariate()`: #> ! The outcome must be univariate, but 2 columns were found."},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"user-facing-fitting-function","dir":"Articles","previous_headings":"Model Fitting","what":"User Facing Fitting Function","title":"Creating Modeling Packages With hardhat","text":"pieces place, everything need create high level modeling interface. generic function, generally methods data frames, matrices, formulas, recipes. method call mold() method specific inputs run preprocessing, pass bridge function run actual model. also good practice provide default method nice error message unknown types. Let’s give try: can use preprocessing well, handled mold().","code":"# Generic simple_lm <- function(x, ...) { UseMethod(\"simple_lm\") } # Default simple_lm.default <- function(x, ...) { stop( \"`simple_lm()` is not defined for a '\", class(x)[1], \"'.\", call. 
= FALSE ) } # XY method - data frame simple_lm.data.frame <- function(x, y, ...) { processed <- mold(x, y) simple_lm_bridge(processed) } # XY method - matrix simple_lm.matrix <- function(x, y, ...) { processed <- mold(x, y) simple_lm_bridge(processed) } # Formula method simple_lm.formula <- function(formula, data, ...) { processed <- mold(formula, data) simple_lm_bridge(processed) } # Recipe method simple_lm.recipe <- function(x, data, ...) { processed <- mold(x, data) simple_lm_bridge(processed) } predictors <- penguins[c(\"bill_length_mm\", \"bill_depth_mm\")] outcomes_vec <- penguins$body_mass_g outcomes_df <- penguins[\"body_mass_g\"] # Vector outcome simple_lm(predictors, outcomes_vec) #> #> $coefs #> [1] 110.88151 -40.16918 #> #> $coef_names #> [1] \"bill_length_mm\" \"bill_depth_mm\" # 1 column data frame outcome simple_lm(predictors, outcomes_df) #> #> $coefs #> [1] 110.88151 -40.16918 #> #> $coef_names #> [1] \"bill_length_mm\" \"bill_depth_mm\" # Formula interface simple_lm(body_mass_g ~ bill_length_mm + bill_depth_mm, penguins) #> #> $coefs #> [1] 110.88151 -40.16918 #> #> $coef_names #> [1] \"bill_length_mm\" \"bill_depth_mm\" library(recipes) # - Log a predictor # - Generate dummy variables for factors simple_lm(body_mass_g ~ log(bill_length_mm) + species, penguins) #> #> $coefs #> [1] 3985.047 -10865.973 -11753.182 -10290.188 #> #> $coef_names #> [1] \"log(bill_length_mm)\" \"speciesAdelie\" \"speciesChinstrap\" #> [4] \"speciesGentoo\" # Same, but with a recipe rec <- recipe(body_mass_g ~ bill_length_mm + species, penguins) %>% step_log(bill_length_mm) %>% step_dummy(species, one_hot = TRUE) simple_lm(rec, penguins) #> #> $coefs #> [1] 3985.047 -10865.973 -11753.182 -10290.188 #> #> $coef_names #> [1] \"bill_length_mm\" \"species_Adelie\" \"species_Chinstrap\" #> [4] \"species_Gentoo\""},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"adding-an-intercept-option","dir":"Articles","previous_headings":"Model Fitting > User Facing Fitting Function","what":"Adding an Intercept Option","title":"Creating Modeling Packages With hardhat","text":"might noticed linear regression isn’t adding intercept. Generally, linear regression models want default intercept added . accomplish , can add intercept argument user facing function, use tweak blueprint otherwise created automatically. Note even formula method respects intercept argument. recap, default mold() automatically add intercept method, including formula method.","code":"simple_lm <- function(x, ...) { UseMethod(\"simple_lm\") } simple_lm.data.frame <- function(x, y, intercept = TRUE, ...) { blueprint <- default_xy_blueprint(intercept = intercept) processed <- mold(x, y, blueprint = blueprint) simple_lm_bridge(processed) } simple_lm.matrix <- function(x, y, intercept = TRUE,...) { blueprint <- default_xy_blueprint(intercept = intercept) processed <- mold(x, y, blueprint = blueprint) simple_lm_bridge(processed) } simple_lm.formula <- function(formula, data, intercept = TRUE, ...) { blueprint <- default_formula_blueprint(intercept = intercept) processed <- mold(formula, data, blueprint = blueprint) simple_lm_bridge(processed) } simple_lm.recipe <- function(x, data, intercept = TRUE, ...) 
{ blueprint <- default_recipe_blueprint(intercept = intercept) processed <- mold(x, data, blueprint = blueprint) simple_lm_bridge(processed) } # By default an intercept is included simple_lm(predictors, outcomes_df) #> #> $coefs #> [1] 3413.45185 74.81263 -145.50718 #> #> $coef_names #> [1] \"(Intercept)\" \"bill_length_mm\" \"bill_depth_mm\" # But the user can turn this off simple_lm(body_mass_g ~ log(bill_length_mm) + species, penguins, intercept = FALSE) #> #> $coefs #> [1] 3985.047 -10865.973 -11753.182 -10290.188 #> #> $coef_names #> [1] \"log(bill_length_mm)\" \"speciesAdelie\" \"speciesChinstrap\" #> [4] \"speciesGentoo\""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"prediction-implementation","dir":"Articles","previous_headings":"Model Prediction","what":"Prediction Implementation","title":"Creating Modeling Packages With hardhat","text":"prediction side, need implementation functions like fitting model. vary based \"type\" argument predict(). \"type\" might \"numeric\" numeric predictions use , \"class\" hard class predictions, \"prob\" class probabilities, . set recommended names \"type\" can found Model Predictions section implementation principles. model, focus returning numeric predictions. generally like name prediction implementation functions predict__(). arguments implementation functions include model object predictors form prediction algorithm expects (, matrix). Also used another hardhat function standardizing prediction output, spruce_numeric(). function tidies numeric response output, automatically standardizes match recommendations principles guide. output always tibble, \"numeric\" type 1 column, .pred. test , run model call forge() output manually. higher level user facing function automatically.","code":"predict_simple_lm_numeric <- function(object, predictors) { coefs <- object$coefs pred <- as.vector(predictors %*% coefs) out <- spruce_numeric(pred) out } model <- simple_lm(bill_length_mm ~ body_mass_g + species, penguins) predictors <- forge(penguins, model$blueprint)$predictors predictors <- as.matrix(predictors) predict_simple_lm_numeric(model, predictors) #> # A tibble: 333 × 1 #> .pred #> #> 1 39.0 #> 2 39.2 #> 3 37.1 #> 4 37.9 #> 5 38.6 #> 6 38.5 #> 7 42.5 #> 8 36.9 #> 9 39.2 #> 10 41.4 #> # ℹ 323 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"prediction-bridge","dir":"Articles","previous_headings":"Model Prediction","what":"Prediction Bridge","title":"Creating Modeling Packages With hardhat","text":"prediction bridge converts standardized predictors lower level type prediction implementation functions expect. predictors always data frame, part return value call forge(). Since prediction implementation function takes matrix, convert . Additionally, switch() type argument decide prediction implementation functions call. , type == \"numeric\", predict_simple_lm_numeric() called. also like using rlang::arg_match() validate type one accepted prediction types. advantage match.arg() partial matches allowed, error messages bit nicer. 
Let’s test:","code":"predict_simple_lm_bridge <- function(type, object, predictors) { type <- rlang::arg_match(type, \"numeric\") predictors <- as.matrix(predictors) switch( type, numeric = predict_simple_lm_numeric(object, predictors) ) } model <- simple_lm(bill_length_mm ~ body_mass_g + species, penguins) # Pass in the data frame predictors <- forge(penguins, model$blueprint)$predictors predict_simple_lm_bridge(\"numeric\", model, predictors) #> # A tibble: 333 × 1 #> .pred #> #> 1 39.0 #> 2 39.2 #> 3 37.1 #> 4 37.9 #> 5 38.6 #> 6 38.5 #> 7 42.5 #> 8 36.9 #> 9 39.2 #> 10 41.4 #> # ℹ 323 more rows # Partial matches are an error predict_simple_lm_bridge(\"numer\", model, predictors) #> Error in `predict_simple_lm_bridge()`: #> ! `type` must be one of \"numeric\", not \"numer\". #> ℹ Did you mean \"numeric\"?"},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"user-facing-prediction-function","dir":"Articles","previous_headings":"Model Prediction","what":"User Facing Prediction Function","title":"Creating Modeling Packages With hardhat","text":"Finally, can create S3 method generic predict() function. match modeling principles, use new_data accept matrix data frame new predictors. first thing predict() method call forge() new_data blueprint attached simple_lm model fit time. performs required preprocessing new data, checks type data frame supplied new_data matches type data frame supplied fit time. one valuable features forge(), adds large amount robustness predict()ion function. see examples end vignette. calling forge(), pass bridge function call correct prediction function based type. Finally, good practice call hardhat function, validate_prediction_size(), return value original new_data ensure number rows output number rows input. prediction made row new_data, NA value placed instead. Mainly, validation function check model developer ensure always return output sane length.","code":"predict.simple_lm <- function(object, new_data, type = \"numeric\", ...) { # Enforces column order, type, column names, etc processed <- forge(new_data, object$blueprint) out <- predict_simple_lm_bridge(type, object, processed$predictors) validate_prediction_size(out, new_data) out }"},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"final-testing","dir":"Articles","previous_headings":"","what":"Final Testing","title":"Creating Modeling Packages With hardhat","text":"Finally, can test top level modeling function along corresponding predict() method. using forge(), automatically get powerful type checking ensure new_data form expect.","code":"model <- simple_lm(bill_length_mm ~ log(body_mass_g) + species, penguins) predict(model, penguins) #> # A tibble: 333 × 1 #> .pred #> #> 1 39.1 #> 2 39.3 #> 3 36.9 #> 4 37.9 #> 5 38.7 #> 6 38.6 #> 7 42.5 #> 8 36.7 #> 9 39.3 #> 10 41.5 #> # ℹ 323 more rows # `new_data` isn't a data frame predict(model, penguins$species) #> Error in `forge()`: #> ! The class of `new_data`, 'factor', is not recognized. # Missing a required column predict(model, subset(penguins, select = -body_mass_g)) #> Error in `validate_column_names()`: #> ! The following required columns are missing: 'body_mass_g'. # In this case, 'species' is a character, # but can be losslessy converted to a factor. # That happens for you automatically and silently. 
penguins_chr_species <- transform(penguins, species = as.character(species)) predict(model, penguins_chr_species) #> # A tibble: 333 × 1 #> .pred #> #> 1 39.1 #> 2 39.3 #> 3 36.9 #> 4 37.9 #> 5 38.7 #> 6 38.6 #> 7 42.5 #> 8 36.7 #> 9 39.3 #> 10 41.5 #> # ℹ 323 more rows # Slightly different from above. Here, 'species' is a character, # AND has an extra unexpected factor level. It is # removed with a warning, but you still get a factor # with the correct levels penguins_chr_bad_species <- penguins_chr_species penguins_chr_bad_species$species[1] <- \"new_level\" predict(model, penguins_chr_bad_species) #> Warning: Novel levels found in column 'species': 'new_level'. The levels #> have been removed, and values have been coerced to 'NA'. #> # A tibble: 333 × 1 #> .pred #> #> 1 NA #> 2 39.3 #> 3 36.9 #> 4 37.9 #> 5 38.7 #> 6 38.6 #> 7 42.5 #> 8 36.7 #> 9 39.3 #> 10 41.5 #> # ℹ 323 more rows # This case throws an error. # Here, 'species' is a double and # when it should have been a factor. # You can't cast a double to a factor! penguins_dbl_species <- transform(penguins, species = 1) predict(model, penguins_dbl_species) #> Error in `scream()`: #> ! Can't convert `data$species` to match type of `species` >."},{"path":"https://hardhat.tidymodels.org/dev/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Hannah Frick. Author, maintainer. Davis Vaughan. Author. Max Kuhn. Author. . Copyright holder, funder.","code":""},{"path":"https://hardhat.tidymodels.org/dev/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Frick H, Vaughan D, Kuhn M (2024). hardhat: Construct Modeling Packages. R package version 1.4.0.9002, https://hardhat.tidymodels.org, https://github.com/tidymodels/hardhat.","code":"@Manual{, title = {hardhat: Construct Modeling Packages}, author = {Hannah Frick and Davis Vaughan and Max Kuhn}, year = {2024}, note = {R package version 1.4.0.9002, https://hardhat.tidymodels.org}, url = {https://github.com/tidymodels/hardhat}, }"},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/index.html","id":"introduction","dir":"","previous_headings":"","what":"Introduction","title":"Construct Modeling Packages","text":"hardhat developer focused package designed ease creation new modeling packages, simultaneously promoting good R modeling package standards laid set opinionated Conventions R Modeling Packages. hardhat four main goals: Easily, consistently, robustly preprocess data fit time prediction time mold() forge(). Provide one source truth common input validation functions, checking new data prediction time contains required columns used fit time. Provide extra utility functions additional common tasks, adding intercept columns, standardizing predict() output, extracting valuable class factor level information predictors. Reimagine base R preprocessing infrastructure stats::model.matrix() stats::model.frame() using stricter approaches found model_matrix() model_frame(). idea reduce burden creating good modeling interface much possible, instead let package developer focus writing core implementation new model. 
benefits developer, also user modeling package, standardization allows users build set “expectations” around modeling function return, interact .","code":""},{"path":"https://hardhat.tidymodels.org/dev/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Construct Modeling Packages","text":"can install released version hardhat CRAN : development version GitHub :","code":"install.packages(\"hardhat\") # install.packages(\"pak\") pak::pak(\"tidymodels/hardhat\")"},{"path":"https://hardhat.tidymodels.org/dev/index.html","id":"learning-more","dir":"","previous_headings":"","what":"Learning more","title":"Construct Modeling Packages","text":"learn use hardhat, check vignettes: vignette(\"mold\", \"hardhat\"): Learn preprocess data fit time mold(). vignette(\"forge\", \"hardhat\"): Learn preprocess new data prediction time forge(). vignette(\"package\", \"hardhat\"): Learn use mold() forge() help creating new modeling package. can also watch Max Kuhn discuss use hardhat build new modeling package scratch XI Jornadas de Usuarios de R conference .","code":""},{"path":"https://hardhat.tidymodels.org/dev/index.html","id":"contributing","dir":"","previous_headings":"","what":"Contributing","title":"Construct Modeling Packages","text":"project released Contributor Code Conduct. contributing project, agree abide terms. questions discussions tidymodels packages, modeling, machine learning, please post RStudio Community. think encountered bug, please submit issue. Either way, learn create share reprex (minimal, reproducible example), clearly communicate code. Check details contributing guidelines tidymodels packages get help.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":null,"dir":"Reference","previous_headings":"","what":"Add an intercept column to data — add_intercept_column","title":"Add an intercept column to data — add_intercept_column","text":"function adds integer column 1's data.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add an intercept column to data — add_intercept_column","text":"","code":"add_intercept_column(data, name = \"(Intercept)\")"},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add an intercept column to data — add_intercept_column","text":"data data frame matrix. name name intercept column. 
Defaults \"(Intercept)\", name stats::lm() uses.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add an intercept column to data — add_intercept_column","text":"data intercept column.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Add an intercept column to data — add_intercept_column","text":"column named name already exists data, data returned unchanged warning issued.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Add an intercept column to data — add_intercept_column","text":"","code":"add_intercept_column(mtcars) #> (Intercept) mpg cyl disp hp drat wt qsec vs am #> Mazda RX4 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 #> Mazda RX4 Wag 1 21.0 6 160.0 110 3.90 2.875 17.02 0 1 #> Datsun 710 1 22.8 4 108.0 93 3.85 2.320 18.61 1 1 #> Hornet 4 Drive 1 21.4 6 258.0 110 3.08 3.215 19.44 1 0 #> Hornet Sportabout 1 18.7 8 360.0 175 3.15 3.440 17.02 0 0 #> Valiant 1 18.1 6 225.0 105 2.76 3.460 20.22 1 0 #> Duster 360 1 14.3 8 360.0 245 3.21 3.570 15.84 0 0 #> Merc 240D 1 24.4 4 146.7 62 3.69 3.190 20.00 1 0 #> Merc 230 1 22.8 4 140.8 95 3.92 3.150 22.90 1 0 #> Merc 280 1 19.2 6 167.6 123 3.92 3.440 18.30 1 0 #> Merc 280C 1 17.8 6 167.6 123 3.92 3.440 18.90 1 0 #> Merc 450SE 1 16.4 8 275.8 180 3.07 4.070 17.40 0 0 #> Merc 450SL 1 17.3 8 275.8 180 3.07 3.730 17.60 0 0 #> Merc 450SLC 1 15.2 8 275.8 180 3.07 3.780 18.00 0 0 #> Cadillac Fleetwood 1 10.4 8 472.0 205 2.93 5.250 17.98 0 0 #> Lincoln Continental 1 10.4 8 460.0 215 3.00 5.424 17.82 0 0 #> Chrysler Imperial 1 14.7 8 440.0 230 3.23 5.345 17.42 0 0 #> Fiat 128 1 32.4 4 78.7 66 4.08 2.200 19.47 1 1 #> Honda Civic 1 30.4 4 75.7 52 4.93 1.615 18.52 1 1 #> Toyota Corolla 1 33.9 4 71.1 65 4.22 1.835 19.90 1 1 #> Toyota Corona 1 21.5 4 120.1 97 3.70 2.465 20.01 1 0 #> Dodge Challenger 1 15.5 8 318.0 150 2.76 3.520 16.87 0 0 #> AMC Javelin 1 15.2 8 304.0 150 3.15 3.435 17.30 0 0 #> Camaro Z28 1 13.3 8 350.0 245 3.73 3.840 15.41 0 0 #> Pontiac Firebird 1 19.2 8 400.0 175 3.08 3.845 17.05 0 0 #> Fiat X1-9 1 27.3 4 79.0 66 4.08 1.935 18.90 1 1 #> Porsche 914-2 1 26.0 4 120.3 91 4.43 2.140 16.70 0 1 #> Lotus Europa 1 30.4 4 95.1 113 3.77 1.513 16.90 1 1 #> Ford Pantera L 1 15.8 8 351.0 264 4.22 3.170 14.50 0 1 #> Ferrari Dino 1 19.7 6 145.0 175 3.62 2.770 15.50 0 1 #> Maserati Bora 1 15.0 8 301.0 335 3.54 3.570 14.60 0 1 #> Volvo 142E 1 21.4 4 121.0 109 4.11 2.780 18.60 1 1 #> gear carb #> Mazda RX4 4 4 #> Mazda RX4 Wag 4 4 #> Datsun 710 4 1 #> Hornet 4 Drive 3 1 #> Hornet Sportabout 3 2 #> Valiant 3 1 #> Duster 360 3 4 #> Merc 240D 4 2 #> Merc 230 4 2 #> Merc 280 4 4 #> Merc 280C 4 4 #> Merc 450SE 3 3 #> Merc 450SL 3 3 #> Merc 450SLC 3 3 #> Cadillac Fleetwood 3 4 #> Lincoln Continental 3 4 #> Chrysler Imperial 3 4 #> Fiat 128 4 1 #> Honda Civic 4 2 #> Toyota Corolla 4 1 #> Toyota Corona 3 1 #> Dodge Challenger 3 2 #> AMC Javelin 3 2 #> Camaro Z28 3 4 #> Pontiac Firebird 3 2 #> Fiat X1-9 4 1 #> Porsche 914-2 5 2 #> Lotus Europa 5 2 #> Ford Pantera L 5 4 #> Ferrari Dino 5 6 #> Maserati Bora 5 8 #> Volvo 142E 4 2 add_intercept_column(mtcars, \"intercept\") #> intercept mpg cyl disp hp drat wt qsec vs am #> Mazda RX4 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 #> Mazda RX4 Wag 1 21.0 6 
160.0 110 3.90 2.875 17.02 0 1 #> Datsun 710 1 22.8 4 108.0 93 3.85 2.320 18.61 1 1 #> Hornet 4 Drive 1 21.4 6 258.0 110 3.08 3.215 19.44 1 0 #> Hornet Sportabout 1 18.7 8 360.0 175 3.15 3.440 17.02 0 0 #> Valiant 1 18.1 6 225.0 105 2.76 3.460 20.22 1 0 #> Duster 360 1 14.3 8 360.0 245 3.21 3.570 15.84 0 0 #> Merc 240D 1 24.4 4 146.7 62 3.69 3.190 20.00 1 0 #> Merc 230 1 22.8 4 140.8 95 3.92 3.150 22.90 1 0 #> Merc 280 1 19.2 6 167.6 123 3.92 3.440 18.30 1 0 #> Merc 280C 1 17.8 6 167.6 123 3.92 3.440 18.90 1 0 #> Merc 450SE 1 16.4 8 275.8 180 3.07 4.070 17.40 0 0 #> Merc 450SL 1 17.3 8 275.8 180 3.07 3.730 17.60 0 0 #> Merc 450SLC 1 15.2 8 275.8 180 3.07 3.780 18.00 0 0 #> Cadillac Fleetwood 1 10.4 8 472.0 205 2.93 5.250 17.98 0 0 #> Lincoln Continental 1 10.4 8 460.0 215 3.00 5.424 17.82 0 0 #> Chrysler Imperial 1 14.7 8 440.0 230 3.23 5.345 17.42 0 0 #> Fiat 128 1 32.4 4 78.7 66 4.08 2.200 19.47 1 1 #> Honda Civic 1 30.4 4 75.7 52 4.93 1.615 18.52 1 1 #> Toyota Corolla 1 33.9 4 71.1 65 4.22 1.835 19.90 1 1 #> Toyota Corona 1 21.5 4 120.1 97 3.70 2.465 20.01 1 0 #> Dodge Challenger 1 15.5 8 318.0 150 2.76 3.520 16.87 0 0 #> AMC Javelin 1 15.2 8 304.0 150 3.15 3.435 17.30 0 0 #> Camaro Z28 1 13.3 8 350.0 245 3.73 3.840 15.41 0 0 #> Pontiac Firebird 1 19.2 8 400.0 175 3.08 3.845 17.05 0 0 #> Fiat X1-9 1 27.3 4 79.0 66 4.08 1.935 18.90 1 1 #> Porsche 914-2 1 26.0 4 120.3 91 4.43 2.140 16.70 0 1 #> Lotus Europa 1 30.4 4 95.1 113 3.77 1.513 16.90 1 1 #> Ford Pantera L 1 15.8 8 351.0 264 4.22 3.170 14.50 0 1 #> Ferrari Dino 1 19.7 6 145.0 175 3.62 2.770 15.50 0 1 #> Maserati Bora 1 15.0 8 301.0 335 3.54 3.570 14.60 0 1 #> Volvo 142E 1 21.4 4 121.0 109 4.11 2.780 18.60 1 1 #> gear carb #> Mazda RX4 4 4 #> Mazda RX4 Wag 4 4 #> Datsun 710 4 1 #> Hornet 4 Drive 3 1 #> Hornet Sportabout 3 2 #> Valiant 3 1 #> Duster 360 3 4 #> Merc 240D 4 2 #> Merc 230 4 2 #> Merc 280 4 4 #> Merc 280C 4 4 #> Merc 450SE 3 3 #> Merc 450SL 3 3 #> Merc 450SLC 3 3 #> Cadillac Fleetwood 3 4 #> Lincoln Continental 3 4 #> Chrysler Imperial 3 4 #> Fiat 128 4 1 #> Honda Civic 4 2 #> Toyota Corolla 4 1 #> Toyota Corona 3 1 #> Dodge Challenger 3 2 #> AMC Javelin 3 2 #> Camaro Z28 3 4 #> Pontiac Firebird 3 2 #> Fiat X1-9 4 1 #> Porsche 914-2 5 2 #> Lotus Europa 5 2 #> Ford Pantera L 5 4 #> Ferrari Dino 5 6 #> Maserati Bora 5 8 #> Volvo 142E 4 2 add_intercept_column(as.matrix(mtcars)) #> (Intercept) mpg cyl disp hp drat wt qsec vs am #> Mazda RX4 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 #> Mazda RX4 Wag 1 21.0 6 160.0 110 3.90 2.875 17.02 0 1 #> Datsun 710 1 22.8 4 108.0 93 3.85 2.320 18.61 1 1 #> Hornet 4 Drive 1 21.4 6 258.0 110 3.08 3.215 19.44 1 0 #> Hornet Sportabout 1 18.7 8 360.0 175 3.15 3.440 17.02 0 0 #> Valiant 1 18.1 6 225.0 105 2.76 3.460 20.22 1 0 #> Duster 360 1 14.3 8 360.0 245 3.21 3.570 15.84 0 0 #> Merc 240D 1 24.4 4 146.7 62 3.69 3.190 20.00 1 0 #> Merc 230 1 22.8 4 140.8 95 3.92 3.150 22.90 1 0 #> Merc 280 1 19.2 6 167.6 123 3.92 3.440 18.30 1 0 #> Merc 280C 1 17.8 6 167.6 123 3.92 3.440 18.90 1 0 #> Merc 450SE 1 16.4 8 275.8 180 3.07 4.070 17.40 0 0 #> Merc 450SL 1 17.3 8 275.8 180 3.07 3.730 17.60 0 0 #> Merc 450SLC 1 15.2 8 275.8 180 3.07 3.780 18.00 0 0 #> Cadillac Fleetwood 1 10.4 8 472.0 205 2.93 5.250 17.98 0 0 #> Lincoln Continental 1 10.4 8 460.0 215 3.00 5.424 17.82 0 0 #> Chrysler Imperial 1 14.7 8 440.0 230 3.23 5.345 17.42 0 0 #> Fiat 128 1 32.4 4 78.7 66 4.08 2.200 19.47 1 1 #> Honda Civic 1 30.4 4 75.7 52 4.93 1.615 18.52 1 1 #> Toyota Corolla 1 33.9 4 71.1 65 4.22 1.835 19.90 1 1 #> Toyota 
Corona 1 21.5 4 120.1 97 3.70 2.465 20.01 1 0 #> Dodge Challenger 1 15.5 8 318.0 150 2.76 3.520 16.87 0 0 #> AMC Javelin 1 15.2 8 304.0 150 3.15 3.435 17.30 0 0 #> Camaro Z28 1 13.3 8 350.0 245 3.73 3.840 15.41 0 0 #> Pontiac Firebird 1 19.2 8 400.0 175 3.08 3.845 17.05 0 0 #> Fiat X1-9 1 27.3 4 79.0 66 4.08 1.935 18.90 1 1 #> Porsche 914-2 1 26.0 4 120.3 91 4.43 2.140 16.70 0 1 #> Lotus Europa 1 30.4 4 95.1 113 3.77 1.513 16.90 1 1 #> Ford Pantera L 1 15.8 8 351.0 264 4.22 3.170 14.50 0 1 #> Ferrari Dino 1 19.7 6 145.0 175 3.62 2.770 15.50 0 1 #> Maserati Bora 1 15.0 8 301.0 335 3.54 3.570 14.60 0 1 #> Volvo 142E 1 21.4 4 121.0 109 4.11 2.780 18.60 1 1 #> gear carb #> Mazda RX4 4 4 #> Mazda RX4 Wag 4 4 #> Datsun 710 4 1 #> Hornet 4 Drive 3 1 #> Hornet Sportabout 3 2 #> Valiant 3 1 #> Duster 360 3 4 #> Merc 240D 4 2 #> Merc 230 4 2 #> Merc 280 4 4 #> Merc 280C 4 4 #> Merc 450SE 3 3 #> Merc 450SL 3 3 #> Merc 450SLC 3 3 #> Cadillac Fleetwood 3 4 #> Lincoln Continental 3 4 #> Chrysler Imperial 3 4 #> Fiat 128 4 1 #> Honda Civic 4 2 #> Toyota Corolla 4 1 #> Toyota Corona 3 1 #> Dodge Challenger 3 2 #> AMC Javelin 3 2 #> Camaro Z28 3 4 #> Pontiac Firebird 3 2 #> Fiat X1-9 4 1 #> Porsche 914-2 5 2 #> Lotus Europa 5 2 #> Ford Pantera L 5 4 #> Ferrari Dino 5 6 #> Maserati Bora 5 8 #> Volvo 142E 4 2"},{"path":"https://hardhat.tidymodels.org/dev/reference/check_quantile_levels.html","id":null,"dir":"Reference","previous_headings":"","what":"Check levels of quantiles — check_quantile_levels","title":"Check levels of quantiles — check_quantile_levels","text":"Check levels quantiles","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/check_quantile_levels.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check levels of quantiles — check_quantile_levels","text":"","code":"check_quantile_levels(levels, call = rlang::caller_env())"},{"path":"https://hardhat.tidymodels.org/dev/reference/check_quantile_levels.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check levels of quantiles — check_quantile_levels","text":"levels quantile levels. 
call Call shown error messages.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/check_quantile_levels.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check levels of quantiles — check_quantile_levels","text":"Invisible TRUE","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/check_quantile_levels.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Check levels of quantiles — check_quantile_levels","text":"Checks levels data type, range, uniqueness, order missingness.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/contr_one_hot.html","id":null,"dir":"Reference","previous_headings":"","what":"Contrast function for one-hot encodings — contr_one_hot","title":"Contrast function for one-hot encodings — contr_one_hot","text":"contrast function produces model matrix indicator columns level factor.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/contr_one_hot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Contrast function for one-hot encodings — contr_one_hot","text":"","code":"contr_one_hot(n, contrasts = TRUE, sparse = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/contr_one_hot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Contrast function for one-hot encodings — contr_one_hot","text":"n vector character factor levels number unique levels. contrasts argument backwards compatibility default TRUE supported. sparse argument backwards compatibility default FALSE supported.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/contr_one_hot.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Contrast function for one-hot encodings — contr_one_hot","text":"diagonal matrix n--n.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Default formula blueprint — default_formula_blueprint","title":"Default formula blueprint — default_formula_blueprint","text":"pages holds details formula preprocessing blueprint. blueprint used default mold() x formula.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Default formula blueprint — default_formula_blueprint","text":"","code":"default_formula_blueprint( intercept = FALSE, allow_novel_levels = FALSE, indicators = \"traditional\", composition = \"tibble\" ) # S3 method for class 'formula' mold(formula, data, ..., blueprint = NULL)"},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Default formula blueprint — default_formula_blueprint","text":"intercept logical. intercept included processed data? information used process function mold forge function list. allow_novel_levels logical. novel factor levels allowed prediction time? information used clean function forge function list, passed scream(). indicators single character string. Control factors expanded dummy variable indicator columns. One : \"traditional\" - default. Create dummy variables using traditional model.matrix() infrastructure. Generally creates K - 1 indicator columns factor, K number levels factor. \"none\" - Leave factor variables alone. expansion done. 
\"one_hot\" - Create dummy variables using one-hot encoding approach expands unordered factors K indicator columns, rather K - 1. composition Either \"tibble\", \"matrix\", \"dgCMatrix\" format processed predictors. \"matrix\" \"dgCMatrix\" chosen, predictors must numeric preprocessing method applied; otherwise error thrown. formula formula specifying predictors outcomes. data data frame matrix containing outcomes predictors. ... used. blueprint preprocessing blueprint. left NULL, default_formula_blueprint() used.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Default formula blueprint — default_formula_blueprint","text":"default_formula_blueprint(), formula blueprint.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Default formula blueprint — default_formula_blueprint","text":"different base R, behavior expanding factors dummy variables indicators = \"traditional\" intercept present always intuitive documented. intercept present, factors expanded K-1 new columns, K number levels factor. intercept present, first factor expanded K columns (one-hot encoding), remaining factors expanded K-1 columns. behavior ensures meaningful predictions can made reference level first factor, exact \"intercept\" model requested. Without behavior, predictions reference level first factor always forced 0 intercept. Offsets can included formula method use inline function stats::offset(). returned tibble 1 column named \".offset\" $extras$offset slot return value.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"mold","dir":"Reference","previous_headings":"","what":"Mold","title":"Default formula blueprint — default_formula_blueprint","text":"mold() used default formula blueprint: Predictors RHS formula isolated, converted 1 sided formula: ~ RHS. Runs stats::model.frame() RHS formula uses data. indicators = \"traditional\", runs stats::model.matrix() result. indicators = \"none\", factors removed model.matrix() run, added back afterwards. interactions inline functions involving factors allowed. indicators = \"one_hot\", runs stats::model.matrix() result using contrast function creates indicator columns levels factors. offsets present using offset(), extracted model_offset(). intercept = TRUE, adds intercept column. Coerces result steps tibble. Outcomes LHS formula isolated, converted 1 sided formula: ~ LHS. Runs stats::model.frame() LHS formula uses data. Coerces result steps tibble.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"forge","dir":"Reference","previous_headings":"","what":"Forge","title":"Default formula blueprint — default_formula_blueprint","text":"forge() used default formula blueprint: calls shrink() trim new_data required columns coerce new_data tibble. calls scream() perform validation structure columns new_data. Predictors runs stats::model.frame() new_data using stored terms object corresponding predictors. , original mold() call, indicators = \"traditional\" set, runs stats::model.matrix() result. , original mold() call, indicators = \"none\" set, runs stats::model.matrix() result without factor columns, adds afterwards. 
, original mold() call, indicators = \"one_hot\" set, runs stats::model.matrix() result contrast function includes indicators levels factor columns. offsets present using offset() original call mold(), extracted model_offset(). intercept = TRUE original call mold(), intercept column added. coerces result steps tibble. Outcomes runs stats::model.frame() new_data using stored terms object corresponding outcomes. Coerces result tibble.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"differences-from-base-r","dir":"Reference","previous_headings":"","what":"Differences From Base R","title":"Default formula blueprint — default_formula_blueprint","text":"number differences base R regarding formulas processed mold() require explanation. Multivariate outcomes can specified LHS using syntax similar RHS (.e. outcome_1 + outcome_2 ~ predictors). complex calculations done LHS return matrices (like stats::poly()), matrices flattened multiple columns tibble call model.frame(). possible, recommended, large amount preprocessing required outcomes, better using recipes::recipe(). Global variables allowed formula. error thrown included. terms formula come data. need use inline functions formula, safest way prefix package name, like pkg::fn(). ensures function always available mold() (fit) forge() (prediction) time. said, package attached (.e. library()), able use inline function without prefix. default, intercepts included predictor output formula. include intercept, set blueprint = default_formula_blueprint(intercept = TRUE). rationale many packages either always require never allow intercept (example, earth package), large amount extra work keep user supplying one removing . interface standardizes flexibility one place.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Default formula blueprint — default_formula_blueprint","text":"","code":"# --------------------------------------------------------------------------- data(\"hardhat-example-data\") # --------------------------------------------------------------------------- # Formula Example # Call mold() with the training data processed <- mold( log(num_1) ~ num_2 + fac_1, example_train, blueprint = default_formula_blueprint(intercept = TRUE) ) # Then, call forge() with the blueprint and the test data # to have it preprocess the test data in the same way forge(example_test, processed$blueprint) #> $predictors #> # A tibble: 2 × 4 #> `(Intercept)` num_2 fac_1b fac_1c #> #> 1 1 0.967 0 0 #> 2 1 0.761 0 1 #> #> $outcomes #> NULL #> #> $extras #> $extras$offset #> NULL #> #> # Use `outcomes = TRUE` to also extract the preprocessed outcome forge(example_test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 2 × 4 #> `(Intercept)` num_2 fac_1b fac_1c #> #> 1 1 0.967 0 0 #> 2 1 0.761 0 1 #> #> $outcomes #> # A tibble: 2 × 1 #> `log(num_1)` #> #> 1 3.00 #> 2 3.04 #> #> $extras #> $extras$offset #> NULL #> #> # --------------------------------------------------------------------------- # Factors without an intercept # No intercept is added by default processed <- mold(num_1 ~ fac_1 + fac_2, example_train) # So, for factor columns, the first factor is completely expanded into all # `K` columns (the number of levels), and the subsequent factors are expanded # into `K - 1` columns. 
processed$predictors #> # A tibble: 12 × 4 #> fac_1a fac_1b fac_1c fac_2B #> #> 1 1 0 0 0 #> 2 1 0 0 1 #> 3 1 0 0 0 #> 4 1 0 0 1 #> 5 0 1 0 0 #> 6 0 1 0 1 #> 7 0 1 0 0 #> 8 0 1 0 1 #> 9 0 0 1 0 #> 10 0 0 1 1 #> 11 0 0 1 0 #> 12 0 0 1 1 # In the above example, `fac_1` is expanded into all three columns, # `fac_2` is not. This behavior comes from `model.matrix()`, and is somewhat # known in the R community, but can lead to a model that is difficult to # interpret since the corresponding p-values are testing wildly different # hypotheses. # To get all indicators for all columns (irrespective of the intercept), # use the `indicators = \"one_hot\"` option processed <- mold( num_1 ~ fac_1 + fac_2, example_train, blueprint = default_formula_blueprint(indicators = \"one_hot\") ) processed$predictors #> # A tibble: 12 × 5 #> fac_1a fac_1b fac_1c fac_2A fac_2B #> #> 1 1 0 0 1 0 #> 2 1 0 0 0 1 #> 3 1 0 0 1 0 #> 4 1 0 0 0 1 #> 5 0 1 0 1 0 #> 6 0 1 0 0 1 #> 7 0 1 0 1 0 #> 8 0 1 0 0 1 #> 9 0 0 1 1 0 #> 10 0 0 1 0 1 #> 11 0 0 1 1 0 #> 12 0 0 1 0 1 # It is not possible to construct a no-intercept model that expands all # factors into `K - 1` columns using the formula method. If required, a # recipe could be used to construct this model. # --------------------------------------------------------------------------- # Global variables y <- rep(1, times = nrow(example_train)) # In base R, global variables are allowed in a model formula frame <- model.frame(fac_1 ~ y + num_2, example_train) head(frame) #> fac_1 y num_2 #> 1 a 1 0.579 #> 2 a 1 0.338 #> 3 a 1 0.206 #> 4 a 1 0.546 #> 5 b 1 0.964 #> 6 b 1 0.631 # mold() does not allow them, and throws an error try(mold(fac_1 ~ y + num_2, example_train)) #> Error in get_all_predictors(formula, data) : #> The following predictors were not found in `data`: 'y'. # --------------------------------------------------------------------------- # Dummy variables and interactions # By default, factor columns are expanded # and interactions are created, both by # calling `model.matrix()`. Some models (like # tree based models) can take factors directly # but still might want to use the formula method. # In those cases, set `indicators = \"none\"` to not # run `model.matrix()` on factor columns. Interactions # are still allowed and are run on numeric columns. bp_no_indicators <- default_formula_blueprint(indicators = \"none\") processed <- mold( ~ fac_1 + num_1:num_2, example_train, blueprint = bp_no_indicators ) processed$predictors #> # A tibble: 12 × 2 #> `num_1:num_2` fac_1 #> #> 1 0.579 a #> 2 0.676 a #> 3 0.618 a #> 4 2.18 a #> 5 4.82 b #> 6 3.79 b #> 7 5.66 b #> 8 1.66 b #> 9 2.84 c #> 10 0.83 c #> 11 6.81 c #> 12 7.42 c # An informative error is thrown when `indicators = \"none\"` and # factors are present in interaction terms or in inline functions try(mold(num_1 ~ num_2:fac_1, example_train, blueprint = bp_no_indicators)) #> Error in mold_formula_default_process_predictors(blueprint = blueprint, : #> Interaction terms involving factors or characters have been #> detected on the RHS of `formula`. These are not allowed when `indicators #> = \"none\"`. #> ℹ Interactions terms involving factors were detected for \"fac_1\" in #> `num_2:fac_1`. try(mold(num_1 ~ paste0(fac_1), example_train, blueprint = bp_no_indicators)) #> Error in mold_formula_default_process_predictors(blueprint = blueprint, : #> Functions involving factors or characters have been detected on #> the RHS of `formula`. These are not allowed when `indicators = \"none\"`. 
#> ℹ Functions involving factors were detected for \"fac_1\" in #> `paste0(fac_1)`. # --------------------------------------------------------------------------- # Multivariate outcomes # Multivariate formulas can be specified easily processed <- mold(num_1 + log(num_2) ~ fac_1, example_train) processed$outcomes #> # A tibble: 12 × 2 #> num_1 `log(num_2)` #> #> 1 1 -0.546 #> 2 2 -1.08 #> 3 3 -1.58 #> 4 4 -0.605 #> 5 5 -0.0367 #> 6 6 -0.460 #> 7 7 -0.213 #> 8 8 -1.57 #> 9 9 -1.15 #> 10 10 -2.49 #> 11 11 -0.480 #> 12 12 -0.481 # Inline functions on the LHS are run, but any matrix # output is flattened (like what happens in `model.matrix()`) # (essentially this means you don't wind up with columns # in the tibble that are matrices) processed <- mold(poly(num_2, degree = 2) ~ fac_1, example_train) processed$outcomes #> # A tibble: 12 × 2 #> `poly(num_2, degree = 2).1` `poly(num_2, degree = 2).2` #> #> 1 0.0981 -0.254 #> 2 -0.177 -0.157 #> 3 -0.327 0.108 #> 4 0.0604 -0.270 #> 5 0.537 0.634 #> 6 0.157 -0.209 #> 7 0.359 0.120 #> 8 -0.325 0.103 #> 9 -0.202 -0.124 #> 10 -0.468 0.492 #> 11 0.144 -0.221 #> 12 0.143 -0.222 # TRUE ncol(processed$outcomes) == 2 #> [1] TRUE # Multivariate formulas specified in mold() # carry over into forge() forge(example_test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 2 × 3 #> fac_1a fac_1b fac_1c #> #> 1 1 0 0 #> 2 0 0 1 #> #> $outcomes #> # A tibble: 2 × 2 #> `poly(num_2, degree = 2).1` `poly(num_2, degree = 2).2` #> #> 1 0.541 0.646 #> 2 0.306 0.00619 #> #> $extras #> $extras$offset #> NULL #> #> # --------------------------------------------------------------------------- # Offsets # Offsets are handled specially in base R, so they deserve special # treatment here as well. You can add offsets using the inline function # `offset()` processed <- mold(num_1 ~ offset(num_2) + fac_1, example_train) processed$extras$offset #> # A tibble: 12 × 1 #> .offset #> #> 1 0.579 #> 2 0.338 #> 3 0.206 #> 4 0.546 #> 5 0.964 #> 6 0.631 #> 7 0.808 #> 8 0.208 #> 9 0.316 #> 10 0.083 #> 11 0.619 #> 12 0.618 # Multiple offsets can be included, and they get added together processed <- mold( num_1 ~ offset(num_2) + offset(num_3), example_train ) identical( processed$extras$offset$.offset, example_train$num_2 + example_train$num_3 ) #> [1] TRUE # Forging test data will also require # and include the offset forge(example_test, processed$blueprint) #> $predictors #> # A tibble: 2 × 0 #> #> $outcomes #> NULL #> #> $extras #> $extras$offset #> # A tibble: 2 × 1 #> .offset #> #> 1 1.06 #> 2 0.802 #> #> # --------------------------------------------------------------------------- # Intercept only # Because `1` and `0` are intercept modifying terms, they are # not allowed in the formula and are instead controlled by the # `intercept` argument of the blueprint. To use an intercept # only formula, you should supply `NULL` on the RHS of the formula. 
mold( ~NULL, example_train, blueprint = default_formula_blueprint(intercept = TRUE) ) #> $predictors #> # A tibble: 12 × 1 #> `(Intercept)` #> #> 1 1 #> 2 1 #> 3 1 #> 4 1 #> 5 1 #> 6 1 #> 7 1 #> 8 1 #> 9 1 #> 10 1 #> 11 1 #> 12 1 #> #> $outcomes #> # A tibble: 12 × 0 #> #> $blueprint #> Formula blueprint: #> #> # Predictors: 0 #> # Outcomes: 0 #> Intercept: TRUE #> Novel Levels: FALSE #> Composition: tibble #> Indicators: traditional #> #> $extras #> $extras$offset #> NULL #> #> # --------------------------------------------------------------------------- # Matrix output for predictors # You can change the `composition` of the predictor data set bp <- default_formula_blueprint(composition = \"dgCMatrix\") processed <- mold(log(num_1) ~ num_2 + fac_1, example_train, blueprint = bp) class(processed$predictors) #> [1] \"dgCMatrix\" #> attr(,\"package\") #> [1] \"Matrix\""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Default recipe blueprint — default_recipe_blueprint","title":"Default recipe blueprint — default_recipe_blueprint","text":"pages holds details recipe preprocessing blueprint. blueprint used default mold() x recipe.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Default recipe blueprint — default_recipe_blueprint","text":"","code":"default_recipe_blueprint( intercept = FALSE, allow_novel_levels = FALSE, fresh = TRUE, strings_as_factors = TRUE, composition = \"tibble\" ) # S3 method for class 'recipe' mold(x, data, ..., blueprint = NULL)"},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Default recipe blueprint — default_recipe_blueprint","text":"intercept logical. intercept included processed data? information used process function mold forge function list. allow_novel_levels logical. novel factor levels allowed prediction time? information used clean function forge function list, passed scream(). fresh already trained operations re-trained prep() called? strings_as_factors character columns converted factors prep() called? composition Either \"tibble\", \"matrix\", \"dgCMatrix\" format processed predictors. \"matrix\" \"dgCMatrix\" chosen, predictors must numeric preprocessing method applied; otherwise error thrown. x unprepped recipe created recipes::recipe(). data data frame matrix containing outcomes predictors. ... used. blueprint preprocessing blueprint. left NULL, default_recipe_blueprint() used.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Default recipe blueprint — default_recipe_blueprint","text":"default_recipe_blueprint(), recipe blueprint.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"mold","dir":"Reference","previous_headings":"","what":"Mold","title":"Default recipe blueprint — default_recipe_blueprint","text":"mold() used default recipe blueprint: calls recipes::prep() prep recipe. calls recipes::juice() extract outcomes predictors. returned tibbles. 
intercept = TRUE, adds intercept column predictors.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"forge","dir":"Reference","previous_headings":"","what":"Forge","title":"Default recipe blueprint — default_recipe_blueprint","text":"forge() used default recipe blueprint: calls shrink() trim new_data required columns coerce new_data tibble. calls scream() perform validation structure columns new_data. calls recipes::bake() new_data using prepped recipe used training. adds intercept column onto new_data intercept = TRUE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Default recipe blueprint — default_recipe_blueprint","text":"","code":"library(recipes) #> Loading required package: dplyr #> #> Attaching package: ‘dplyr’ #> The following objects are masked from ‘package:stats’: #> #> filter, lag #> The following objects are masked from ‘package:base’: #> #> intersect, setdiff, setequal, union #> #> Attaching package: ‘recipes’ #> The following object is masked from ‘package:stats’: #> #> step # --------------------------------------------------------------------------- # Setup train <- iris[1:100, ] test <- iris[101:150, ] # --------------------------------------------------------------------------- # Recipes example # Create a recipe that logs a predictor rec <- recipe(Species ~ Sepal.Length + Sepal.Width, train) %>% step_log(Sepal.Length) processed <- mold(rec, train) # Sepal.Length has been logged processed$predictors #> # A tibble: 100 × 2 #> Sepal.Length Sepal.Width #> #> 1 1.63 3.5 #> 2 1.59 3 #> 3 1.55 3.2 #> 4 1.53 3.1 #> 5 1.61 3.6 #> 6 1.69 3.9 #> 7 1.53 3.4 #> 8 1.61 3.4 #> 9 1.48 2.9 #> 10 1.59 3.1 #> # ℹ 90 more rows processed$outcomes #> # A tibble: 100 × 1 #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 90 more rows # The underlying blueprint is a prepped recipe processed$blueprint$recipe #> #> ── Recipe ──────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 2 #> #> ── Training information #> Training data contained 100 data points and no incomplete rows. #> #> ── Operations #> • Log transformation on: Sepal.Length | Trained # Call forge() with the blueprint and the test data # to have it preprocess the test data in the same way forge(test, processed$blueprint) #> $predictors #> # A tibble: 50 × 2 #> Sepal.Length Sepal.Width #> #> 1 1.84 3.3 #> 2 1.76 2.7 #> 3 1.96 3 #> 4 1.84 2.9 #> 5 1.87 3 #> 6 2.03 3 #> 7 1.59 2.5 #> 8 1.99 2.9 #> 9 1.90 2.5 #> 10 1.97 3.6 #> # ℹ 40 more rows #> #> $outcomes #> NULL #> #> $extras #> $extras$roles #> NULL #> #> # Use `outcomes = TRUE` to also extract the preprocessed outcome! 
# This logged the Sepal.Length column of `new_data` forge(test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 50 × 2 #> Sepal.Length Sepal.Width #> #> 1 1.84 3.3 #> 2 1.76 2.7 #> 3 1.96 3 #> 4 1.84 2.9 #> 5 1.87 3 #> 6 2.03 3 #> 7 1.59 2.5 #> 8 1.99 2.9 #> 9 1.90 2.5 #> 10 1.97 3.6 #> # ℹ 40 more rows #> #> $outcomes #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows #> #> $extras #> $extras$roles #> NULL #> #> # --------------------------------------------------------------------------- # With an intercept # You can add an intercept with `intercept = TRUE` processed <- mold(rec, train, blueprint = default_recipe_blueprint(intercept = TRUE)) processed$predictors #> # A tibble: 100 × 3 #> `(Intercept)` Sepal.Length Sepal.Width #> #> 1 1 1.63 3.5 #> 2 1 1.59 3 #> 3 1 1.55 3.2 #> 4 1 1.53 3.1 #> 5 1 1.61 3.6 #> 6 1 1.69 3.9 #> 7 1 1.53 3.4 #> 8 1 1.61 3.4 #> 9 1 1.48 2.9 #> 10 1 1.59 3.1 #> # ℹ 90 more rows # But you also could have used a recipe step rec2 <- step_intercept(rec) mold(rec2, iris)$predictors #> # A tibble: 150 × 3 #> intercept Sepal.Length Sepal.Width #> #> 1 1 1.63 3.5 #> 2 1 1.59 3 #> 3 1 1.55 3.2 #> 4 1 1.53 3.1 #> 5 1 1.61 3.6 #> 6 1 1.69 3.9 #> 7 1 1.53 3.4 #> 8 1 1.61 3.4 #> 9 1 1.48 2.9 #> 10 1 1.59 3.1 #> # ℹ 140 more rows # --------------------------------------------------------------------------- # Matrix output for predictors # You can change the `composition` of the predictor data set bp <- default_recipe_blueprint(composition = \"dgCMatrix\") processed <- mold(rec, train, blueprint = bp) class(processed$predictors) #> [1] \"dgCMatrix\" #> attr(,\"package\") #> [1] \"Matrix\" # --------------------------------------------------------------------------- # Non standard roles # If you have custom recipes roles, they are assumed to be required at # `bake()` time when passing in `new_data`. This is an assumption that both # recipes and hardhat makes, meaning that those roles are required at # `forge()` time as well. rec_roles <- recipe(train) %>% update_role(Sepal.Width, new_role = \"predictor\") %>% update_role(Species, new_role = \"outcome\") %>% update_role(Sepal.Length, new_role = \"id\") %>% update_role(Petal.Length, new_role = \"important\") processed_roles <- mold(rec_roles, train) # The custom roles will be in the `mold()` result in case you need # them for modeling. 
processed_roles$extras #> $roles #> $roles$id #> # A tibble: 100 × 1 #> Sepal.Length #> #> 1 5.1 #> 2 4.9 #> 3 4.7 #> 4 4.6 #> 5 5 #> 6 5.4 #> 7 4.6 #> 8 5 #> 9 4.4 #> 10 4.9 #> # ℹ 90 more rows #> #> $roles$important #> # A tibble: 100 × 1 #> Petal.Length #> #> 1 1.4 #> 2 1.4 #> 3 1.3 #> 4 1.5 #> 5 1.4 #> 6 1.7 #> 7 1.4 #> 8 1.5 #> 9 1.4 #> 10 1.5 #> # ℹ 90 more rows #> #> $roles$`NA` #> # A tibble: 100 × 1 #> Petal.Width #> #> 1 0.2 #> 2 0.2 #> 3 0.2 #> 4 0.2 #> 5 0.2 #> 6 0.4 #> 7 0.3 #> 8 0.2 #> 9 0.2 #> 10 0.1 #> # ℹ 90 more rows #> #> # And they are in the `forge()` result forge(test, processed_roles$blueprint)$extras #> $roles #> $roles$id #> # A tibble: 50 × 1 #> Sepal.Length #> #> 1 6.3 #> 2 5.8 #> 3 7.1 #> 4 6.3 #> 5 6.5 #> 6 7.6 #> 7 4.9 #> 8 7.3 #> 9 6.7 #> 10 7.2 #> # ℹ 40 more rows #> #> $roles$important #> # A tibble: 50 × 1 #> Petal.Length #> #> 1 6 #> 2 5.1 #> 3 5.9 #> 4 5.6 #> 5 5.8 #> 6 6.6 #> 7 4.5 #> 8 6.3 #> 9 5.8 #> 10 6.1 #> # ℹ 40 more rows #> #> $roles$`NA` #> # A tibble: 50 × 1 #> Petal.Width #> #> 1 2.5 #> 2 1.9 #> 3 2.1 #> 4 1.8 #> 5 2.2 #> 6 2.1 #> 7 1.7 #> 8 1.8 #> 9 1.8 #> 10 2.5 #> # ℹ 40 more rows #> #> # If you remove a column with a custom role from the test data, then you # won't be able to `forge()` even though this recipe technically didn't # use that column in any steps test2 <- test test2$Petal.Length <- NULL try(forge(test2, processed_roles$blueprint)) #> Error in validate_column_names(data, cols) : #> The following required columns are missing: 'Petal.Length'. # Most of the time, if you find yourself in the above scenario, then we # suggest that you remove `Petal.Length` from the data that is supplied to # the recipe. If that isn't an option, you can declare that that column # isn't required at `bake()` time by using `update_role_requirements()` rec_roles <- update_role_requirements(rec_roles, \"important\", bake = FALSE) processed_roles <- mold(rec_roles, train) forge(test2, processed_roles$blueprint) #> $predictors #> # A tibble: 50 × 1 #> Sepal.Width #> #> 1 3.3 #> 2 2.7 #> 3 3 #> 4 2.9 #> 5 3 #> 6 3 #> 7 2.5 #> 8 2.9 #> 9 2.5 #> 10 3.6 #> # ℹ 40 more rows #> #> $outcomes #> NULL #> #> $extras #> $extras$roles #> $extras$roles$id #> # A tibble: 50 × 1 #> Sepal.Length #> #> 1 6.3 #> 2 5.8 #> 3 7.1 #> 4 6.3 #> 5 6.5 #> 6 7.6 #> 7 4.9 #> 8 7.3 #> 9 6.7 #> 10 7.2 #> # ℹ 40 more rows #> #> $extras$roles$important #> # A tibble: 50 × 0 #> #> $extras$roles$`NA` #> # A tibble: 50 × 1 #> Petal.Width #> #> 1 2.5 #> 2 1.9 #> 3 2.1 #> 4 1.8 #> 5 2.2 #> 6 2.1 #> 7 1.7 #> 8 1.8 #> 9 1.8 #> 10 2.5 #> # ℹ 40 more rows #> #> #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Default XY blueprint — default_xy_blueprint","title":"Default XY blueprint — default_xy_blueprint","text":"pages holds details XY preprocessing blueprint. blueprint used default mold() x y provided separately (.e. 
XY interface used).","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Default XY blueprint — default_xy_blueprint","text":"","code":"default_xy_blueprint( intercept = FALSE, allow_novel_levels = FALSE, composition = \"tibble\" ) # S3 method for class 'data.frame' mold(x, y, ..., blueprint = NULL) # S3 method for class 'matrix' mold(x, y, ..., blueprint = NULL)"},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Default XY blueprint — default_xy_blueprint","text":"intercept logical. intercept included processed data? information used process function mold forge function list. allow_novel_levels logical. novel factor levels allowed prediction time? information used clean function forge function list, passed scream(). composition Either \"tibble\", \"matrix\", \"dgCMatrix\" format processed predictors. \"matrix\" \"dgCMatrix\" chosen, predictors must numeric preprocessing method applied; otherwise error thrown. x data frame matrix containing predictors. y data frame, matrix, vector containing outcomes. ... used. blueprint preprocessing blueprint. left NULL, default_xy_blueprint() used.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Default XY blueprint — default_xy_blueprint","text":"default_xy_blueprint(), XY blueprint.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Default XY blueprint — default_xy_blueprint","text":"documented standardize(), y vector, returned outcomes tibble 1 column standardized name \".outcome\". one special thing XY method's forge function behavior outcomes = TRUE vector y value provided original call mold(). case, mold() converts y tibble, default name .outcome. column forge() look new_data preprocess. See examples section demonstration .","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"mold","dir":"Reference","previous_headings":"","what":"Mold","title":"Default XY blueprint — default_xy_blueprint","text":"mold() used default xy blueprint: converts x tibble. adds intercept column x intercept = TRUE. runs standardize() y.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"forge","dir":"Reference","previous_headings":"","what":"Forge","title":"Default XY blueprint — default_xy_blueprint","text":"forge() used default xy blueprint: calls shrink() trim new_data required columns coerce new_data tibble. calls scream() perform validation structure columns new_data. 
adds intercept column onto new_data intercept = TRUE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Default XY blueprint — default_xy_blueprint","text":"","code":"# --------------------------------------------------------------------------- # Setup train <- iris[1:100, ] test <- iris[101:150, ] train_x <- train[\"Sepal.Length\"] train_y <- train[\"Species\"] test_x <- test[\"Sepal.Length\"] test_y <- test[\"Species\"] # --------------------------------------------------------------------------- # XY Example # First, call mold() with the training data processed <- mold(train_x, train_y) # Then, call forge() with the blueprint and the test data # to have it preprocess the test data in the same way forge(test_x, processed$blueprint) #> $predictors #> # A tibble: 50 × 1 #> Sepal.Length #> #> 1 6.3 #> 2 5.8 #> 3 7.1 #> 4 6.3 #> 5 6.5 #> 6 7.6 #> 7 4.9 #> 8 7.3 #> 9 6.7 #> 10 7.2 #> # ℹ 40 more rows #> #> $outcomes #> NULL #> #> $extras #> NULL #> # --------------------------------------------------------------------------- # Intercept processed <- mold(train_x, train_y, blueprint = default_xy_blueprint(intercept = TRUE)) forge(test_x, processed$blueprint) #> $predictors #> # A tibble: 50 × 2 #> `(Intercept)` Sepal.Length #> #> 1 1 6.3 #> 2 1 5.8 #> 3 1 7.1 #> 4 1 6.3 #> 5 1 6.5 #> 6 1 7.6 #> 7 1 4.9 #> 8 1 7.3 #> 9 1 6.7 #> 10 1 7.2 #> # ℹ 40 more rows #> #> $outcomes #> NULL #> #> $extras #> NULL #> # --------------------------------------------------------------------------- # XY Method and forge(outcomes = TRUE) # You can request that the new outcome columns are preprocessed as well, but # they have to be present in `new_data`! processed <- mold(train_x, train_y) # Can't do this! try(forge(test_x, processed$blueprint, outcomes = TRUE)) #> Error in validate_column_names(data, cols) : #> The following required columns are missing: 'Species'. # Need to use the full test set, including `y` forge(test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 50 × 1 #> Sepal.Length #> #> 1 6.3 #> 2 5.8 #> 3 7.1 #> 4 6.3 #> 5 6.5 #> 6 7.6 #> 7 4.9 #> 8 7.3 #> 9 6.7 #> 10 7.2 #> # ℹ 40 more rows #> #> $outcomes #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows #> #> $extras #> NULL #> # With the XY method, if the Y value used in `mold()` is a vector, # then a column name of `.outcome` is automatically generated. # This name is what forge() looks for in `new_data`. # Y is a vector! y_vec <- train_y$Species processed_vec <- mold(train_x, y_vec) # This throws an informative error that tell you # to include an `\".outcome\"` column in `new_data`. try(forge(iris, processed_vec$blueprint, outcomes = TRUE)) #> Error in validate_missing_name_isnt_.outcome(check$missing_names) : #> The following required columns are missing: '.outcome'. #> #> (This indicates that `mold()` was called with a vector for `y`. When this is the case, and the outcome columns are requested in `forge()`, `new_data` must include a column with the automatically generated name, '.outcome', containing the outcome.) 
test2 <- test test2$.outcome <- test2$Species test2$Species <- NULL # This works, and returns a tibble in the $outcomes slot forge(test2, processed_vec$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 50 × 1 #> Sepal.Length #> #> 1 6.3 #> 2 5.8 #> 3 7.1 #> 4 6.3 #> 5 6.5 #> 6 7.6 #> 7 4.9 #> 8 7.3 #> 9 6.7 #> 10 7.2 #> # ℹ 40 more rows #> #> $outcomes #> # A tibble: 50 × 1 #> .outcome #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows #> #> $extras #> NULL #> # --------------------------------------------------------------------------- # Matrix output for predictors # You can change the `composition` of the predictor data set bp <- default_xy_blueprint(composition = \"dgCMatrix\") processed <- mold(train_x, train_y, blueprint = bp) class(processed$predictors) #> [1] \"dgCMatrix\" #> attr(,\"package\") #> [1] \"Matrix\""},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":null,"dir":"Reference","previous_headings":"","what":"Delete the response from a terms object — delete_response","title":"Delete the response from a terms object — delete_response","text":"delete_response() exactly delete.response(), except fixes long standing bug also removing part \"dataClasses\" attribute corresponding response, exists.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Delete the response from a terms object — delete_response","text":"","code":"delete_response(terms)"},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Delete the response from a terms object — delete_response","text":"terms terms object.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Delete the response from a terms object — delete_response","text":"terms response sections removed.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Delete the response from a terms object — delete_response","text":"bug described : https://stat.ethz.ch/pipermail/r-devel/2012-January/062942.html","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Delete the response from a terms object — delete_response","text":"","code":"framed <- model_frame(Species ~ Sepal.Width, iris) attr(delete.response(framed$terms), \"dataClasses\") #> Species Sepal.Width #> \"factor\" \"numeric\" attr(delete_response(framed$terms), \"dataClasses\") #> Sepal.Width #> \"numeric\""},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract a prototype — extract_ptype","title":"Extract a prototype — extract_ptype","text":"extract_ptype() extracts tibble 0 rows data. 
contains required information column names, classes, factor levels required check structure new data prediction time.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract a prototype — extract_ptype","text":"","code":"extract_ptype(data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract a prototype — extract_ptype","text":"data data frame matrix.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract a prototype — extract_ptype","text":"0 row slice data converting tibble.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Extract a prototype — extract_ptype","text":"extract_ptype() useful creating new preprocessing blueprint. extracts required information used validation checks prediction time.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract a prototype — extract_ptype","text":"","code":"hardhat:::extract_ptype(iris) #> # A tibble: 0 × 5 #> # ℹ 5 variables: Sepal.Length , Sepal.Width , #> # Petal.Length , Petal.Width , Species "},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":null,"dir":"Reference","previous_headings":"","what":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"fct_encode_one_hot() encodes factor one-hot indicator matrix. matrix consists length(x) rows length(levels(x)) columns. Every value row matrix filled 0L except column name x[[]], instead filled 1L.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"","code":"fct_encode_one_hot(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"x factor. x contain missing values. x allowed ordered factor.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"integer matrix length(x) rows length(levels(x)) columns.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"columns returned order levels(x). 
x names, names propagated onto result row names.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"","code":"fct_encode_one_hot(factor(letters)) #> a b c d e f g h i j k l m n o p q r s t u v w x y z #> [1,] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [2,] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [3,] 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [4,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [5,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [6,] 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [7,] 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [8,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [9,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [10,] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [11,] 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [12,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [13,] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [14,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 #> [15,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 #> [16,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 #> [17,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 #> [18,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 #> [19,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 #> [20,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 #> [21,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 #> [22,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 #> [23,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 #> [24,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 #> [25,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 #> [26,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 fct_encode_one_hot(factor(letters[1:2], levels = letters)) #> a b c d e f g h i j k l m n o p q r s t u v w x y z #> [1,] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [2,] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 set.seed(1234) fct_encode_one_hot(factor(sample(letters[1:4], 10, TRUE))) #> a b c d #> [1,] 0 0 0 1 #> [2,] 0 0 0 1 #> [3,] 0 1 0 0 #> [4,] 0 1 0 0 #> [5,] 1 0 0 0 #> [6,] 0 0 0 1 #> [7,] 0 0 1 0 #> [8,] 1 0 0 0 #> [9,] 1 0 0 0 #> [10,] 0 1 0 0"},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":null,"dir":"Reference","previous_headings":"","what":"Forge prediction-ready data — forge","title":"Forge prediction-ready data — forge","text":"forge() applies transformations requested specific blueprint set new_data. new_data contains new predictors (potentially outcomes) used generate predictions. blueprints consistent return values others, unique enough help page. Click learn use one conjunction forge(). 
XY Method - default_xy_blueprint() Formula Method - default_formula_blueprint() Recipes Method - default_recipe_blueprint()","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Forge prediction-ready data — forge","text":"","code":"forge(new_data, blueprint, ..., outcomes = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Forge prediction-ready data — forge","text":"new_data data frame matrix predictors process. outcomes = TRUE, also contain outcomes process. blueprint preprocessing blueprint. ... used. outcomes logical. outcomes processed returned well?","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Forge prediction-ready data — forge","text":"named list 3 elements: predictors: tibble containing preprocessed new_data predictors. outcomes: outcomes = TRUE, tibble containing preprocessed outcomes found new_data. Otherwise, NULL. extras: Either NULL blueprint returns extra information, named list containing extra information.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Forge prediction-ready data — forge","text":"outcomes present new_data, can optionally processed returned outcomes slot returned list setting outcomes = TRUE. useful cross validation need preprocess outcomes test set computing performance.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Forge prediction-ready data — forge","text":"","code":"# See the blueprint specific documentation linked above # for various ways to call forge with different # blueprints. train <- iris[1:100, ] test <- iris[101:150, ] # Formula processed <- mold( log(Sepal.Width) ~ Species, train, blueprint = default_formula_blueprint(indicators = \"none\") ) forge(test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows #> #> $outcomes #> # A tibble: 50 × 1 #> `log(Sepal.Width)` #> #> 1 1.19 #> 2 0.993 #> 3 1.10 #> 4 1.06 #> 5 1.10 #> 6 1.10 #> 7 0.916 #> 8 1.06 #> 9 0.916 #> 10 1.28 #> # ℹ 40 more rows #> #> $extras #> $extras$offset #> NULL #> #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Frequency weights — frequency_weights","title":"Frequency weights — frequency_weights","text":"frequency_weights() creates vector frequency weights allow compactly repeat observation set number times. 
Frequency weights supplied non-negative integer vector, whole numbers allowed.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Frequency weights — frequency_weights","text":"","code":"frequency_weights(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Frequency weights — frequency_weights","text":"x integer vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Frequency weights — frequency_weights","text":"new frequency weights vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Frequency weights — frequency_weights","text":"Frequency weights integers denote many times particular row data observed. help compress redundant rows single entry. tidymodels, frequency weights used parts preprocessing, model fitting, performance estimation operations.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Frequency weights — frequency_weights","text":"","code":"# Record that the first observation has 10 replicates, the second has 12 # replicates, and so on frequency_weights(c(10, 12, 2, 1)) #> #> [1] 10 12 2 1 # Fractional values are not allowed try(frequency_weights(c(1.5, 2.3, 10))) #> Error in frequency_weights(c(1.5, 2.3, 10)) : #> Can't convert from `x` to due to loss of precision. #> • Locations: 1, 2"},{"path":"https://hardhat.tidymodels.org/dev/reference/get_data_classes.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract data classes from a data frame or matrix — get_data_classes","title":"Extract data classes from a data frame or matrix — get_data_classes","text":"predicting model, often important new_data classes original data used fit model. get_data_classes() extracts classes original training data.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/get_data_classes.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract data classes from a data frame or matrix — get_data_classes","text":"","code":"get_data_classes(data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/get_data_classes.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract data classes from a data frame or matrix — get_data_classes","text":"data data frame matrix.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/get_data_classes.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract data classes from a data frame or matrix — get_data_classes","text":"named list. 
names column names data values character vectors containing class column.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/get_data_classes.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract data classes from a data frame or matrix — get_data_classes","text":"","code":"get_data_classes(iris) #> $Sepal.Length #> [1] \"numeric\" #> #> $Sepal.Width #> [1] \"numeric\" #> #> $Petal.Length #> [1] \"numeric\" #> #> $Petal.Width #> [1] \"numeric\" #> #> $Species #> [1] \"factor\" #> get_data_classes(as.matrix(mtcars)) #> $mpg #> [1] \"numeric\" #> #> $cyl #> [1] \"numeric\" #> #> $disp #> [1] \"numeric\" #> #> $hp #> [1] \"numeric\" #> #> $drat #> [1] \"numeric\" #> #> $wt #> [1] \"numeric\" #> #> $qsec #> [1] \"numeric\" #> #> $vs #> [1] \"numeric\" #> #> $am #> [1] \"numeric\" #> #> $gear #> [1] \"numeric\" #> #> $carb #> [1] \"numeric\" #> # Unlike .MFclass(), the full class # vector is returned data <- data.frame(col = ordered(c(\"a\", \"b\"))) .MFclass(data$col) #> [1] \"ordered\" get_data_classes(data) #> $col #> [1] \"ordered\" \"factor\" #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/get_levels.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract factor levels from a data frame — get_levels","title":"Extract factor levels from a data frame — get_levels","text":"get_levels() extracts levels factor columns data. mainly useful extracting original factor levels predictors training set. get_outcome_levels() small wrapper around get_levels() extracting levels factor outcome first calls standardize() y.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/get_levels.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract factor levels from a data frame — get_levels","text":"","code":"get_levels(data) get_outcome_levels(y)"},{"path":"https://hardhat.tidymodels.org/dev/reference/get_levels.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract factor levels from a data frame — get_levels","text":"data data.frame extract levels . y outcome. can : factor vector numeric vector 1D numeric array numeric matrix column names 2D numeric array column names data frame numeric factor columns","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/get_levels.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract factor levels from a data frame — get_levels","text":"named list many elements factor columns data y. names names factor columns, values character vectors levels. 
factor columns, NULL returned.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/get_levels.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract factor levels from a data frame — get_levels","text":"","code":"# Factor columns are returned with their levels get_levels(iris) #> $Species #> [1] \"setosa\" \"versicolor\" \"virginica\" #> # No factor columns get_levels(mtcars) #> NULL # standardize() is first run on `y` # which converts the input to a data frame # with an automatically named column, `\".outcome\"` get_outcome_levels(y = factor(letters[1:5])) #> $.outcome #> [1] \"a\" \"b\" \"c\" \"d\" \"e\" #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-example-data.html","id":null,"dir":"Reference","previous_headings":"","what":"Example data for hardhat — hardhat-example-data","title":"Example data for hardhat — hardhat-example-data","text":"Example data hardhat","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-example-data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Example data for hardhat — hardhat-example-data","text":"example_train,example_test tibbles","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-example-data.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Example data for hardhat — hardhat-example-data","text":"Data objects training test set variables: three numeric two factor columns.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-example-data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Example data for hardhat — hardhat-example-data","text":"","code":"data(\"hardhat-example-data\")"},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-extract.html","id":null,"dir":"Reference","previous_headings":"","what":"Generics for object extraction — hardhat-extract","title":"Generics for object extraction — hardhat-extract","text":"generics used extract elements various model objects. Methods defined packages, tune, workflows, workflowsets, returned object always . extract_fit_engine() returns engine specific fit embedded within parsnip model fit. example, using parsnip::linear_reg() \"lm\" engine, returns underlying lm object. extract_fit_parsnip() returns parsnip model fit. extract_mold() returns preprocessed \"mold\" object returned mold(). contains information preprocessing, including either prepped recipe, formula terms object, variable selectors. extract_spec_parsnip() returns parsnip model specification. extract_preprocessor() returns formula, recipe, variable expressions used preprocessing. extract_recipe() returns recipe, possibly estimated. extract_workflow() returns workflow, possibly fit. extract_parameter_dials() returns single dials parameter object. extract_parameter_set_dials() returns set dials parameter objects. extract_fit_time() returns tibble fit times.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-extract.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generics for object extraction — hardhat-extract","text":"","code":"extract_workflow(x, ...) extract_recipe(x, ...) extract_spec_parsnip(x, ...) extract_fit_parsnip(x, ...) extract_fit_engine(x, ...) extract_mold(x, ...) extract_preprocessor(x, ...) extract_postprocessor(x, ...) extract_parameter_dials(x, ...) extract_parameter_set_dials(x, ...) 
extract_fit_time(x, ...)"},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-extract.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Generics for object extraction — hardhat-extract","text":"x object. ... Extra arguments passed methods.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-extract.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Generics for object extraction — hardhat-extract","text":"","code":"# See packages where methods are defined for examples, such as `parsnip` or # `workflows`."},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-package.html","id":null,"dir":"Reference","previous_headings":"","what":"hardhat: Construct Modeling Packages — hardhat-package","title":"hardhat: Construct Modeling Packages — hardhat-package","text":"Building modeling packages hard. large amount effort generally goes providing implementation new method efficient, fast, correct, often less emphasis put user interface. good interface requires specialized knowledge S3 methods formulas, average package developer might . goal 'hardhat' reduce burden around building new modeling packages providing functionality preprocessing, predicting, validating input.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"hardhat: Construct Modeling Packages — hardhat-package","text":"Maintainer: Hannah Frick hannah@posit.co (ORCID) Authors: Davis Vaughan davis@posit.co Max Kuhn max@posit.co contributors: Posit Software, PBC [copyright holder, funder]","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Importance weights — importance_weights","title":"Importance weights — importance_weights","text":"importance_weights() creates vector importance weights allow apply context dependent weight observations. Importance weights supplied non-negative double vector, fractional values allowed.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Importance weights — importance_weights","text":"","code":"importance_weights(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Importance weights — importance_weights","text":"x double vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Importance weights — importance_weights","text":"new importance weights vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Importance weights — importance_weights","text":"Importance weights focus much row data set influence model estimation. can based data arbitrarily set achieve goal. tidymodels, importance weights affect model estimation supervised recipes steps. 
used yardstick functions calculating measures model performance.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Importance weights — importance_weights","text":"","code":"importance_weights(c(1.5, 2.3, 10)) #> #> [1] 1.5 2.3 10.0"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Is x a preprocessing blueprint? — is_blueprint","title":"Is x a preprocessing blueprint? — is_blueprint","text":"is_blueprint() checks x inherits \"hardhat_blueprint\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is x a preprocessing blueprint? — is_blueprint","text":"","code":"is_blueprint(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Is x a preprocessing blueprint? — is_blueprint","text":"x object.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is x a preprocessing blueprint? — is_blueprint","text":"","code":"is_blueprint(default_xy_blueprint()) #> [1] TRUE"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_case_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Is x a case weights vector? — is_case_weights","title":"Is x a case weights vector? — is_case_weights","text":"is_case_weights() checks x inherits \"hardhat_case_weights\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_case_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is x a case weights vector? — is_case_weights","text":"","code":"is_case_weights(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_case_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Is x a case weights vector? — is_case_weights","text":"x object.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_case_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is x a case weights vector? — is_case_weights","text":"single TRUE FALSE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_case_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is x a case weights vector? — is_case_weights","text":"","code":"is_case_weights(1) #> [1] FALSE is_case_weights(frequency_weights(1)) #> [1] TRUE"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_frequency_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Is x a frequency weights vector? — is_frequency_weights","title":"Is x a frequency weights vector? — is_frequency_weights","text":"is_frequency_weights() checks x inherits \"hardhat_frequency_weights\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_frequency_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is x a frequency weights vector? 
— is_frequency_weights","text":"","code":"is_frequency_weights(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_frequency_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Is x a frequency weights vector? — is_frequency_weights","text":"x object.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_frequency_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is x a frequency weights vector? — is_frequency_weights","text":"single TRUE FALSE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_frequency_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is x a frequency weights vector? — is_frequency_weights","text":"","code":"is_frequency_weights(1) #> [1] FALSE is_frequency_weights(frequency_weights(1)) #> [1] TRUE is_frequency_weights(importance_weights(1)) #> [1] FALSE"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_importance_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Is x an importance weights vector? — is_importance_weights","title":"Is x an importance weights vector? — is_importance_weights","text":"is_importance_weights() checks x inherits \"hardhat_importance_weights\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_importance_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is x an importance weights vector? — is_importance_weights","text":"","code":"is_importance_weights(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_importance_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Is x an importance weights vector? — is_importance_weights","text":"x object.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_importance_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is x an importance weights vector? — is_importance_weights","text":"single TRUE FALSE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_importance_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is x an importance weights vector? — is_importance_weights","text":"","code":"is_importance_weights(1) #> [1] FALSE is_importance_weights(frequency_weights(1)) #> [1] FALSE is_importance_weights(importance_weights(1)) #> [1] TRUE"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":null,"dir":"Reference","previous_headings":"","what":"Construct a model frame — model_frame","title":"Construct a model frame — model_frame","text":"model_frame() stricter version stats::model.frame(). number differences, main rows never dropped return value list frame terms separated two distinct objects.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Construct a model frame — model_frame","text":"","code":"model_frame(formula, data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Construct a model frame — model_frame","text":"formula formula terms object representing terms model frame. 
data data frame matrix containing terms formula.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Construct a model frame — model_frame","text":"named list two elements: \"data\": tibble containing model frame. \"terms\": terms object containing terms model frame.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Construct a model frame — model_frame","text":"following explains rationale difference arguments compared stats::model.frame(): subset: allowed number rows model_frame() run always . na.action: allowed forced \"na.pass\" number rows model_frame() run always . drop.unused.levels: allowed seems inconsistent data result model_frame() ever factor column different levels, unless specified though original_levels. required, done recipe step explicitly. xlev: allowed check done ahead time. Use scream() check integrity data training set required. ...: exposed offsets handled separately, necessary pass weights rows never dropped (weights subset alongside rest design matrix). non-predictor columns required, use \"roles\" features recipes. important always use results model_frame() model_matrix() rather stats::model.matrix() tibble result model_frame() terms object attached. model.matrix(, ) called directly, call model.frame() made automatically, can give faulty results.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Construct a model frame — model_frame","text":"","code":"# --------------------------------------------------------------------------- # Example usage framed <- model_frame(Species ~ Sepal.Width, iris) framed$data #> # A tibble: 150 × 2 #> Species Sepal.Width #> #> 1 setosa 3.5 #> 2 setosa 3 #> 3 setosa 3.2 #> 4 setosa 3.1 #> 5 setosa 3.6 #> 6 setosa 3.9 #> 7 setosa 3.4 #> 8 setosa 3.4 #> 9 setosa 2.9 #> 10 setosa 3.1 #> # ℹ 140 more rows framed$terms #> Species ~ Sepal.Width #> attr(,\"variables\") #> list(Species, Sepal.Width) #> attr(,\"factors\") #> Sepal.Width #> Species 0 #> Sepal.Width 1 #> attr(,\"term.labels\") #> [1] \"Sepal.Width\" #> attr(,\"order\") #> [1] 1 #> attr(,\"intercept\") #> [1] 1 #> attr(,\"response\") #> [1] 1 #> attr(,\".Environment\") #> #> attr(,\"predvars\") #> list(Species, Sepal.Width) #> attr(,\"dataClasses\") #> Species Sepal.Width #> \"factor\" \"numeric\" # --------------------------------------------------------------------------- # Missing values never result in dropped rows iris2 <- iris iris2$Sepal.Width[1] <- NA framed2 <- model_frame(Species ~ Sepal.Width, iris2) head(framed2$data) #> # A tibble: 6 × 2 #> Species Sepal.Width #> #> 1 setosa NA #> 2 setosa 3 #> 3 setosa 3.2 #> 4 setosa 3.1 #> 5 setosa 3.6 #> 6 setosa 3.9 nrow(framed2$data) == nrow(iris2) #> [1] TRUE"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Construct a design matrix — model_matrix","title":"Construct a design matrix — model_matrix","text":"model_matrix() stricter version stats::model.matrix(). 
Notably, model_matrix() never drop rows, result tibble.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Construct a design matrix — model_matrix","text":"","code":"model_matrix(terms, data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Construct a design matrix — model_matrix","text":"terms terms object construct model matrix . typically terms object returned corresponding call model_frame(). data tibble construct design matrix . typically tibble returned corresponding call model_frame().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Construct a design matrix — model_matrix","text":"tibble containing design matrix.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Construct a design matrix — model_matrix","text":"following explains rationale difference arguments compared stats::model.matrix(): contrasts.arg: Set contrasts argument, options(\"contrasts\") globally, assign contrast factor interest directly using stats::contrasts(). See examples section. xlev: allowed model.frame() never called, unnecessary. ...: allowed default method model.matrix() use , lm method uses pass potential offsets weights , handled differently hardhat.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Construct a design matrix — model_matrix","text":"","code":"# --------------------------------------------------------------------------- # Example usage framed <- model_frame(Sepal.Width ~ Species, iris) model_matrix(framed$terms, framed$data) #> # A tibble: 150 × 3 #> `(Intercept)` Speciesversicolor Speciesvirginica #> #> 1 1 0 0 #> 2 1 0 0 #> 3 1 0 0 #> 4 1 0 0 #> 5 1 0 0 #> 6 1 0 0 #> 7 1 0 0 #> 8 1 0 0 #> 9 1 0 0 #> 10 1 0 0 #> # ℹ 140 more rows # --------------------------------------------------------------------------- # Missing values never result in dropped rows iris2 <- iris iris2$Species[1] <- NA framed2 <- model_frame(Sepal.Width ~ Species, iris2) model_matrix(framed2$terms, framed2$data) #> # A tibble: 150 × 3 #> `(Intercept)` Speciesversicolor Speciesvirginica #> #> 1 1 NA NA #> 2 1 0 0 #> 3 1 0 0 #> 4 1 0 0 #> 5 1 0 0 #> 6 1 0 0 #> 7 1 0 0 #> 8 1 0 0 #> 9 1 0 0 #> 10 1 0 0 #> # ℹ 140 more rows # --------------------------------------------------------------------------- # Contrasts # Default contrasts y <- factor(c(\"a\", \"b\")) x <- data.frame(y = y) framed <- model_frame(~y, x) # Setting contrasts directly y_with_contrast <- y contrasts(y_with_contrast) <- contr.sum(2) x2 <- data.frame(y = y_with_contrast) framed2 <- model_frame(~y, x2) # Compare! 
model_matrix(framed$terms, framed$data) #> # A tibble: 2 × 2 #> `(Intercept)` yb #> #> 1 1 0 #> 2 1 1 model_matrix(framed2$terms, framed2$data) #> # A tibble: 2 × 2 #> `(Intercept)` y1 #> #> 1 1 1 #> 2 1 -1 # Also, can set the contrasts globally global_override <- c(unordered = \"contr.sum\", ordered = \"contr.poly\") rlang::with_options( .expr = { model_matrix(framed$terms, framed$data) }, contrasts = global_override ) #> # A tibble: 2 × 2 #> `(Intercept)` y1 #> #> 1 1 1 #> 2 1 -1"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract a model offset — model_offset","title":"Extract a model offset — model_offset","text":"model_offset() extracts numeric offset model frame. inspired stats::model.offset(), nicer error messages slightly stricter.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract a model offset — model_offset","text":"","code":"model_offset(terms, data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract a model offset — model_offset","text":"terms \"terms\" object corresponding data, returned call model_frame(). data data frame returned call model_frame().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract a model offset — model_offset","text":"numeric vector representing offset.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Extract a model offset — model_offset","text":"column tagged offset numeric, nice error message thrown telling exactly column problematic. stats::model.offset() also allows column named \"(offset)\" considered offset along others tagged stats::offset(). However, stats::model.matrix() recognize columns offsets (remove ). 
inconsistency, columns named \"(offset)\" treated specially model_offset().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract a model offset — model_offset","text":"","code":"x <- model.frame(Species ~ offset(Sepal.Width), iris) model_offset(terms(x), x) #> [1] 3.5 3.0 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 3.7 3.4 3.0 3.0 4.0 4.4 3.9 #> [18] 3.5 3.8 3.8 3.4 3.7 3.6 3.3 3.4 3.0 3.4 3.5 3.4 3.2 3.1 3.4 4.1 4.2 #> [35] 3.1 3.2 3.5 3.6 3.0 3.4 3.5 2.3 3.2 3.5 3.8 3.0 3.8 3.2 3.7 3.3 3.2 #> [52] 3.2 3.1 2.3 2.8 2.8 3.3 2.4 2.9 2.7 2.0 3.0 2.2 2.9 2.9 3.1 3.0 2.7 #> [69] 2.2 2.5 3.2 2.8 2.5 2.8 2.9 3.0 2.8 3.0 2.9 2.6 2.4 2.4 2.7 2.7 3.0 #> [86] 3.4 3.1 2.3 3.0 2.5 2.6 3.0 2.6 2.3 2.7 3.0 2.9 2.9 2.5 2.8 3.3 2.7 #> [103] 3.0 2.9 3.0 3.0 2.5 2.9 2.5 3.6 3.2 2.7 3.0 2.5 2.8 3.2 3.0 3.8 2.6 #> [120] 2.2 3.2 2.8 2.8 2.7 3.3 3.2 2.8 3.0 2.8 3.0 2.8 3.8 2.8 2.8 2.6 3.0 #> [137] 3.4 3.1 3.0 3.1 3.1 3.1 2.7 3.2 3.3 3.0 2.5 3.0 3.4 3.0 xx <- model.frame(Species ~ offset(Sepal.Width) + offset(Sepal.Length), iris) model_offset(terms(xx), xx) #> [1] 8.6 7.9 7.9 7.7 8.6 9.3 8.0 8.4 7.3 8.0 9.1 8.2 7.8 #> [14] 7.3 9.8 10.1 9.3 8.6 9.5 8.9 8.8 8.8 8.2 8.4 8.2 8.0 #> [27] 8.4 8.7 8.6 7.9 7.9 8.8 9.3 9.7 8.0 8.2 9.0 8.5 7.4 #> [40] 8.5 8.5 6.8 7.6 8.5 8.9 7.8 8.9 7.8 9.0 8.3 10.2 9.6 #> [53] 10.0 7.8 9.3 8.5 9.6 7.3 9.5 7.9 7.0 8.9 8.2 9.0 8.5 #> [66] 9.8 8.6 8.5 8.4 8.1 9.1 8.9 8.8 8.9 9.3 9.6 9.6 9.7 #> [79] 8.9 8.3 7.9 7.9 8.5 8.7 8.4 9.4 9.8 8.6 8.6 8.0 8.1 #> [92] 9.1 8.4 7.3 8.3 8.7 8.6 9.1 7.6 8.5 9.6 8.5 10.1 9.2 #> [105] 9.5 10.6 7.4 10.2 9.2 10.8 9.7 9.1 9.8 8.2 8.6 9.6 9.5 #> [118] 11.5 10.3 8.2 10.1 8.4 10.5 9.0 10.0 10.4 9.0 9.1 9.2 10.2 #> [131] 10.2 11.7 9.2 9.1 8.7 10.7 9.7 9.5 9.0 10.0 9.8 10.0 8.5 #> [144] 10.0 10.0 9.7 8.8 9.5 9.6 8.9 # Problematic columns are caught with intuitive errors tryCatch( expr = { x <- model.frame(~ offset(Species), iris) model_offset(terms(x), x) }, error = function(e) { print(e$message) } ) #> Column, 'offset(Species)', is tagged as an offset, but is not numeric. All offsets must be numeric."},{"path":"https://hardhat.tidymodels.org/dev/reference/modeling-usethis.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a modeling package — modeling-usethis","title":"Create a modeling package — modeling-usethis","text":"create_modeling_package() : Call usethis::create_package() set new R package. Call use_modeling_deps(). Call use_modeling_files(). use_modeling_deps() : Add hardhat, rlang, stats Imports Add recipes Suggests roxygen2 available, use roxygen markdown use_modeling_files() : Add package documentation file Generate populate 3 files R/: {{model}}-constructor.R {{model}}-fit.R {{model}}-predict.R","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/modeling-usethis.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a modeling package — modeling-usethis","text":"","code":"create_modeling_package(path, model, fields = NULL, open = interactive()) use_modeling_deps() use_modeling_files(model)"},{"path":"https://hardhat.tidymodels.org/dev/reference/modeling-usethis.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a modeling package — modeling-usethis","text":"path path. exists, used. exist, created, provided parent path exists. model string. name high level modeling function users call. example, \"linear_regression\". 
used populate skeleton. Spaces allowed. fields named list fields add DESCRIPTION, potentially overriding default values. See usethis::use_description() can set personalized defaults using package options. open TRUE, activates new project: RStudio desktop, package opened new session. RStudio server, current RStudio project activated. Otherwise, working directory active project changed.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/modeling-usethis.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a modeling package — modeling-usethis","text":"create_modeling_package() returns project path invisibly. use_modeling_deps() returns invisibly. use_modeling_files() return model invisibly.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/mold.html","id":null,"dir":"Reference","previous_headings":"","what":"Mold data for modeling — mold","title":"Mold data for modeling — mold","text":"mold() applies appropriate processing steps required get training data ready fed model. use various blueprints understand preprocess data come various forms, formula recipe. blueprints consistent return values others, unique enough help page. Click learn use one conjunction mold(). XY Method - default_xy_blueprint() Formula Method - default_formula_blueprint() Recipes Method - default_recipe_blueprint()","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/mold.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mold data for modeling — mold","text":"","code":"mold(x, ...)"},{"path":"https://hardhat.tidymodels.org/dev/reference/mold.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mold data for modeling — mold","text":"x object. See method specific implementations linked Description information. ... used.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/mold.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mold data for modeling — mold","text":"named list containing 4 elements: predictors: tibble containing molded predictors used model. outcomes: tibble containing molded outcomes used model. blueprint: method specific \"hardhat_blueprint\" object use making predictions. extras: Either NULL blueprint returns extra information, named list containing extra information.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/mold.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Mold data for modeling — mold","text":"","code":"# See the method specific documentation linked in Description # for the details of each blueprint, and more examples. 
# XY mold(iris[\"Sepal.Width\"], iris$Species) #> $predictors #> # A tibble: 150 × 1 #> Sepal.Width #> #> 1 3.5 #> 2 3 #> 3 3.2 #> 4 3.1 #> 5 3.6 #> 6 3.9 #> 7 3.4 #> 8 3.4 #> 9 2.9 #> 10 3.1 #> # ℹ 140 more rows #> #> $outcomes #> # A tibble: 150 × 1 #> .outcome #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 140 more rows #> #> $blueprint #> XY blueprint: #> #> # Predictors: 1 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble #> #> $extras #> NULL #> # Formula mold(Species ~ Sepal.Width, iris) #> $predictors #> # A tibble: 150 × 1 #> Sepal.Width #> #> 1 3.5 #> 2 3 #> 3 3.2 #> 4 3.1 #> 5 3.6 #> 6 3.9 #> 7 3.4 #> 8 3.4 #> 9 2.9 #> 10 3.1 #> # ℹ 140 more rows #> #> $outcomes #> # A tibble: 150 × 1 #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 140 more rows #> #> $blueprint #> Formula blueprint: #> #> # Predictors: 1 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble #> Indicators: traditional #> #> $extras #> $extras$offset #> NULL #> #> # Recipe library(recipes) mold(recipe(Species ~ Sepal.Width, iris), iris) #> $predictors #> # A tibble: 150 × 1 #> Sepal.Width #> #> 1 3.5 #> 2 3 #> 3 3.2 #> 4 3.1 #> 5 3.6 #> 6 3.9 #> 7 3.4 #> 8 3.4 #> 9 2.9 #> 10 3.1 #> # ℹ 140 more rows #> #> $outcomes #> # A tibble: 150 × 1 #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 140 more rows #> #> $blueprint #> Recipe blueprint: #> #> # Predictors: 1 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble #> #> $extras #> $extras$roles #> NULL #> #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/new-blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a new preprocessing blueprint — new_formula_blueprint","title":"Create a new preprocessing blueprint — new_formula_blueprint","text":"base classes creating new preprocessing blueprints. blueprints inherit one created new_blueprint(), default method specific blueprints inherit three . want create processing blueprint specific method, generally subclass one method specific blueprints . want create completely new preprocessing blueprint totally new preprocessing method (.e. formula, xy, recipe method) subclass new_blueprint(). 
addition creating blueprint subclass, likely also need provide S3 methods run_mold() run_forge() subclass.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new-blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a new preprocessing blueprint — new_formula_blueprint","text":"","code":"new_formula_blueprint( intercept = FALSE, allow_novel_levels = FALSE, ptypes = NULL, formula = NULL, indicators = \"traditional\", composition = \"tibble\", ..., subclass = character() ) new_recipe_blueprint( intercept = FALSE, allow_novel_levels = FALSE, fresh = TRUE, strings_as_factors = TRUE, composition = \"tibble\", ptypes = NULL, recipe = NULL, ..., subclass = character() ) new_xy_blueprint( intercept = FALSE, allow_novel_levels = FALSE, composition = \"tibble\", ptypes = NULL, ..., subclass = character() ) new_blueprint( intercept = FALSE, allow_novel_levels = FALSE, composition = \"tibble\", ptypes = NULL, ..., subclass = character() )"},{"path":"https://hardhat.tidymodels.org/dev/reference/new-blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a new preprocessing blueprint — new_formula_blueprint","text":"intercept logical. intercept included processed data? information used process function mold forge function list. allow_novel_levels logical. novel factor levels allowed prediction time? information used clean function forge function list, passed scream(). ptypes Either NULL, named list 2 elements, predictors outcomes, 0-row tibbles. ptypes generated automatically mold() time used validate new_data prediction time. formula Either NULL, formula specifies predictors outcomes preprocessed. argument set automatically mold() time. indicators single character string. Control factors expanded dummy variable indicator columns. One : \"traditional\" - default. Create dummy variables using traditional model.matrix() infrastructure. Generally creates K - 1 indicator columns factor, K number levels factor. \"none\" - Leave factor variables alone. expansion done. \"one_hot\" - Create dummy variables using one-hot encoding approach expands unordered factors K indicator columns, rather K - 1. composition Either \"tibble\", \"matrix\", \"dgCMatrix\" format processed predictors. \"matrix\" \"dgCMatrix\" chosen, predictors must numeric preprocessing method applied; otherwise error thrown. ... Name-value pairs additional elements blueprints subclass blueprint. subclass character vector. subclasses blueprint. fresh already trained operations re-trained prep() called? strings_as_factors character columns converted factors prep() called? recipe Either NULL, unprepped recipe. argument set automatically mold() time.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new-blueprint.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a new preprocessing blueprint — new_formula_blueprint","text":"preprocessing blueprint, list containing inputs used arguments function, along class specific type blueprint created.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new-default-blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a new default blueprint — new_default_formula_blueprint","title":"Create a new default blueprint — new_default_formula_blueprint","text":"page contains constructors default blueprints. 
can extended want add extra behavior top default blueprints already , generally extend non-default versions constructors found documentation new_blueprint().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new-default-blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a new default blueprint — new_default_formula_blueprint","text":"","code":"new_default_formula_blueprint( intercept = FALSE, allow_novel_levels = FALSE, ptypes = NULL, formula = NULL, indicators = \"traditional\", composition = \"tibble\", terms = list(predictors = NULL, outcomes = NULL), levels = NULL, ..., subclass = character() ) new_default_recipe_blueprint( intercept = FALSE, allow_novel_levels = FALSE, fresh = TRUE, strings_as_factors = TRUE, composition = \"tibble\", ptypes = NULL, recipe = NULL, extra_role_ptypes = NULL, ..., subclass = character() ) new_default_xy_blueprint( intercept = FALSE, allow_novel_levels = FALSE, composition = \"tibble\", ptypes = NULL, ..., subclass = character() )"},{"path":"https://hardhat.tidymodels.org/dev/reference/new-default-blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a new default blueprint — new_default_formula_blueprint","text":"intercept logical. intercept included processed data? information used process function mold forge function list. allow_novel_levels logical. novel factor levels allowed prediction time? information used clean function forge function list, passed scream(). ptypes Either NULL, named list 2 elements, predictors outcomes, 0-row tibbles. ptypes generated automatically mold() time used validate new_data prediction time. formula Either NULL, formula specifies predictors outcomes preprocessed. argument set automatically mold() time. indicators single character string. Control factors expanded dummy variable indicator columns. One : \"traditional\" - default. Create dummy variables using traditional model.matrix() infrastructure. Generally creates K - 1 indicator columns factor, K number levels factor. \"none\" - Leave factor variables alone. expansion done. \"one_hot\" - Create dummy variables using one-hot encoding approach expands unordered factors K indicator columns, rather K - 1. composition Either \"tibble\", \"matrix\", \"dgCMatrix\" format processed predictors. \"matrix\" \"dgCMatrix\" chosen, predictors must numeric preprocessing method applied; otherwise error thrown. terms named list two elements, predictors outcomes. elements terms objects describe terms outcomes predictors separately. argument set automatically mold() time. levels Either NULL named list character vectors correspond levels observed converting character predictor columns factors mold(). argument set automatically mold() time. ... Name-value pairs additional elements blueprints subclass blueprint. subclass character vector. subclasses blueprint. fresh already trained operations re-trained prep() called? strings_as_factors character columns converted factors prep() called? recipe Either NULL, unprepped recipe. argument set automatically mold() time. extra_role_ptypes named list. names unique non-standard recipe roles (.e. everything except \"predictors\" \"outcomes\"). values prototypes original columns role. 
used validation forge().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_case_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Extend case weights — new_case_weights","title":"Extend case weights — new_case_weights","text":"new_case_weights() developer oriented function constructing new case weights type. type abstract type little functionality. , class required argument.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_case_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extend case weights — new_case_weights","text":"","code":"new_case_weights(x, ..., class)"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_case_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extend case weights — new_case_weights","text":"x integer double vector. ... Name-value pairs defining attributes class Name subclass.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_case_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extend case weights — new_case_weights","text":"new subclassed case weights vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_case_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extend case weights — new_case_weights","text":"","code":"new_case_weights(1:5, class = \"my_weights\") #> #> [1] 1 2 3 4 5"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_frequency_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Construct a frequency weights vector — new_frequency_weights","title":"Construct a frequency weights vector — new_frequency_weights","text":"new_frequency_weights() developer oriented function constructing new frequency weights vector. Generally, use frequency_weights() instead.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_frequency_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Construct a frequency weights vector — new_frequency_weights","text":"","code":"new_frequency_weights(x = integer(), ..., class = character())"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_frequency_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Construct a frequency weights vector — new_frequency_weights","text":"x integer vector. ... 
Name-value pairs defining attributes class Name subclass.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_frequency_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Construct a frequency weights vector — new_frequency_weights","text":"new frequency weights vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_frequency_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Construct a frequency weights vector — new_frequency_weights","text":"","code":"new_frequency_weights() #> new_frequency_weights(1:5) #> #> [1] 1 2 3 4 5"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_importance_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Construct an importance weights vector — new_importance_weights","title":"Construct an importance weights vector — new_importance_weights","text":"new_importance_weights() developer oriented function constructing new importance weights vector. Generally, use importance_weights() instead.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_importance_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Construct an importance weights vector — new_importance_weights","text":"","code":"new_importance_weights(x = double(), ..., class = character())"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_importance_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Construct an importance weights vector — new_importance_weights","text":"x double vector. ... Name-value pairs defining attributes class Name subclass.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_importance_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Construct an importance weights vector — new_importance_weights","text":"new importance weights vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_importance_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Construct an importance weights vector — new_importance_weights","text":"","code":"new_importance_weights() #> new_importance_weights(c(1.5, 2.3, 10)) #> #> [1] 1.5 2.3 10.0"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":null,"dir":"Reference","previous_headings":"","what":"Constructor for a base model — new_model","title":"Constructor for a base model — new_model","text":"model scalar object, classified Advanced R. , takes uniquely named elements ... combines list class class. entire object represent single model.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Constructor for a base model — new_model","text":"","code":"new_model(..., blueprint = default_xy_blueprint(), class = character())"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Constructor for a base model — new_model","text":"... Name-value pairs elements specific model defined class. blueprint preprocessing blueprint returned call mold(). 
class character vector representing class model.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Constructor for a base model — new_model","text":"new scalar model object, represented classed list named elements specified ....","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Constructor for a base model — new_model","text":"every model multiple interfaces, including formula recipes interfaces, models blueprint can process new data predict() called. easiest way generate blueprint information required prediction time use one returned call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Constructor for a base model — new_model","text":"","code":"new_model( custom_element = \"my-elem\", blueprint = default_xy_blueprint(), class = \"custom_model\" ) #> #> $custom_element #> [1] \"my-elem\" #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/quantile_pred.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a vector containing sets of quantiles — quantile_pred","title":"Create a vector containing sets of quantiles — quantile_pred","text":"quantile_pred() special vector class used efficiently store predictions quantile regression model. requires quantile levels row predicted.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/quantile_pred.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a vector containing sets of quantiles — quantile_pred","text":"","code":"quantile_pred(values, quantile_levels = double()) extract_quantile_levels(x) # S3 method for class 'quantile_pred' as_tibble(x, ..., .rows = NULL, .name_repair = \"minimal\", rownames = NULL) # S3 method for class 'quantile_pred' as.matrix(x, ...)"},{"path":"https://hardhat.tidymodels.org/dev/reference/quantile_pred.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a vector containing sets of quantiles — quantile_pred","text":"values matrix values. column correspond one quantile levels. quantile_levels vector probabilities corresponding values. x object produced quantile_pred(). ... currently used. .rows, .name_repair, rownames Arguments used required original S3 method.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/quantile_pred.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a vector containing sets of quantiles — quantile_pred","text":"quantile_pred() returns vector values associated quantile levels. extract_quantile_levels() returns numeric vector levels. as_tibble() returns tibble rows \".pred_quantile\", \".quantile_levels\", \".row\". 
.matrix() returns unnamed matrix rows samples, columns quantile levels, entries predictions.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/quantile_pred.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a vector containing sets of quantiles — quantile_pred","text":"","code":".pred_quantile <- quantile_pred(matrix(rnorm(20), 5), c(.2, .4, .6, .8)) unclass(.pred_quantile) #> [[1]] #> [1] 0.5060559 -0.4771927 -0.1102855 0.1340882 #> #> [[2]] #> [1] -0.5747400 -0.9983864 -0.5110095 -0.4906859 #> #> [[3]] #> [1] -0.5466319 -0.7762539 -0.9111954 -0.4405479 #> #> [[4]] #> [1] -0.56445200 0.06445882 -0.83717168 0.45958944 #> #> [[5]] #> [1] -0.8900378 0.9594941 2.4158352 -0.6937202 #> #> attr(,\"quantile_levels\") #> [1] 0.2 0.4 0.6 0.8 # Access the underlying information extract_quantile_levels(.pred_quantile) #> [1] 0.2 0.4 0.6 0.8 # Matrix format as.matrix(.pred_quantile) #> [,1] [,2] [,3] [,4] #> [1,] 0.5060559 -0.47719270 -0.1102855 0.1340882 #> [2,] -0.5747400 -0.99838644 -0.5110095 -0.4906859 #> [3,] -0.5466319 -0.77625389 -0.9111954 -0.4405479 #> [4,] -0.5644520 0.06445882 -0.8371717 0.4595894 #> [5,] -0.8900378 0.95949406 2.4158352 -0.6937202 # Tidy format library(tibble) as_tibble(.pred_quantile) #> # A tibble: 20 × 3 #> .pred_quantile .quantile_levels .row #> #> 1 0.506 0.2 1 #> 2 -0.477 0.4 1 #> 3 -0.110 0.6 1 #> 4 0.134 0.8 1 #> 5 -0.575 0.2 2 #> 6 -0.998 0.4 2 #> 7 -0.511 0.6 2 #> 8 -0.491 0.8 2 #> 9 -0.547 0.2 3 #> 10 -0.776 0.4 3 #> 11 -0.911 0.6 3 #> 12 -0.441 0.8 3 #> 13 -0.564 0.2 4 #> 14 0.0645 0.4 4 #> 15 -0.837 0.6 4 #> 16 0.460 0.8 4 #> 17 -0.890 0.2 5 #> 18 0.959 0.4 5 #> 19 2.42 0.6 5 #> 20 -0.694 0.8 5"},{"path":"https://hardhat.tidymodels.org/dev/reference/recompose.html","id":null,"dir":"Reference","previous_headings":"","what":"Recompose a data frame into another form — recompose","title":"Recompose a data frame into another form — recompose","text":"recompose() takes data frame converts one : tibble data frame matrix sparse matrix (using Matrix package) internal function used hardhat recipes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/recompose.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Recompose a data frame into another form — recompose","text":"","code":"recompose(data, ..., composition = \"tibble\")"},{"path":"https://hardhat.tidymodels.org/dev/reference/recompose.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Recompose a data frame into another form — recompose","text":"data data frame. ... dots future extensions must empty. composition One : \"tibble\" convert tibble. \"data.frame\" convert base data frame. \"matrix\" convert matrix. columns must numeric. \"dgCMatrix\" convert sparse matrix. 
columns must numeric, Matrix package must installed.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/recompose.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Recompose a data frame into another form — recompose","text":"output type determined composition.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/recompose.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Recompose a data frame into another form — recompose","text":"","code":"df <- vctrs::data_frame(x = 1) recompose(df) #> # A tibble: 1 × 1 #> x #> #> 1 1 recompose(df, composition = \"matrix\") #> x #> [1,] 1 # All columns must be numeric to convert to a matrix df <- vctrs::data_frame(x = 1, y = \"a\") try(recompose(df, composition = \"matrix\")) #> Error in recompose(df, composition = \"matrix\") : #> `data` must only contain numeric columns. #> ℹ These columns aren't numeric: \"y\"."},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Refresh a preprocessing blueprint — refresh_blueprint","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"refresh_blueprint() developer facing generic function called end update_blueprint(). simply wrapper around method specific new_*_blueprint() function runs updated blueprint constructor ensure elements blueprint still valid update.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"","code":"refresh_blueprint(blueprint)"},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"blueprint preprocessing blueprint.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"blueprint returned call corresponding constructor.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"implement custom blueprint, export refresh_blueprint() method just calls constructor blueprint passes elements blueprint constructor.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"","code":"blueprint <- default_xy_blueprint() # This should never be done manually, but is essentially # what `update_blueprint(blueprint, intercept = TRUE)` does for you blueprint$intercept <- TRUE # Then update_blueprint() will call refresh_blueprint() # to ensure that the structure is correct refresh_blueprint(blueprint) #> XY blueprint: #> #> # Predictors: 0 #> # Outcomes: 0 #> Intercept: TRUE #> Novel Levels: FALSE #> Composition: tibble # So you can't do something like... 
blueprint_bad <- blueprint blueprint_bad$intercept <- 1 # ...because the constructor will catch it try(refresh_blueprint(blueprint_bad)) #> Error in new_blueprint(intercept = intercept, allow_novel_levels = allow_novel_levels, : #> `intercept` must be `TRUE` or `FALSE`, not the number 1. # And update_blueprint() catches this automatically try(update_blueprint(blueprint, intercept = 1)) #> Error in new_blueprint(intercept = intercept, allow_novel_levels = allow_novel_levels, : #> `intercept` must be `TRUE` or `FALSE`, not the number 1."},{"path":"https://hardhat.tidymodels.org/dev/reference/run-forge.html","id":null,"dir":"Reference","previous_headings":"","what":"forge() according to a blueprint — run-forge","title":"forge() according to a blueprint — run-forge","text":"developer facing function used creating blueprint subclass. called forge() dispatches S3 class blueprint. gives opportunity forge new data way specific blueprint. run_forge() always called forge() arguments, unlike run_mold(), different interfaces calling forge(). run_forge() always called : run_forge(blueprint, new_data = new_data, outcomes = outcomes) write blueprint subclass new_xy_blueprint(), new_recipe_blueprint(), new_formula_blueprint(), new_blueprint(), run_forge() method signature must match .","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-forge.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"forge() according to a blueprint — run-forge","text":"","code":"run_forge(blueprint, new_data, ..., outcomes = FALSE) # S3 method for class 'default_formula_blueprint' run_forge(blueprint, new_data, ..., outcomes = FALSE) # S3 method for class 'default_recipe_blueprint' run_forge(blueprint, new_data, ..., outcomes = FALSE) # S3 method for class 'default_xy_blueprint' run_forge(blueprint, new_data, ..., outcomes = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/run-forge.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"forge() according to a blueprint — run-forge","text":"blueprint preprocessing blueprint. new_data data frame matrix predictors process. outcomes = TRUE, also contain outcomes process. ... used. outcomes logical. outcomes processed returned well?","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-forge.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"forge() according to a blueprint — run-forge","text":"run_forge() methods return object immediately returned forge(). See return value section forge() understand structure return value look like.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-forge.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"forge() according to a blueprint — run-forge","text":"","code":"bp <- default_xy_blueprint() outcomes <- mtcars[\"mpg\"] predictors <- mtcars predictors$mpg <- NULL mold <- run_mold(bp, x = predictors, y = outcomes) run_forge(mold$blueprint, new_data = predictors) #> $predictors #> # A tibble: 32 × 10 #> cyl disp hp drat wt qsec vs am gear carb #> #> 1 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> #> $outcomes #> NULL #> #> $extras #> NULL #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/run-mold.html","id":null,"dir":"Reference","previous_headings":"","what":"mold() according to a blueprint — run-mold","title":"mold() according to a blueprint — run-mold","text":"developer facing function used creating blueprint subclass. called mold() dispatches S3 class blueprint. gives opportunity mold data way specific blueprint. run_mold() called different arguments depending interface mold() used: XY interface: run_mold(blueprint, x = x, y = y) Formula interface: run_mold(blueprint, data = data) Additionally, blueprint updated contain formula. Recipe interface: run_mold(blueprint, data = data) Additionally, blueprint updated contain recipe. write blueprint subclass new_xy_blueprint(), new_recipe_blueprint(), new_formula_blueprint() run_mold() method signature must match whichever interface listed used. write completely new blueprint inheriting new_blueprint() write new mold() method (using xy, formula, recipe interface), full control run_mold() called.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-mold.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"mold() according to a blueprint — run-mold","text":"","code":"run_mold(blueprint, ...) # S3 method for class 'default_formula_blueprint' run_mold(blueprint, ..., data) # S3 method for class 'default_recipe_blueprint' run_mold(blueprint, ..., data) # S3 method for class 'default_xy_blueprint' run_mold(blueprint, ..., x, y)"},{"path":"https://hardhat.tidymodels.org/dev/reference/run-mold.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"mold() according to a blueprint — run-mold","text":"blueprint preprocessing blueprint. ... used. Required extensibility. data data frame matrix containing outcomes predictors. x data frame matrix containing predictors. y data frame, matrix, vector containing outcomes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-mold.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"mold() according to a blueprint — run-mold","text":"run_mold() methods return object immediately returned mold(). See return value section mold() understand structure return value look like.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-mold.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"mold() according to a blueprint — run-mold","text":"","code":"bp <- default_xy_blueprint() outcomes <- mtcars[\"mpg\"] predictors <- mtcars predictors$mpg <- NULL run_mold(bp, x = predictors, y = outcomes) #> $predictors #> # A tibble: 32 × 10 #> cyl disp hp drat wt qsec vs am gear carb #> #> 1 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> #> $outcomes #> # A tibble: 32 × 1 #> mpg #> #> 1 21 #> 2 21 #> 3 22.8 #> 4 21.4 #> 5 18.7 #> 6 18.1 #> 7 14.3 #> 8 24.4 #> 9 22.8 #> 10 19.2 #> # ℹ 22 more rows #> #> $blueprint #> XY blueprint: #> #> # Predictors: 10 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble #> #> $extras #> NULL #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":null,"dir":"Reference","previous_headings":"","what":"Scream — scream","title":"Scream — scream","text":"scream() ensures structure data prototype, ptype. hood, vctrs::vec_cast() used, casts column data type corresponding column ptype. casting enforces number important structural checks, including limited : Data Classes - Checks class column data corresponding column ptype. Novel Levels - Checks factor columns data new levels compared ptype columns. new levels, warning issued coerced NA. check optional, can turned allow_novel_levels = TRUE. Level Recovery - Checks factor columns data missing factor levels compared ptype columns. missing levels, restored.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Scream — scream","text":"","code":"scream(data, ptype, allow_novel_levels = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Scream — scream","text":"data data frame containing new data check structure . ptype data frame prototype cast data . commonly 0-row slice training set. allow_novel_levels novel factor levels data allowed? safest approach default, throws warning novel levels found, coerces NA values. Setting argument TRUE ignore novel levels. argument apply ordered factors. Novel levels allowed ordered factors level ordering critical part type.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Scream — scream","text":"tibble containing required columns required structural modifications made.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Scream — scream","text":"scream() called forge() shrink() actual processing done. Generally, need call scream() directly, forge() . scream() used standalone function, good practice call shrink() right checks scream() ensure required column names actually exist data. checks exist shrink().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"factor-levels","dir":"Reference","previous_headings":"","what":"Factor Levels","title":"Scream — scream","text":"scream() tries helpful recovering missing factor levels warning novel levels. following graphic outlines scream() handles factor levels coercing column data column ptype. Note ordered factor handing much stricter factor handling. 
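A small sketch of that stricter ordered-factor handling, hedged: it mirrors the vctrs casting rules that scream() relies on, and the toy data here is invented for illustration. When the ordered levels in `data` differ from those in `ptype`, the cast is expected to fail outright rather than be repaired.

```r
library(hardhat)

lvls <- c("low", "medium", "high")
train <- data.frame(size = ordered(c("low", "high", "medium"), levels = lvls))

# A prototype is most commonly a 0-row slice of the training data
ptype <- train[0, , drop = FALSE]

# Identical levels in the same order: the cast succeeds quietly
scream(data.frame(size = ordered("medium", levels = lvls)), ptype)

# A different level ordering is not repaired for ordered factors;
# this is expected to error rather than warn
bad <- data.frame(size = ordered("medium", levels = c("low", "high", "medium")))
try(scream(bad, ptype))
```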
Ordered factors data must exactly levels ordered factors ptype.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Scream — scream","text":"","code":"# --------------------------------------------------------------------------- # Setup train <- iris[1:100, ] test <- iris[101:150, ] # mold() is run at model fit time # and a formula preprocessing blueprint is recorded x <- mold(log(Sepal.Width) ~ Species, train) # Inside the result of mold() are the prototype tibbles # for the predictors and the outcomes ptype_pred <- x$blueprint$ptypes$predictors ptype_out <- x$blueprint$ptypes$outcomes # --------------------------------------------------------------------------- # shrink() / scream() # Pass the test data, along with a prototype, to # shrink() to extract the prototype columns test_shrunk <- shrink(test, ptype_pred) # Now pass that to scream() to perform validation checks # If no warnings / errors are thrown, the checks were # successful! scream(test_shrunk, ptype_pred) #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows # --------------------------------------------------------------------------- # Outcomes # To also extract the outcomes, use the outcome prototype test_outcome <- shrink(test, ptype_out) scream(test_outcome, ptype_out) #> # A tibble: 50 × 1 #> Sepal.Width #> #> 1 3.3 #> 2 2.7 #> 3 3 #> 4 2.9 #> 5 3 #> 6 3 #> 7 2.5 #> 8 2.9 #> 9 2.5 #> 10 3.6 #> # ℹ 40 more rows # --------------------------------------------------------------------------- # Casting # scream() uses vctrs::vec_cast() to intelligently convert # new data to the prototype automatically. This means # it can automatically perform certain conversions, like # coercing character columns to factors. test2 <- test test2$Species <- as.character(test2$Species) test2_shrunk <- shrink(test2, ptype_pred) scream(test2_shrunk, ptype_pred) #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows # It can also recover missing factor levels. # For example, it is plausible that the test data only had the # \"virginica\" level test3 <- test test3$Species <- factor(test3$Species, levels = \"virginica\") test3_shrunk <- shrink(test3, ptype_pred) test3_fixed <- scream(test3_shrunk, ptype_pred) # scream() recovered the missing levels levels(test3_fixed$Species) #> [1] \"setosa\" \"versicolor\" \"virginica\" # --------------------------------------------------------------------------- # Novel levels # When novel levels with any data are present in `data`, the default # is to coerce them to `NA` values with a warning. test4 <- test test4$Species <- as.character(test4$Species) test4$Species[1] <- \"new_level\" test4$Species <- factor( test4$Species, levels = c(levels(test$Species), \"new_level\") ) test4 <- shrink(test4, ptype_pred) # Warning is thrown test4_removed <- scream(test4, ptype_pred) #> Warning: Novel levels found in column 'Species': 'new_level'. The levels have been removed, and values have been coerced to 'NA'. 
# Novel level is removed levels(test4_removed$Species) #> [1] \"setosa\" \"versicolor\" \"virginica\" # No warning is thrown test4_kept <- scream(test4, ptype_pred, allow_novel_levels = TRUE) # Novel level is kept levels(test4_kept$Species) #> [1] \"setosa\" \"versicolor\" \"virginica\" \"new_level\""},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":null,"dir":"Reference","previous_headings":"","what":"Subset only required columns — shrink","title":"Subset only required columns — shrink","text":"shrink() subsets data contain required columns specified prototype, ptype.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Subset only required columns — shrink","text":"","code":"shrink(data, ptype)"},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Subset only required columns — shrink","text":"data data frame containing data subset. ptype data frame prototype containing required columns.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Subset only required columns — shrink","text":"tibble containing required columns.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Subset only required columns — shrink","text":"shrink() called forge() scream() actual processing done.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Subset only required columns — shrink","text":"","code":"# --------------------------------------------------------------------------- # Setup train <- iris[1:100, ] test <- iris[101:150, ] # --------------------------------------------------------------------------- # shrink() # mold() is run at model fit time # and a formula preprocessing blueprint is recorded x <- mold(log(Sepal.Width) ~ Species, train) # Inside the result of mold() are the prototype tibbles # for the predictors and the outcomes ptype_pred <- x$blueprint$ptypes$predictors ptype_out <- x$blueprint$ptypes$outcomes # Pass the test data, along with a prototype, to # shrink() to extract the prototype columns shrink(test, ptype_pred) #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows # To extract the outcomes, just use the # outcome prototype shrink(test, ptype_out) #> # A tibble: 50 × 1 #> Sepal.Width #> #> 1 3.3 #> 2 2.7 #> 3 3 #> 4 2.9 #> 5 3 #> 6 3 #> 7 2.5 #> 8 2.9 #> 9 2.5 #> 10 3.6 #> # ℹ 40 more rows # shrink() makes sure that the columns # required by `ptype` actually exist in the data # and errors nicely when they don't test2 <- subset(test, select = -Species) try(shrink(test2, ptype_pred)) #> Error in validate_column_names(data, cols) : #> The following required columns are missing: 'Species'."},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce-multiple.html","id":null,"dir":"Reference","previous_headings":"","what":"Spruce up multi-outcome predictions — spruce-multiple","title":"Spruce up multi-outcome predictions — spruce-multiple","text":"family spruce_*_multiple() functions 
converts multi-outcome predictions standardized format. generally called prediction implementation function specific type prediction return.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce-multiple.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Spruce up multi-outcome predictions — spruce-multiple","text":"","code":"spruce_numeric_multiple(...) spruce_class_multiple(...) spruce_prob_multiple(...)"},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce-multiple.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Spruce up multi-outcome predictions — spruce-multiple","text":"... Multiple vectors predictions: spruce_numeric_multiple(), numeric vectors equal size. spruce_class_multiple(), factors \"hard\" class predictions equal size. spruce_prob_multiple(), tibbles equal size, result calling spruce_prob() matrix prediction probabilities. ... named, name used suffix resulting column name, otherwise positional index used.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce-multiple.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Spruce up multi-outcome predictions — spruce-multiple","text":"spruce_numeric_multiple(), tibble numeric columns named pattern .pred_*. spruce_class_multiple(), tibble factor columns named pattern .pred_class_*. spruce_prob_multiple(), tibble data frame columns named pattern .pred_*.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce-multiple.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Spruce up multi-outcome predictions — spruce-multiple","text":"","code":"spruce_numeric_multiple(1:3, foo = 2:4) #> # A tibble: 3 × 2 #> .pred_1 .pred_foo #> #> 1 1 2 #> 2 2 3 #> 3 3 4 spruce_class_multiple( one_step = factor(c(\"a\", \"b\", \"c\")), two_step = factor(c(\"a\", \"c\", \"c\")) ) #> # A tibble: 3 × 2 #> .pred_class_one_step .pred_class_two_step #> #> 1 a a #> 2 b c #> 3 c c one_step <- matrix(c(.3, .7, .0, .1, .3, .6), nrow = 2, byrow = TRUE) two_step <- matrix(c(.2, .7, .1, .2, .4, .4), nrow = 2, byrow = TRUE) binary <- matrix(c(.5, .5, .4, .6), nrow = 2, byrow = TRUE) spruce_prob_multiple( one_step = spruce_prob(c(\"a\", \"b\", \"c\"), one_step), two_step = spruce_prob(c(\"a\", \"b\", \"c\"), two_step), binary = spruce_prob(c(\"yes\", \"no\"), binary) ) #> # A tibble: 2 × 3 #> .pred_one_step$.pred_a .pred_two_step$.pred_a .pred_binary$.pred_yes #> #> 1 0.3 0.2 0.5 #> 2 0.1 0.2 0.4 #> # ℹ 5 more variables: .pred_one_step$.pred_b , $.pred_c , #> # .pred_two_step$.pred_b , $.pred_c , #> # .pred_binary$.pred_no "},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce.html","id":null,"dir":"Reference","previous_headings":"","what":"Spruce up predictions — spruce","title":"Spruce up predictions — spruce","text":"family spruce_*() functions convert predictions standardized format. 
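The single-outcome spruce_*() family introduced here has no worked example in this index, so a minimal sketch follows, using the signatures documented on this page; the toy predictions are invented for illustration.

```r
library(hardhat)

# Numeric predictions become a tibble with a single `.pred` column
spruce_numeric(c(1.5, 2.5, 3.5))

# Hard class predictions become a tibble with a `.pred_class` column
spruce_class(factor(c("yes", "no", "yes")))

# Class probabilities become one `.pred_<level>` column per level,
# in the same order as `pred_levels`
prob <- matrix(c(0.8, 0.2, 0.3, 0.7, 0.6, 0.4), nrow = 3, byrow = TRUE)
spruce_prob(pred_levels = c("yes", "no"), prob_matrix = prob)
```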
generally called prediction implementation function specific type prediction return.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Spruce up predictions — spruce","text":"","code":"spruce_numeric(pred) spruce_class(pred_class) spruce_prob(pred_levels, prob_matrix)"},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Spruce up predictions — spruce","text":"pred (type = \"numeric\") numeric vector predictions. pred_class (type = \"class\") factor \"hard\" class predictions. pred_levels, prob_matrix (type = \"prob\") pred_levels character vector original levels outcome used training. prob_matrix numeric matrix class probabilities many columns levels pred_levels, order.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Spruce up predictions — spruce","text":"tibble, ideally number rows new_data passed predict(). column names number columns vary based function used, standardized.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Spruce up predictions — spruce","text":"running spruce_*() function, always use validation function validate_prediction_size() ensure number rows returned number rows input (new_data).","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":null,"dir":"Reference","previous_headings":"","what":"Standardize the outcome — standardize","title":"Standardize the outcome — standardize","text":"time, input model flexible enough capture number different input types user. standardize() focuses capturing flexibility outcome.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Standardize the outcome — standardize","text":"","code":"standardize(y)"},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Standardize the outcome — standardize","text":"y outcome. can : factor vector numeric vector 1D numeric array numeric matrix column names 2D numeric array column names data frame numeric factor columns","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Standardize the outcome — standardize","text":"possible values y transformed tibble standardization. Vectors transformed tibble single column named \".outcome\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Standardize the outcome — standardize","text":"standardize() called mold() using XY interface (.e. 
y argument supplied).","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Standardize the outcome — standardize","text":"","code":"standardize(1:5) #> # A tibble: 5 × 1 #> .outcome #> #> 1 1 #> 2 2 #> 3 3 #> 4 4 #> 5 5 standardize(factor(letters[1:5])) #> # A tibble: 5 × 1 #> .outcome #> #> 1 a #> 2 b #> 3 c #> 4 d #> 5 e mat <- matrix(1:10, ncol = 2) colnames(mat) <- c(\"a\", \"b\") standardize(mat) #> # A tibble: 5 × 2 #> a b #> #> 1 1 6 #> 2 2 7 #> 3 3 8 #> 4 4 9 #> 5 5 10 df <- data.frame(x = 1:5, y = 6:10) standardize(df) #> # A tibble: 5 × 2 #> x y #> #> 1 1 6 #> 2 2 7 #> 3 3 8 #> 4 4 9 #> 5 5 10"},{"path":"https://hardhat.tidymodels.org/dev/reference/tune.html","id":null,"dir":"Reference","previous_headings":"","what":"Mark arguments for tuning — tune","title":"Mark arguments for tuning — tune","text":"tune() argument placeholder used recipes, parsnip, tune packages. marks recipes step parsnip model arguments tuning.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/tune.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mark arguments for tuning — tune","text":"","code":"tune(id = \"\")"},{"path":"https://hardhat.tidymodels.org/dev/reference/tune.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mark arguments for tuning — tune","text":"id single character value can used differentiate parameters used multiple places name, user wants add note specified parameter.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/tune.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mark arguments for tuning — tune","text":"call object echos user's input.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/tune.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Mark arguments for tuning — tune","text":"","code":"tune() #> tune() tune(\"your name here\") #> tune(\"your name here\") # In practice, `tune()` is used alongside recipes or parsnip to mark # specific arguments for tuning library(recipes) recipe(mpg ~ ., data = mtcars) %>% step_normalize(all_numeric_predictors()) %>% step_pca(all_numeric_predictors, num_comp = tune()) #> #> ── Recipe ──────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 10 #> #> ── Operations #> • Centering and scaling for: all_numeric_predictors() #> • PCA extraction with: all_numeric_predictors"},{"path":"https://hardhat.tidymodels.org/dev/reference/update_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Update a preprocessing blueprint — update_blueprint","title":"Update a preprocessing blueprint — update_blueprint","text":"update_blueprint() correct way alter elements existing blueprint object. two benefits just blueprint$elem <- new_elem. name updating must already exist blueprint. prevents accidentally updating non-existent elements. 
constructor blueprint automatically run update refresh_blueprint() ensure blueprint still valid.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/update_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Update a preprocessing blueprint — update_blueprint","text":"","code":"update_blueprint(blueprint, ...)"},{"path":"https://hardhat.tidymodels.org/dev/reference/update_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Update a preprocessing blueprint — update_blueprint","text":"blueprint preprocessing blueprint. ... Name-value pairs existing elements blueprint updated.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/update_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Update a preprocessing blueprint — update_blueprint","text":"","code":"blueprint <- default_xy_blueprint() # `intercept` defaults to FALSE blueprint #> XY blueprint: #> #> # Predictors: 0 #> # Outcomes: 0 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble update_blueprint(blueprint, intercept = TRUE) #> XY blueprint: #> #> # Predictors: 0 #> # Outcomes: 0 #> Intercept: TRUE #> Novel Levels: FALSE #> Composition: tibble # Can't update non-existent elements try(update_blueprint(blueprint, intercpt = TRUE)) #> Error in update_blueprint(blueprint, intercpt = TRUE) : #> All elements of `...` must already exist. #> ℹ The following fields are new: \"intercpt\". # Can't add non-valid elements try(update_blueprint(blueprint, intercept = 1)) #> Error in new_blueprint(intercept = intercept, allow_novel_levels = allow_novel_levels, : #> `intercept` must be `TRUE` or `FALSE`, not the number 1."},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure that data contains required column names — validate_column_names","title":"Ensure that data contains required column names — validate_column_names","text":"validate - asserts following: column names data must contain original_names. check - returns following: ok logical. check pass? missing_names character vector. missing column names.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure that data contains required column names — validate_column_names","text":"","code":"validate_column_names(data, original_names) check_column_names(data, original_names)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure that data contains required column names — validate_column_names","text":"data data frame check. original_names character vector. original column names.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure that data contains required column names — validate_column_names","text":"validate_column_names() returns data invisibly. 
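Returning to update_blueprint() above: the same mechanism works for any stored element, not just `intercept`. A small sketch, assuming the default XY blueprint stores its output composition under a `composition` element, as its print method suggests:

```r
library(hardhat)

blueprint <- default_xy_blueprint()

# Swap the output container from tibble to matrix; the constructor re-runs
# via refresh_blueprint(), so an invalid value would still be caught
blueprint <- update_blueprint(blueprint, composition = "matrix")
blueprint
```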
check_column_names() returns named list two components, ok, missing_names.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure that data contains required column names — validate_column_names","text":"special error thrown missing column named \".outcome\". happens case mold() called using xy-method, vector y value supplied rather data frame matrix. case, y coerced data frame, automatic name \".outcome\" added, looked forge(). happens, user tries request outcomes using forge(..., outcomes = TRUE) supplied new_data contain required \".outcome\" column, special error thrown telling . See examples!","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure that data contains required column names — validate_column_names","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure that data contains required column names — validate_column_names","text":"","code":"# --------------------------------------------------------------------------- original_names <- colnames(mtcars) test <- mtcars bad_test <- test[, -c(3, 4)] # All good check_column_names(test, original_names) #> $ok #> [1] TRUE #> #> $missing_names #> character(0) #> # Missing 2 columns check_column_names(bad_test, original_names) #> $ok #> [1] FALSE #> #> $missing_names #> [1] \"disp\" \"hp\" #> # Will error try(validate_column_names(bad_test, original_names)) #> Error in validate_column_names(bad_test, original_names) : #> The following required columns are missing: 'disp', 'hp'. 
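As the Validation section above describes, a developer who wants a different error message can call the check_*() function directly and wrap it themselves. A hypothetical sketch; the wrapper name and message are invented:

```r
library(hardhat)

# Hypothetical wrapper around check_column_names() with a custom message
validate_my_columns <- function(data, original_names) {
  check <- check_column_names(data, original_names)
  if (!check$ok) {
    missing <- paste0("`", check$missing_names, "`", collapse = ", ")
    stop("Your new data is missing these required columns: ", missing, call. = FALSE)
  }
  invisible(data)
}

try(validate_my_columns(mtcars[, -c(3, 4)], colnames(mtcars)))
```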
# --------------------------------------------------------------------------- # Special error when `.outcome` is missing train <- iris[1:100, ] test <- iris[101:150, ] train_x <- subset(train, select = -Species) train_y <- train$Species # Here, y is a vector processed <- mold(train_x, train_y) # So the default column name is `\".outcome\"` processed$outcomes #> # A tibble: 100 × 1 #> .outcome #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 90 more rows # It doesn't affect forge() normally forge(test, processed$blueprint) #> $predictors #> # A tibble: 50 × 4 #> Sepal.Length Sepal.Width Petal.Length Petal.Width #> #> 1 6.3 3.3 6 2.5 #> 2 5.8 2.7 5.1 1.9 #> 3 7.1 3 5.9 2.1 #> 4 6.3 2.9 5.6 1.8 #> 5 6.5 3 5.8 2.2 #> 6 7.6 3 6.6 2.1 #> 7 4.9 2.5 4.5 1.7 #> 8 7.3 2.9 6.3 1.8 #> 9 6.7 2.5 5.8 1.8 #> 10 7.2 3.6 6.1 2.5 #> # ℹ 40 more rows #> #> $outcomes #> NULL #> #> $extras #> NULL #> # But if the outcome is requested, and `\".outcome\"` # is not present in `new_data`, an error is thrown # with very specific instructions try(forge(test, processed$blueprint, outcomes = TRUE)) #> Error in validate_missing_name_isnt_.outcome(check$missing_names) : #> The following required columns are missing: '.outcome'. #> #> (This indicates that `mold()` was called with a vector for `y`. When this is the case, and the outcome columns are requested in `forge()`, `new_data` must include a column with the automatically generated name, '.outcome', containing the outcome.) # To get this to work, just create an .outcome column in new_data test$.outcome <- test$Species forge(test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 50 × 4 #> Sepal.Length Sepal.Width Petal.Length Petal.Width #> #> 1 6.3 3.3 6 2.5 #> 2 5.8 2.7 5.1 1.9 #> 3 7.1 3 5.9 2.1 #> 4 6.3 2.9 5.6 1.8 #> 5 6.5 3 5.8 2.2 #> 6 7.6 3 6.6 2.1 #> 7 4.9 2.5 4.5 1.7 #> 8 7.3 2.9 6.3 1.8 #> 9 6.7 2.5 5.8 1.8 #> 10 7.2 3.6 6.1 2.5 #> # ℹ 40 more rows #> #> $outcomes #> # A tibble: 50 × 1 #> .outcome #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows #> #> $extras #> NULL #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"validate - asserts following: formula must duplicates terms left right hand side formula. check - returns following: ok logical. check pass? duplicates character vector. duplicate terms.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"","code":"validate_no_formula_duplication(formula, original = FALSE) check_no_formula_duplication(formula, original = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"formula formula check. original logical. original names checked, names processing used? 
FALSE, y ~ log(y) allowed names \"y\" \"log(y)\", TRUE, y ~ log(y) allowed original names \"y\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"validate_no_formula_duplication() returns formula invisibly. check_no_formula_duplication() returns named list two components, ok duplicates.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"","code":"# All good check_no_formula_duplication(y ~ x) #> $ok #> [1] TRUE #> #> $duplicates #> character(0) #> # Not good! check_no_formula_duplication(y ~ y) #> $ok #> [1] FALSE #> #> $duplicates #> [1] \"y\" #> # This is generally okay check_no_formula_duplication(y ~ log(y)) #> $ok #> [1] TRUE #> #> $duplicates #> character(0) #> # But you can be more strict check_no_formula_duplication(y ~ log(y), original = TRUE) #> $ok #> [1] FALSE #> #> $duplicates #> [1] \"y\" #> # This would throw an error try(validate_no_formula_duplication(log(y) ~ log(y))) #> Error in validate_no_formula_duplication(log(y) ~ log(y)) : #> The following terms are duplicated on the left and right hand side of the `formula`: 'log(y)'."},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"validate - asserts following: outcomes must binary factor columns. check - returns following: ok logical. check pass? bad_cols character vector. names columns problems. num_levels integer vector. 
actual number levels columns problems.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"","code":"validate_outcomes_are_binary(outcomes) check_outcomes_are_binary(outcomes)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"outcomes object check.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"validate_outcomes_are_binary() returns outcomes invisibly. check_outcomes_are_binary() returns named list three components, ok, bad_cols, num_levels.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"expected way use validation function supply $outcomes element result call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"","code":"# Not a binary factor. 0 levels check_outcomes_are_binary(data.frame(x = 1)) #> $ok #> [1] FALSE #> #> $bad_cols #> [1] \"x\" #> #> $num_levels #> [1] 0 #> # Not a binary factor. 1 level check_outcomes_are_binary(data.frame(x = factor(\"A\"))) #> $ok #> [1] FALSE #> #> $bad_cols #> [1] \"x\" #> #> $num_levels #> [1] 1 #> # All good check_outcomes_are_binary(data.frame(x = factor(c(\"A\", \"B\")))) #> $ok #> [1] TRUE #> #> $bad_cols #> character(0) #> #> $num_levels #> integer(0) #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"validate - asserts following: outcomes must factor columns. check - returns following: ok logical. check pass? bad_classes named list. 
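The Details for these validate_outcomes_*() functions all point at the same pattern: pass them the `$outcomes` element returned by mold(). A minimal sketch of that pattern inside a hypothetical fit interface; the function name and toy data are invented:

```r
library(hardhat)

# Hypothetical fit interface for a binary classifier: validate the molded
# outcomes before doing any estimation
my_fit <- function(x, y) {
  processed <- mold(x, y)
  validate_outcomes_are_univariate(processed$outcomes)
  validate_outcomes_are_binary(processed$outcomes)
  processed
}

train <- data.frame(x = 1:4, class = factor(c("a", "b", "a", "b")))

# Passes both checks
fit <- my_fit(train["x"], train["class"])

# A numeric outcome fails the binary-factor check with an informative error
try(my_fit(train["x"], data.frame(y = c(1, 2, 1, 2))))
```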
names names problematic columns, values classes matching column.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"","code":"validate_outcomes_are_factors(outcomes) check_outcomes_are_factors(outcomes)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"outcomes object check.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"validate_outcomes_are_factors() returns outcomes invisibly. check_outcomes_are_factors() returns named list two components, ok bad_classes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"expected way use validation function supply $outcomes element result call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"","code":"# Not a factor column. check_outcomes_are_factors(data.frame(x = 1)) #> $ok #> [1] FALSE #> #> $bad_classes #> $bad_classes$x #> [1] \"numeric\" #> #> # All good check_outcomes_are_factors(data.frame(x = factor(c(\"A\", \"B\")))) #> $ok #> [1] TRUE #> #> $bad_classes #> list() #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"validate - asserts following: outcomes must numeric columns. check - returns following: ok logical. check pass? bad_classes named list. 
names names problematic columns, values classes matching column.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"","code":"validate_outcomes_are_numeric(outcomes) check_outcomes_are_numeric(outcomes)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"outcomes object check.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"validate_outcomes_are_numeric() returns outcomes invisibly. check_outcomes_are_numeric() returns named list two components, ok bad_classes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"expected way use validation function supply $outcomes element result call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"","code":"# All good check_outcomes_are_numeric(mtcars) #> $ok #> [1] TRUE #> #> $bad_classes #> list() #> # Species is not numeric check_outcomes_are_numeric(iris) #> $ok #> [1] FALSE #> #> $bad_classes #> $bad_classes$Species #> [1] \"factor\" #> #> # This gives an intelligent error message try(validate_outcomes_are_numeric(iris)) #> Error in validate_outcomes_are_numeric(iris) : #> All outcomes must be numeric, but the following are not: #> 'Species': 'factor'"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"validate - asserts following: outcomes must 1 column. Atomic vectors treated 1 column matrices. check - returns following: ok logical. check pass? n_cols single numeric. 
actual number columns.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"","code":"validate_outcomes_are_univariate(outcomes) check_outcomes_are_univariate(outcomes)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"outcomes object check.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"validate_outcomes_are_univariate() returns outcomes invisibly. check_outcomes_are_univariate() returns named list two components, ok n_cols.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"expected way use validation function supply $outcomes element result call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"","code":"validate_outcomes_are_univariate(data.frame(x = 1)) try(validate_outcomes_are_univariate(mtcars)) #> Error in validate_outcomes_are_univariate(mtcars) : #> The outcome must be univariate, but 11 columns were found."},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure that predictions have the correct number of rows — validate_prediction_size","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"validate - asserts following: size pred must size new_data. check - returns following: ok logical. check pass? size_new_data single numeric. size new_data. size_pred single numeric. 
size pred.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"","code":"validate_prediction_size(pred, new_data) check_prediction_size(pred, new_data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"pred tibble. predictions return prediction type. often created using one spruce functions, like spruce_numeric(). new_data data frame new predictors possibly outcomes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"validate_prediction_size() returns pred invisibly. check_prediction_size() returns named list three components, ok, size_new_data, size_pred.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"validation function one developer focused rather user focused. final check used right value returned specific predict() method, mainly \"good practice\" sanity check ensure prediction blueprint always returns number rows new_data, one modeling conventions package tries promote.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. 
, developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"","code":"# Say new_data has 5 rows new_data <- mtcars[1:5, ] # And somehow you generate predictions # for those 5 rows pred_vec <- 1:5 # Then you use `spruce_numeric()` to clean # up these numeric predictions pred <- spruce_numeric(pred_vec) pred #> # A tibble: 5 × 1 #> .pred #> #> 1 1 #> 2 2 #> 3 3 #> 4 4 #> 5 5 # Use this check to ensure that # the number of rows or pred match new_data check_prediction_size(pred, new_data) #> $ok #> [1] TRUE #> #> $size_new_data #> [1] 5 #> #> $size_pred #> [1] 5 #> # An informative error message is thrown # if the rows are different try(validate_prediction_size(spruce_numeric(1:4), new_data)) #> Error in validate_prediction_size(spruce_numeric(1:4), new_data) : #> The size of `new_data` (5) must match the size of `pred` (4)."},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure predictors are all numeric — validate_predictors_are_numeric","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"validate - asserts following: predictors must numeric columns. check - returns following: ok logical. check pass? bad_classes named list. names names problematic columns, values classes matching column.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"","code":"validate_predictors_are_numeric(predictors) check_predictors_are_numeric(predictors)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"predictors object check.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"validate_predictors_are_numeric() returns predictors invisibly. check_predictors_are_numeric() returns named list two components, ok, bad_classes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"expected way use validation function supply $predictors element result call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. 
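Tying validate_prediction_size() back to the spruce_*() helpers above, here is a sketch of how a predict() method might use them together as its final step; the model object and prediction rule are invented stand-ins:

```r
library(hardhat)

# Hypothetical predict() method: forge the new data, make raw predictions,
# standardize them with spruce_numeric(), then sanity-check the row count
predict_my_model <- function(object, new_data) {
  forged <- forge(new_data, object$blueprint)
  pred <- unname(rowSums(as.matrix(forged$predictors)))  # stand-in for a real model
  out <- spruce_numeric(pred)
  validate_prediction_size(out, new_data)
  out
}

molded <- mold(mpg ~ cyl + disp, mtcars)
object <- list(blueprint = molded$blueprint)

predict_my_model(object, mtcars[1:5, ])
```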
check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"","code":"# All good check_predictors_are_numeric(mtcars) #> $ok #> [1] TRUE #> #> $bad_classes #> list() #> # Species is not numeric check_predictors_are_numeric(iris) #> $ok #> [1] FALSE #> #> $bad_classes #> $bad_classes$Species #> [1] \"factor\" #> #> # This gives an intelligent error message try(validate_predictors_are_numeric(iris)) #> Error in validate_predictors_are_numeric(iris) : #> All predictors must be numeric, but the following are not: #> 'Species': 'factor'"},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":null,"dir":"Reference","previous_headings":"","what":"Weighted table — weighted_table","title":"Weighted table — weighted_table","text":"weighted_table() computes weighted contingency table based factors provided ... double vector weights provided weights. can seen weighted extension base::table() alternative stats::xtabs(). weighted_table() always uses exact set levels returned levels() constructing table. results following properties: Missing values found factors never included table unless explicit NA factor level. needed, can added factor base::addNA() forcats::fct_expand(x, NA). Levels found factors actually used underlying data included table value 0. needed, can drop unused factor levels re-running factor factor(), calling forcats::fct_drop(). See examples section information properties.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Weighted table — weighted_table","text":"","code":"weighted_table(..., weights, na_remove = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Weighted table — weighted_table","text":"... Factors equal length use weighted table. ... named, names propagate onto \"dimnames names\" resulting table. least one factor must provided. weights double vector weights used fill cells weighted table. must length factors provided .... na_remove single TRUE FALSE handling whether missing values weights removed summing weights.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Weighted table — weighted_table","text":"weighted table array double values.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Weighted table — weighted_table","text":"result weighted_table() \"table\" class attached . double array. 
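A quick sketch of that point about the underlying storage; the toy factor and weights are invented here:

```r
library(hardhat)

x <- factor(c("a", "b", "a"))
w <- c(0.5, 1.25, 2)

tab <- weighted_table(x = x, weights = w)

class(tab)   # still carries the "table" class
typeof(tab)  # but is stored as double, so fractional weights survive
tab
```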
\"table\" objects defined containing integer counts, weighted tables can utilize fractional weights.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Weighted table — weighted_table","text":"","code":"x <- factor(c(\"x\", \"y\", \"z\", \"x\", \"x\", \"y\")) y <- factor(c(\"a\", \"b\", \"a\", \"a\", \"b\", \"b\")) w <- c(1.5, 2, 1.1, .5, 3, 2) weighted_table(x = x, y = y, weights = w) #> y #> x a b #> x 2.0 3 #> y 0.0 4 #> z 1.1 0 # --------------------------------------------------------------------------- # If `weights` contains missing values, then missing values will be # propagated into the weighted table x <- factor(c(\"x\", \"y\", \"y\")) y <- factor(c(\"a\", \"b\", \"b\")) w <- c(1, NA, 3) weighted_table(x = x, y = y, weights = w) #> y #> x a b #> x 1 0 #> y 0 NA # You can remove the missing values while summing up the weights with # `na_remove = TRUE` weighted_table(x = x, y = y, weights = w, na_remove = TRUE) #> y #> x a b #> x 1 0 #> y 0 3 # --------------------------------------------------------------------------- # If there are missing values in the factors, those typically don't show # up in the weighted table x <- factor(c(\"x\", NA, \"y\", \"x\")) y <- factor(c(\"a\", \"b\", \"a\", NA)) w <- 1:4 weighted_table(x = x, y = y, weights = w) #> y #> x a b #> x 1 0 #> y 3 0 # This is because the missing values aren't considered explicit levels levels(x) #> [1] \"x\" \"y\" # You can force them to show up in the table by using `addNA()` ahead of time # (or `forcats::fct_expand(x, NA)`) x <- addNA(x, ifany = TRUE) y <- addNA(y, ifany = TRUE) levels(x) #> [1] \"x\" \"y\" NA weighted_table(x = x, y = y, weights = w) #> y #> x a b #> x 1 0 4 #> y 3 0 0 #> 0 2 0 # --------------------------------------------------------------------------- # If there are levels in your factors that aren't actually used in the # underlying data, then they will still show up in the table with a `0` value x <- factor(c(\"x\", \"y\", \"x\"), levels = c(\"x\", \"y\", \"z\")) y <- factor(c(\"a\", \"b\", \"a\"), levels = c(\"a\", \"b\", \"c\")) w <- 1:3 weighted_table(x = x, y = y, weights = w) #> y #> x a b c #> x 4 0 0 #> y 0 2 0 #> z 0 0 0 # If you want to drop these empty factor levels from the result, you can # rerun `factor()` ahead of time to drop them (or `forcats::fct_drop()`) x <- factor(x) y <- factor(y) levels(x) #> [1] \"x\" \"y\" weighted_table(x = x, y = y, weights = w) #> y #> x a b #> x 4 0 #> y 0 2"},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-development-version","dir":"Changelog","previous_headings":"","what":"hardhat (development version)","title":"hardhat (development version)","text":"Added new vector class called quantile_pred() house predictions made quantile regression model (tidymodels/parsnip#1191, @dajmcdon).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-140","dir":"Changelog","previous_headings":"","what":"hardhat 1.4.0","title":"hardhat 1.4.0","text":"CRAN release: 2024-06-02 Added extract_postprocessor() generic (#247). 
Added extract_fit_time() generic (#218).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-131","dir":"Changelog","previous_headings":"","what":"hardhat 1.3.1","title":"hardhat 1.3.1","text":"CRAN release: 2024-02-02 Changed Rd name modeling-package -> modeling-usethis request CRAN.","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-130","dir":"Changelog","previous_headings":"","what":"hardhat 1.3.0","title":"hardhat 1.3.0","text":"CRAN release: 2023-03-30 New family spruce_*_multiple() functions support standardizing multi-outcome predictions (#223, contributions @cregouby). New fct_encode_one_hot() encodes factor one-hot indicator matrix (#215). default_recipe_blueprint() gained strings_as_factors argument, passed recipes::prep() (#212). Using formula blueprint indicators = \"none\" character predictors now works properly provide character column contains single value (#213). Using formula blueprint indicators = \"traditional\" indicators = \"one_hot\" character predictors now properly enforces factor levels generated predictors new_data forge() (#213). Using formula blueprint indicators = \"none\" now works correctly variable formula space name (#217). mold() forge() generally less overhead (#235, #236). Added documentation importance frequency weights ?importance_weights() ?frequency_weights() (#214). New internal recompose() helper (#220).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-120","dir":"Changelog","previous_headings":"","what":"hardhat 1.2.0","title":"hardhat 1.2.0","text":"CRAN release: 2022-06-30 reverted change made hardhat 1.0.0 caused recipe preprocessors drop non-standard roles default calling forge(). Determining roles required bake() time really something controlled within recipes, hardhat. results following changes (#207): new argument, bake_dependent_roles, added default_recipe_blueprint() 1.0.0 removed. longer needed new behavior. default, forge() pass columns new_data bake() except roles \"outcome\" \"case_weights\". outcomes = TRUE, also pass \"outcome\" role. essentially pre-1.0.0 behavior, means , default, non-standard roles required bake() time. assumption now also enforced recipes 1.0.0, even aren’t using hardhat workflow. development version recipes, become recipes 1.0.0, new update_role_requirements() function can used declare role required bake() time. hardhat now knows respect feature, forge() won’t pass columns new_data bake() roles aren’t required bake() time.","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-110","dir":"Changelog","previous_headings":"","what":"hardhat 1.1.0","title":"hardhat 1.1.0","text":"CRAN release: 2022-06-10 Fixed bug results calling mold() using hardhat < 1.0.0 longer compatible calling forge() hardhat >= 1.0.0. occur save workflow object fitting , load R session uses newer version hardhat (#200). Internal details related blueprints work alongside mold() forge() heavily re-factored support fix #200. changes mostly internal developer focused. include: Blueprints longer store clean/process functions used calling mold() forge(). stored blueprint$mold$clean(), blueprint$mold$process(), blueprint$forge$clean(), blueprint$forge$process() strictly internal use. Storing blueprint caused problems blueprints created old versions hardhat unlikely compatible newer versions hardhat. change means new_blueprint() blueprint constructors longer mold forge arguments. run_mold() repurposed. 
Rather calling $clean() $process() functions (, mentioned , longer blueprint), methods S3 generic rewritten directly call current versions clean process functions live hardhat. result less accidental breaking changes. New run_forge() forge() equivalent run_mold(). handles clean/process steps previously handled $clean() $process() functions stored directly blueprint.","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-100","dir":"Changelog","previous_headings":"","what":"hardhat 1.0.0","title":"hardhat 1.0.0","text":"CRAN release: 2022-06-01 Recipe preprocessors now ignore non-standard recipe roles (.e. \"outcome\" \"predictor\") default calling forge(). Previously, assumed non-standard role columns present original training data also required test data forge() called. seems often case columns actually required bake() new data, often won’t even present making predictions new data. example, custom \"case_weights\" role might required computing case-weighted estimates prep() time, won’t necessary bake() time (since estimates already pre-computed stored). account case require specific non-standard role present bake() time, default_recipe_blueprint() gained new argument, bake_dependent_roles, can set character vector non-standard roles required. New weighted_table() generating weighted contingency table, similar table() (#191). New experimental family functions working case weights. particular, frequency_weights() importance_weights() (#190). use_modeling_files() create_modeling_package() longer open package documentation file current RStudio session (#192). rlang >=1.0.2 vctrs >=0.4.1 now required. Bumped required R version >= 3.4.0 reflect tidyverse standards.","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-020","dir":"Changelog","previous_headings":"","what":"hardhat 0.2.0","title":"hardhat 0.2.0","text":"CRAN release: 2022-01-24 Moved tune() tune hardhat (#181). Added extract_parameter_dials() extract_parameter_set_dials() generics extend family extract_*() generics. mold() longer misinterprets :: interaction term (#174). indicators = \"none\", mold() longer misinterprets factor columns part inline function similarly named non-factor column also present (#182).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-016","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.6","title":"hardhat 0.1.6","text":"CRAN release: 2021-07-14 Added new family extract_*() S3 generics extracting important components various tidymodels objects. S3 methods defined tidymodels packages. example, tune register extract_workflow() method easily extract workflow embedded within result tune::last_fit(). logical indicators argument longer allowed default_formula_blueprint(). soft-deprecated hardhat 0.1.4, now result error (#144).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-015","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.5","title":"hardhat 0.1.5","text":"CRAN release: 2020-11-09 use_modeling_files() (therefore, create_modeling_package()) now ensures generated functions templated model name. makes easier add multiple models package (#152). 
preprocessors can now mold() forge() predictors one three output formats (either tibble, matrix, dgCMatrix sparse matrix) via composition argument blueprint (#100, #150).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-014","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.4","title":"hardhat 0.1.4","text":"CRAN release: 2020-07-02 Setting indicators = \"none\" default_formula_blueprint() longer accidentally expands character columns dummy variable columns. now left completely untouched pass characters. indicators = \"traditional\" indicators = \"one_hot\", character columns treated unordered factors (#139). indicators argument default_formula_blueprint() now takes character input rather logical. update: Logical input indicators continue work, warning, hardhat 0.1.6, formally deprecated. also new indicators = \"one_hot\" option expands factor columns K dummy variable columns corresponding K levels factor, rather traditional K - 1 expansion.","code":"indicators = TRUE -> indicators = \"traditional\" indicators = FALSE -> indicators = \"none\""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-013","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.3","title":"hardhat 0.1.3","text":"CRAN release: 2020-05-20 Updated stay current latest vctrs 0.3.0 conventions. scream() now stricter checking ordered factor levels new data ptype used training time. Ordered factors must now exactly set levels training prediction time. See ?scream new graphic outlining factor levels handled (#132). novel factor level check scream() longer throws novel level warning NA values (#131).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-012","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.2","title":"hardhat 0.1.2","text":"CRAN release: 2020-02-28 default_recipe_blueprint() now defaults prepping recipes fresh = TRUE. safer default, guards user accidentally skipping preprocessing step tuning (#122). model_matrix() now correctly strips attributes result internal call model.matrix().","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-011","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.1","title":"hardhat 0.1.1","text":"CRAN release: 2020-01-08 forge() now works correctly used recipe predictor multiple roles (#120). Require recipes 0.1.8 incorporate important bug fix juice() 0-column selections.","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-010","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.0","title":"hardhat 0.1.0","text":"CRAN release: 2019-12-16 Added NEWS.md file track changes package.","code":""}] +[{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. 
pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement codeofconduct@posit.co. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. 
public apology may requested.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to tidymodels","title":"Contributing to tidymodels","text":"detailed information contributing tidymodels packages, see development contributing guide.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CONTRIBUTING.html","id":"documentation","dir":"","previous_headings":"","what":"Documentation","title":"Contributing to tidymodels","text":"Typos grammatical errors documentation may edited directly using GitHub web interface, long changes made source file. YES ✅: edit roxygen comment .R file R/ directory. 🚫: edit .Rd file man/ directory. use roxygen2, Markdown syntax, documentation.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CONTRIBUTING.html","id":"code","dir":"","previous_headings":"","what":"Code","title":"Contributing to tidymodels","text":"submit 🎯 pull request tidymodels package, always file issue confirm tidymodels team agrees idea happy basic proposal. tidymodels packages work together. package contains unit tests, integration tests tests using packages contained extratests. recommend create Git branch pull request (PR). 
Look build status making changes. README contains badges continuous integration services used package. New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. user-facing changes, add bullet top NEWS.md current development version header describing changes made followed GitHub username, links relevant issue(s)/PR(s). use testthat. Contributions test cases included easier accept. contribution spans use one package, consider building extratests changes check breakages /adding new tests . Let us know PR ran extra tests.","code":""},{"path":"https://hardhat.tidymodels.org/dev/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"Code","what":"Code of Conduct","title":"Contributing to tidymodels","text":"project released Contributor Code Conduct. contributing project, agree abide terms.","code":""},{"path":"https://hardhat.tidymodels.org/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 hardhat authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Forging data for predictions","text":"counterpart mold() (can read vignette(\"mold\", \"hardhat\")), forge(). mold() used preprocess training data, forge() used preprocess new data going use generate predictions model. Like mold(), forge() intended used interactively. Instead, called predict() method model. learn using forge() modeling package, see vignette(\"package\", \"hardhat\"). rest vignette focused many features forge() offers.","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"connection-with-mold","dir":"Articles","previous_headings":"","what":"Connection with mold()","title":"Forging data for predictions","text":"mold() used, one returned objects blueprint. key preprocessing new data forge(). instance, assume ’ve called mold() like : formula blueprint returned , knows predictors outcomes used training time, knows don’t want expand species dummy variables setting indicators = \"none\". time predict() new data, data passed forge() along blueprint just created. Note predictors, species expanded blueprint knew preprocessing options set mold() called. forge() always returns three things, look familiar used mold(). predictors holds tibble predictors. outcomes returned NULL default, predict() methods assume access new predictors. Alternatively, read moment, can contain tibble new outcomes. 
extras varies per blueprint, catch-slot hold kind extra objects returned blueprint mold() called.","code":"penguin_train <- penguins[1:300,] penguin_test <- penguins[-(1:300),] penguin_form <- mold( log(body_mass_g) ~ species + bill_length_mm, penguin_train, blueprint = default_formula_blueprint(indicators = \"none\") ) formula_eng <- penguin_form$blueprint formula_eng #> Formula blueprint: #> #> # Predictors: 2 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble #> Indicators: none forge(penguin_test, formula_eng) #> $predictors #> # A tibble: 33 × 2 #> bill_length_mm species #> #> 1 47.5 Chinstrap #> 2 47.6 Chinstrap #> 3 52 Chinstrap #> 4 46.9 Chinstrap #> 5 53.5 Chinstrap #> 6 49 Chinstrap #> 7 46.2 Chinstrap #> 8 50.9 Chinstrap #> 9 45.5 Chinstrap #> 10 50.9 Chinstrap #> # ℹ 23 more rows #> #> $outcomes #> NULL #> #> $extras #> $extras$offset #> NULL"},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"outcomes","dir":"Articles","previous_headings":"","what":"Outcomes","title":"Forging data for predictions","text":"Generally generating predictions need know new predictors. However, performing resampling need processed outcomes well can compute cross validated performance statistics decide multiple models, choose hyperparameters. can easily request outcomes well outcomes = TRUE. Just like predictors, get processed using steps done outcomes fit time.","code":"forge(penguin_test, formula_eng, outcomes = TRUE) #> $predictors #> # A tibble: 33 × 2 #> bill_length_mm species #> #> 1 47.5 Chinstrap #> 2 47.6 Chinstrap #> 3 52 Chinstrap #> 4 46.9 Chinstrap #> 5 53.5 Chinstrap #> 6 49 Chinstrap #> 7 46.2 Chinstrap #> 8 50.9 Chinstrap #> 9 45.5 Chinstrap #> 10 50.9 Chinstrap #> # ℹ 23 more rows #> #> $outcomes #> # A tibble: 33 × 1 #> `log(body_mass_g)` #> #> 1 8.27 #> 2 8.26 #> 3 8.48 #> 4 7.90 #> 5 8.41 #> 6 8.28 #> 7 8.20 #> 8 8.17 #> 9 8.16 #> 10 8.21 #> # ℹ 23 more rows #> #> $extras #> $extras$offset #> NULL"},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"validation","dir":"Articles","previous_headings":"","what":"Validation","title":"Forging data for predictions","text":"One useful things forge() robustness malformed new data. isn’t unreasonable enforce new data user provides prediction time type data used fit time. Type defined vctrs sense, uses essentially means number checks test data pass, including: column names testing data training data must . type column testing data must columns found training data. means: classes must (e.g. factor training, must factor testing). attributes must (e.g. levels factors must also ). Almost validation possible use vctrs::vec_cast(), called forge().","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"column-existence","dir":"Articles","previous_headings":"Validation","what":"Column existence","title":"Forging data for predictions","text":"easiest example demonstrate missing columns testing data. forge() won’t let continue required predictors used training also present new data.","code":"test_missing_column <- subset(penguin_test, select = -species) forge(test_missing_column, formula_eng) #> Error in `validate_column_names()`: #> ! 
The following required columns are missing: 'species'."},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"column-types","dir":"Articles","previous_headings":"Validation","what":"Column types","title":"Forging data for predictions","text":"initial scan column names done, deeper scan column performed, checking type column. instance, happens new species column double, factor? error thrown, indicating double can’t cast factor.","code":"test_species_double <- penguin_test test_species_double$species <- as.double(test_species_double$species) forge(test_species_double, formula_eng) #> Error in `scream()`: #> ! Can't convert `data$species` to match type of `species` >."},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"lossless-conversion","dir":"Articles","previous_headings":"Validation","what":"Lossless conversion","title":"Forging data for predictions","text":"error message suggests cases can automatically cast one type another, fact true! Rather double, species just character? Interesting, case can actually convert factor, class even levels restored. key lossless conversion. lost information converting character species factor unique character values subset original levels. example conversion lossy character species column value level training data. case: lossy warning thrown species column still converted factor right levels novel level removed value set NA","code":"test_species_character <- penguin_test test_species_character$species <- as.character(test_species_character$species) forged_char <- forge(test_species_character, formula_eng) forged_char$predictors #> # A tibble: 33 × 2 #> bill_length_mm species #> #> 1 47.5 Chinstrap #> 2 47.6 Chinstrap #> 3 52 Chinstrap #> 4 46.9 Chinstrap #> 5 53.5 Chinstrap #> 6 49 Chinstrap #> 7 46.2 Chinstrap #> 8 50.9 Chinstrap #> 9 45.5 Chinstrap #> 10 50.9 Chinstrap #> # ℹ 23 more rows class(forged_char$predictors$species) #> [1] \"factor\" levels(forged_char$predictors$species) #> [1] \"Adelie\" \"Chinstrap\" \"Gentoo\" test_species_lossy <- penguin_test test_species_lossy$species <- as.character(test_species_lossy$species) test_species_lossy$species[2] <- \"im new!\" forged_lossy <- forge(test_species_lossy, formula_eng) #> Warning: Novel levels found in column 'species': 'im new!'. The levels #> have been removed, and values have been coerced to 'NA'. forged_lossy$predictors #> # A tibble: 33 × 2 #> bill_length_mm species #> #> 1 47.5 Chinstrap #> 2 47.6 NA #> 3 52 Chinstrap #> 4 46.9 Chinstrap #> 5 53.5 Chinstrap #> 6 49 Chinstrap #> 7 46.2 Chinstrap #> 8 50.9 Chinstrap #> 9 45.5 Chinstrap #> 10 50.9 Chinstrap #> # ℹ 23 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"recipes-and-forge","dir":"Articles","previous_headings":"","what":"Recipes and forge()","title":"Forging data for predictions","text":"Just like formula method, recipe can used preprocessor fit prediction time. hardhat handles calling prep(), juice(), bake() right times. instance, say recipe just creates dummy variables species. blueprint recipe blueprint. 
forge(), can request outcomes predictors outcomes separated like formula method.","code":"library(recipes) rec <- recipe(bill_length_mm ~ body_mass_g + species, penguin_train) %>% step_dummy(species) penguin_recipe <- mold(rec, penguin_train) penguin_recipe$predictors #> # A tibble: 300 × 3 #> body_mass_g species_Chinstrap species_Gentoo #> #> 1 3750 0 0 #> 2 3800 0 0 #> 3 3250 0 0 #> 4 3450 0 0 #> 5 3650 0 0 #> 6 3625 0 0 #> 7 4675 0 0 #> 8 3200 0 0 #> 9 3800 0 0 #> 10 4400 0 0 #> # ℹ 290 more rows recipe_eng <- penguin_recipe$blueprint recipe_eng #> Recipe blueprint: #> #> # Predictors: 2 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble forge(penguin_test, recipe_eng, outcomes = TRUE) #> $predictors #> # A tibble: 33 × 3 #> body_mass_g species_Chinstrap species_Gentoo #> #> 1 3900 1 0 #> 2 3850 1 0 #> 3 4800 1 0 #> 4 2700 1 0 #> 5 4500 1 0 #> 6 3950 1 0 #> 7 3650 1 0 #> 8 3550 1 0 #> 9 3500 1 0 #> 10 3675 1 0 #> # ℹ 23 more rows #> #> $outcomes #> # A tibble: 33 × 1 #> bill_length_mm #> #> 1 47.5 #> 2 47.6 #> 3 52 #> 4 46.9 #> 5 53.5 #> 6 49 #> 7 46.2 #> 8 50.9 #> 9 45.5 #> 10 50.9 #> # ℹ 23 more rows #> #> $extras #> $extras$roles #> NULL"},{"path":"https://hardhat.tidymodels.org/dev/articles/forge.html","id":"a-note-on-recipes","dir":"Articles","previous_headings":"Recipes and forge()","what":"A note on recipes","title":"Forging data for predictions","text":"One complication recipes , bake() step, processing happens predictors outcomes together. means might run situation outcomes seem required forge(), even aren’t requesting . new_data doesn’t outcome, baking recipe fail even don’t request outcomes returned forge(). way around use built-recipe argument, skip, step containing outcome. skips processing step bake() time. tradeoff need aware . just interested generating predictions completely new data, can safely use skip = TRUE almost never access corresponding true outcomes preprocess compare . know need resampling, likely access outcomes resampling step can cross-validate performance. case, can’t set skip = TRUE outcomes won’t processed, since access , shouldn’t need . example, used penguin_test recipe (outcome), bill_length_mm wouldn’t get centered forge() called. probably skipped step knew test data outcome.","code":"rec2 <- recipe(bill_length_mm ~ body_mass_g + species, penguin_train) %>% step_dummy(species) %>% step_center(bill_length_mm) # Here we modify the outcome penguin_recipe2 <- mold(rec2, penguin_train) recipe_eng_log_outcome <- penguin_recipe2$blueprint penguin_test_no_outcome <- subset(penguin_test, select = -bill_length_mm) forge(penguin_test_no_outcome, recipe_eng_log_outcome) #> Error in `step_center()`: #> ! The following required column is missing from `new_data` in step #> 'center_UUEdL': bill_length_mm. 
rec3 <- recipe(bill_length_mm ~ body_mass_g + species, penguin_train) %>% step_dummy(species) %>% step_center(bill_length_mm, skip = TRUE) penguin_recipe3 <- mold(rec3, penguin_train) recipe_eng_skip_outcome <- penguin_recipe3$blueprint forge(penguin_test_no_outcome, recipe_eng_skip_outcome) #> $predictors #> # A tibble: 33 × 3 #> body_mass_g species_Chinstrap species_Gentoo #> #> 1 3900 1 0 #> 2 3850 1 0 #> 3 4800 1 0 #> 4 2700 1 0 #> 5 4500 1 0 #> 6 3950 1 0 #> 7 3650 1 0 #> 8 3550 1 0 #> 9 3500 1 0 #> 10 3675 1 0 #> # ℹ 23 more rows #> #> $outcomes #> NULL #> #> $extras #> $extras$roles #> NULL forge(penguin_test, recipe_eng_skip_outcome, outcomes = TRUE)$outcomes #> # A tibble: 33 × 1 #> bill_length_mm #> #> 1 47.5 #> 2 47.6 #> 3 52 #> 4 46.9 #> 5 53.5 #> 6 49 #> 7 46.2 #> 8 50.9 #> 9 45.5 #> 10 50.9 #> # ℹ 23 more rows # Notice that the `outcome` values haven't been centered # and are the same as before head(penguin_test$bill_length_mm) #> [1] 47.5 47.6 52.0 46.9 53.5 49.0"},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Molding data for modeling","text":"modeling functions, data must accepted user format outcomes predictors specified. next step often validate preprocess input way prepare actual modeling implementation function. example, formula method used, R provides infrastructure preprocessing user input model.frame() model.matrix() functions. formula method way specify modeling terms. also XY method, x y supplied directly, , recently, recipe implementation can used preprocess data using set sequential steps. developer, likely won’t want care details methods work, (hopefully) still want provide three interfaces shiny new model. mold() makes easy , takes care details preprocessing user input methods. intended use mold() called user facing modeling function. see action, look vignette found : vignette(\"package\", \"hardhat\"). rest vignette focused various different ways use mold(), keep mind generally used interactive function like .","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"a-first-example","dir":"Articles","previous_headings":"","what":"A First Example","title":"Molding data for modeling","text":"familiar interface R users likely formula interface. case, terms specified using formula notation: outcomes ~ predictors. Generally, developer, call model.frame() model.matrix() result coerce right format ingestion model. mold() handles . mold() returns four things. Two immediately useful, almost always applicable modeling implementation created. first predictors, returned tibble. required processing done , just focus modeling implementation. Second outcomes, also returned tibble. used , processing outcome specified formula also done . Beyond two elements, mold() also returns slot extras might generated preprocessing, aren’t specifically predictors outcomes. example, offset() can specified directly formula, isn’t technically predictor. Lastly, mold() returns important object, blueprint. responsible knowing preprocess training data, new data prediction time. developer, attach blueprint model object returning user. 
information , see package development vignette, vignette(\"package\", \"hardhat\").","code":"penguin_form <- mold(body_mass_g ~ log(bill_length_mm), penguins) names(penguin_form) #> [1] \"predictors\" \"outcomes\" \"blueprint\" \"extras\" penguin_form$predictors #> # A tibble: 333 × 1 #> `log(bill_length_mm)` #> #> 1 3.67 #> 2 3.68 #> 3 3.70 #> 4 3.60 #> 5 3.67 #> 6 3.66 #> 7 3.67 #> 8 3.72 #> 9 3.65 #> 10 3.54 #> # ℹ 323 more rows penguin_form$outcomes #> # A tibble: 333 × 1 #> body_mass_g #> #> 1 3750 #> 2 3800 #> 3 3250 #> 4 3450 #> 5 3650 #> 6 3625 #> 7 4675 #> 8 3200 #> 9 3800 #> 10 4400 #> # ℹ 323 more rows mold(body_mass_g ~ log(bill_length_mm) + offset(bill_depth_mm), penguins)$extras #> $offset #> # A tibble: 333 × 1 #> .offset #> #> 1 18.7 #> 2 17.4 #> 3 18 #> 4 19.3 #> 5 20.6 #> 6 17.8 #> 7 19.6 #> 8 17.6 #> 9 21.2 #> 10 21.1 #> # ℹ 323 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"blueprints","dir":"Articles","previous_headings":"","what":"blueprints","title":"Molding data for modeling","text":"mentioned , one objects mold() returns blueprint responsible controlling preprocessing. multiple blueprints available hardhat, call mold() one selected automatically . following two calls generate result, using default formula blueprint. blueprint can tweaked change processing interface occurs, options vary per blueprint. understand ’d ever want , read !","code":"identical( mold(~ body_mass_g, penguins), mold(~ body_mass_g, penguins, blueprint = default_formula_blueprint()) ) #> [1] TRUE"},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"formulas","dir":"Articles","previous_headings":"","what":"Formulas","title":"Molding data for modeling","text":"Now basic idea mold() works, can talk interesting functionality.","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"intercepts","dir":"Articles","previous_headings":"Formulas","what":"Intercepts","title":"Molding data for modeling","text":"One challenge standard formula interface , default, intercepts always implicitly present added data set automatically. works great simple regression case. However, models might either always require never allow intercept, still use formula interface convenience (example, earth). led many ad hoc solutions prevent user removing adding intercept. get around , mold() never add intercept default. Instead, addition intercept completely controlled formula blueprint argument, intercept. error thrown intercept removal term specified:","code":"no_intercept <- mold(~ body_mass_g, penguins) no_intercept$predictors #> # A tibble: 333 × 1 #> body_mass_g #> #> 1 3750 #> 2 3800 #> 3 3250 #> 4 3450 #> 5 3650 #> 6 3625 #> 7 4675 #> 8 3200 #> 9 3800 #> 10 4400 #> # ℹ 323 more rows with_intercept <- mold( ~ body_mass_g, penguins, blueprint = default_formula_blueprint(intercept = TRUE) ) with_intercept$predictors #> # A tibble: 333 × 2 #> `(Intercept)` body_mass_g #> #> 1 1 3750 #> 2 1 3800 #> 3 1 3250 #> 4 1 3450 #> 5 1 3650 #> 6 1 3625 #> 7 1 4675 #> 8 1 3200 #> 9 1 3800 #> 10 1 4400 #> # ℹ 323 more rows mold(~ body_mass_g - 1, penguins) #> Error in `mold_formula_default_clean()`: #> ! `formula` must not contain the intercept removal term: `- 1`. mold(~ body_mass_g + 0, penguins) #> Error in `mold_formula_default_clean()`: #> ! 
`formula` must not contain the intercept removal term: `+ 0` or #> `0 +`."},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"dummy-variables","dir":"Articles","previous_headings":"Formulas","what":"Dummy variables","title":"Molding data for modeling","text":"One nice things formula interface expands factors dummy variable columns . Like intercepts, great…isn’t. example, ranger fits random forest, can take factors directly, still uses formula notation. case, great factor columns specified predictors weren’t expanded. job blueprint argument, indicators. Note: ’s worth mentioning intercept present, base R expands first factor completely K indicator columns corresponding K levels present factor (also known one-hot encoding). Subsequent columns expanded traditional K - 1 columns. intercept present, K - 1 columns generated factor predictors.","code":"expanded_dummies <- mold(~ body_mass_g + species, penguins) expanded_dummies$predictors #> # A tibble: 333 × 4 #> body_mass_g speciesAdelie speciesChinstrap speciesGentoo #> #> 1 3750 1 0 0 #> 2 3800 1 0 0 #> 3 3250 1 0 0 #> 4 3450 1 0 0 #> 5 3650 1 0 0 #> 6 3625 1 0 0 #> 7 4675 1 0 0 #> 8 3200 1 0 0 #> 9 3800 1 0 0 #> 10 4400 1 0 0 #> # ℹ 323 more rows non_expanded_dummies <- mold( ~ body_mass_g + species, penguins, blueprint = default_formula_blueprint(indicators = \"none\") ) non_expanded_dummies$predictors #> # A tibble: 333 × 2 #> body_mass_g species #> #> 1 3750 Adelie #> 2 3800 Adelie #> 3 3250 Adelie #> 4 3450 Adelie #> 5 3650 Adelie #> 6 3625 Adelie #> 7 4675 Adelie #> 8 3200 Adelie #> 9 3800 Adelie #> 10 4400 Adelie #> # ℹ 323 more rows k_cols <- mold(~ species, penguins) k_minus_one_cols <- mold( ~ species, penguins, blueprint = default_formula_blueprint(intercept = TRUE) ) colnames(k_cols$predictors) #> [1] \"speciesAdelie\" \"speciesChinstrap\" \"speciesGentoo\" colnames(k_minus_one_cols$predictors) #> [1] \"(Intercept)\" \"speciesChinstrap\" \"speciesGentoo\""},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"multivariate-outcomes","dir":"Articles","previous_headings":"Formulas","what":"Multivariate outcomes","title":"Molding data for modeling","text":"One frustrating things working formula method multivariate outcomes bit clunky specify. might look like 3 columns, actually 2, first column named cbind(body_mass_g, bill_length_mm), actually matrix 2 columns, body_mass_g bill_length_mm inside . default formula blueprint used mold() allows specify multiple outcomes like specify multiple predictors. can even inline transformations outcome, although much , ’d advise using recipe instead. 
outcomes holds two outcomes columns.","code":".f <- cbind(body_mass_g, bill_length_mm) ~ bill_depth_mm frame <- model.frame(.f, penguins) head(frame) #> cbind(body_mass_g, bill_length_mm).body_mass_g #> 1 3750.0 #> 2 3800.0 #> 3 3250.0 #> 4 3450.0 #> 5 3650.0 #> 6 3625.0 #> cbind(body_mass_g, bill_length_mm).bill_length_mm bill_depth_mm #> 1 39.1 18.7 #> 2 39.5 17.4 #> 3 40.3 18.0 #> 4 36.7 19.3 #> 5 39.3 20.6 #> 6 38.9 17.8 ncol(frame) #> [1] 2 class(frame$`cbind(body_mass_g, bill_length_mm)`) #> [1] \"matrix\" \"array\" head(frame$`cbind(body_mass_g, bill_length_mm)`) #> body_mass_g bill_length_mm #> [1,] 3750 39.1 #> [2,] 3800 39.5 #> [3,] 3250 40.3 #> [4,] 3450 36.7 #> [5,] 3650 39.3 #> [6,] 3625 38.9 multivariate <- mold(body_mass_g + log(bill_length_mm) ~ bill_depth_mm, penguins) multivariate$outcomes #> # A tibble: 333 × 2 #> body_mass_g `log(bill_length_mm)` #> #> 1 3750 3.67 #> 2 3800 3.68 #> 3 3250 3.70 #> 4 3450 3.60 #> 5 3650 3.67 #> 6 3625 3.66 #> 7 4675 3.67 #> 8 3200 3.72 #> 9 3800 3.65 #> 10 4400 3.54 #> # ℹ 323 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"xy","dir":"Articles","previous_headings":"","what":"XY","title":"Molding data for modeling","text":"second interface XY interface, useful predictors outcomes specified separately. interface doesn’t much way preprocessing, let specify intercept blueprint specific arguments. Rather default_formula_blueprint(), uses default_xy_blueprint().","code":"x <- subset(penguins, select = -body_mass_g) y <- subset(penguins, select = body_mass_g) penguin_xy <- mold(x, y) penguin_xy$predictors #> # A tibble: 333 × 6 #> species island bill_length_mm bill_depth_mm flipper_length_mm sex #> #> 1 Adelie Torgersen 39.1 18.7 181 male #> 2 Adelie Torgersen 39.5 17.4 186 female #> 3 Adelie Torgersen 40.3 18 195 female #> 4 Adelie Torgersen 36.7 19.3 193 female #> 5 Adelie Torgersen 39.3 20.6 190 male #> 6 Adelie Torgersen 38.9 17.8 181 female #> 7 Adelie Torgersen 39.2 19.6 195 male #> 8 Adelie Torgersen 41.1 17.6 182 female #> 9 Adelie Torgersen 38.6 21.2 191 male #> 10 Adelie Torgersen 34.6 21.1 198 male #> # ℹ 323 more rows penguin_xy$outcomes #> # A tibble: 333 × 1 #> body_mass_g #> #> 1 3750 #> 2 3800 #> 3 3250 #> 4 3450 #> 5 3650 #> 6 3625 #> 7 4675 #> 8 3200 #> 9 3800 #> 10 4400 #> # ℹ 323 more rows xy_with_intercept <- mold(x, y, blueprint = default_xy_blueprint(intercept = TRUE)) xy_with_intercept$predictors #> # A tibble: 333 × 7 #> `(Intercept)` species island bill_length_mm bill_depth_mm #> #> 1 1 Adelie Torgersen 39.1 18.7 #> 2 1 Adelie Torgersen 39.5 17.4 #> 3 1 Adelie Torgersen 40.3 18 #> 4 1 Adelie Torgersen 36.7 19.3 #> 5 1 Adelie Torgersen 39.3 20.6 #> 6 1 Adelie Torgersen 38.9 17.8 #> 7 1 Adelie Torgersen 39.2 19.6 #> 8 1 Adelie Torgersen 41.1 17.6 #> 9 1 Adelie Torgersen 38.6 21.2 #> 10 1 Adelie Torgersen 34.6 21.1 #> # ℹ 323 more rows #> # ℹ 2 more variables: flipper_length_mm , sex "},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"vector-outcomes","dir":"Articles","previous_headings":"XY","what":"Vector outcomes","title":"Molding data for modeling","text":"y bit special XY interface, univariate case users might expect able pass vector, 1 column data frame, matrix. mold() prepared cases, vector case requires special attention. consistent mold() interfaces, outcomes slot return value tibble. 
achieve y supplied vector, default column name created, \".outcome\".","code":"mold(x, y$body_mass_g)$outcomes #> # A tibble: 333 × 1 #> .outcome #> #> 1 3750 #> 2 3800 #> 3 3250 #> 4 3450 #> 5 3650 #> 6 3625 #> 7 4675 #> 8 3200 #> 9 3800 #> 10 4400 #> # ℹ 323 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/mold.html","id":"recipe","dir":"Articles","previous_headings":"","what":"Recipe","title":"Molding data for modeling","text":"last three interfaces relatively new recipes interface. default_recipe_blueprint() knows prep() recipe, juice() extract predictors outcomes. far flexible way preprocess data. special thing can tweak recipe blueprint whether intercept added.","code":"library(recipes) rec <- recipe(bill_length_mm ~ species + bill_depth_mm, penguins) %>% step_log(bill_length_mm) %>% step_dummy(species) penguin_recipe <- mold(rec, penguins) penguin_recipe$predictors #> # A tibble: 333 × 3 #> bill_depth_mm species_Chinstrap species_Gentoo #> #> 1 18.7 0 0 #> 2 17.4 0 0 #> 3 18 0 0 #> 4 19.3 0 0 #> 5 20.6 0 0 #> 6 17.8 0 0 #> 7 19.6 0 0 #> 8 17.6 0 0 #> 9 21.2 0 0 #> 10 21.1 0 0 #> # ℹ 323 more rows penguin_recipe$outcomes #> # A tibble: 333 × 1 #> bill_length_mm #> #> 1 3.67 #> 2 3.68 #> 3 3.70 #> 4 3.60 #> 5 3.67 #> 6 3.66 #> 7 3.67 #> 8 3.72 #> 9 3.65 #> 10 3.54 #> # ℹ 323 more rows recipe_with_intercept <- mold( rec, penguins, blueprint = default_recipe_blueprint(intercept = TRUE) ) recipe_with_intercept$predictors #> # A tibble: 333 × 4 #> `(Intercept)` bill_depth_mm species_Chinstrap species_Gentoo #> #> 1 1 18.7 0 0 #> 2 1 17.4 0 0 #> 3 1 18 0 0 #> 4 1 19.3 0 0 #> 5 1 20.6 0 0 #> 6 1 17.8 0 0 #> 7 1 19.6 0 0 #> 8 1 17.6 0 0 #> 9 1 21.2 0 0 #> 10 1 21.1 0 0 #> # ℹ 323 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Creating Modeling Packages With hardhat","text":"goal vignette teach use mold() forge() modeling package. intended use functions, even though can also called interactively. Creating new modeling package two main stages: creating model fitting function, implementing predict method. stages break like : Stage 1 - Model Fitting Create model constructor. Create fitting implementation function. Create common bridge go high level user facing methods lower level constructor implementation function. Create user facing function methods data frame, matrix, formula, recipe inputs. imagine comes together internal pieces power user facing methods model. Stage 2 - Model Prediction Create one prediction implementation functions, varying \"type\" prediction make. Create common bridge high level predict method lower level prediction implementation functions. Create user facing predict method. case, 2 user facing methods. Many models multiple internal implementation functions ’ll switch , depending \"type\". end result single high level modeling function methods multiple different “interfaces”, corresponding predict method make predictions using one models along new data (“interfaces”, just mean different types inputs, : data frame, matrix, formula recipe). obviously things might want modeling package . instance, might implement plot() summary() method. 
two stages described necessary almost every model, involve inputs outputs hardhat helps .","code":""},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"whats-our-model","dir":"Articles","previous_headings":"Introduction","what":"What’s Our Model?","title":"Creating Modeling Packages With hardhat","text":"use underlying lm() infrastructure, lm.fit(), create model. Linear regression recognizable many, can focus understanding mold() forge() fit bigger picture, rather trying understand model works. lm.fit() takes x y directly, rather using formula method. serve core part modeling implementation function. generally, easiest core implementation function algorithm takes x y manner, since mold() standardize inputs. call model simple_lm(). won’t features normal linear regression (weights, offsets, etc), serve nice dummy model show features get hardhat.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"model-constructor","dir":"Articles","previous_headings":"Model Fitting","what":"Model Constructor","title":"Creating Modeling Packages With hardhat","text":"first thing need modeling constructor. Constructors simple functions creates new objects model class. arguments constructor supply individual pieces, wraps model object. hardhat function new_model() can help creating . model constructor : name new_(). Take required model elements named arguments, including required blueprint. Validate types new elements. Pass named elements new_model() along setting class \"\". want learn details constructors creating S3 classes, take look S3 section Advanced R. \"simple_lm\" object just enough information make numeric predictions new data, can store things well enable model object work extra post-fitting functionality. can test manually generating model object. Along custom class provided, object also \"hardhat_model\" class. simple print method objects type. Specifically, prints name class top, prints custom elements (.e. blueprint).","code":"new_simple_lm <- function(coefs, coef_names, blueprint) { if (!is.numeric(coefs)) { stop(\"`coefs` should be a numeric vector.\", call. = FALSE) } if (!is.character(coef_names)) { stop(\"`coef_names` should be a character vector.\", call. = FALSE) } if (length(coefs) != length(coef_names)) { stop(\"`coefs` and `coef_names` must have the same length.\") } new_model( coefs = coefs, coef_names = coef_names, blueprint = blueprint, class = \"simple_lm\" ) } manual_model <- new_simple_lm(1, \"my_coef\", default_xy_blueprint()) manual_model #> #> $coefs #> [1] 1 #> #> $coef_names #> [1] \"my_coef\" names(manual_model) #> [1] \"coefs\" \"coef_names\" \"blueprint\" manual_model$blueprint #> XY blueprint: #> #> # Predictors: 0 #> # Outcomes: 0 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble"},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"model-fitting-implementation","dir":"Articles","previous_headings":"Model Fitting","what":"Model Fitting Implementation","title":"Creating Modeling Packages With hardhat","text":"implementation function hard work done. generally recommend naming _impl(). accept predictors outcomes whatever form required algorithm, run algorithm, return named list new elements added model constructor. might also arguments extra options can used tweak internal algorithm. simple linear regression implementation just calls lm.fit() x = predictors y = outcomes. lm.fit() expects matrix predictors vector outcomes (least univariate regression). 
moment discuss create .","code":"simple_lm_impl <- function(predictors, outcomes) { lm_fit <- lm.fit(predictors, outcomes) coefs <- lm_fit$coefficients coef_names <- names(coefs) coefs <- unname(coefs) list( coefs = coefs, coef_names = coef_names ) } predictors <- as.matrix(subset(penguins, select = bill_length_mm)) outcomes <- penguins$body_mass_g simple_lm_impl(predictors, outcomes) #> $coefs #> [1] 95.49649 #> #> $coef_names #> [1] \"bill_length_mm\""},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"model-fitting-bridge","dir":"Articles","previous_headings":"Model Fitting","what":"Model Fitting Bridge","title":"Creating Modeling Packages With hardhat","text":"Now constructor implementation function, can create common function used top level methods (data frames, matrices, formulas, recipes). call implementation function, use information along blueprint create new instance model. argument output call mold(), ’ve called processed. object (minimum) predictors, outcomes, blueprint. might also arguments additional options pass implementation function. bridge function take standardized predictors outcomes convert lower level types implementation function requires. predictors outcomes returned mold() always data frames, case can convert matrices vectors directly use lower level function. also good place use hardhat’s validation functions. case, always expect outcome single column since univariate model, can use validate_outcomes_is_univariate() enforce . point, can simulate user input pass bridge run model. Multiple outcomes error:","code":"simple_lm_bridge <- function(processed) { validate_outcomes_are_univariate(processed$outcomes) predictors <- as.matrix(processed$predictors) outcomes <- processed$outcomes[[1]] fit <- simple_lm_impl(predictors, outcomes) new_simple_lm( coefs = fit$coefs, coef_names = fit$coef_names, blueprint = processed$blueprint ) } # Simulate formula interface processed_1 <- mold(bill_length_mm ~ body_mass_g + species, penguins) # Simulate xy interface processed_2 <- mold(x = penguins[\"body_mass_g\"], y = penguins$bill_length_mm) simple_lm_bridge(processed_1) #> #> $coefs #> [1] 0.003754612 24.908763524 34.817525835 28.447942512 #> #> $coef_names #> [1] \"body_mass_g\" \"speciesAdelie\" \"speciesChinstrap\" #> [4] \"speciesGentoo\" simple_lm_bridge(processed_2) #> #> $coefs #> [1] 0.01022951 #> #> $coef_names #> [1] \"body_mass_g\" multi_outcome <- mold(bill_length_mm + bill_depth_mm ~ body_mass_g + species, penguins) simple_lm_bridge(multi_outcome) #> Error in `validate_outcomes_are_univariate()`: #> ! The outcome must be univariate, but 2 columns were found."},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"user-facing-fitting-function","dir":"Articles","previous_headings":"Model Fitting","what":"User Facing Fitting Function","title":"Creating Modeling Packages With hardhat","text":"pieces place, everything need create high level modeling interface. generic function, generally methods data frames, matrices, formulas, recipes. method call mold() method specific inputs run preprocessing, pass bridge function run actual model. also good practice provide default method nice error message unknown types. Let’s give try: can use preprocessing well, handled mold().","code":"# Generic simple_lm <- function(x, ...) { UseMethod(\"simple_lm\") } # Default simple_lm.default <- function(x, ...) { stop( \"`simple_lm()` is not defined for a '\", class(x)[1], \"'.\", call. 
= FALSE ) } # XY method - data frame simple_lm.data.frame <- function(x, y, ...) { processed <- mold(x, y) simple_lm_bridge(processed) } # XY method - matrix simple_lm.matrix <- function(x, y, ...) { processed <- mold(x, y) simple_lm_bridge(processed) } # Formula method simple_lm.formula <- function(formula, data, ...) { processed <- mold(formula, data) simple_lm_bridge(processed) } # Recipe method simple_lm.recipe <- function(x, data, ...) { processed <- mold(x, data) simple_lm_bridge(processed) } predictors <- penguins[c(\"bill_length_mm\", \"bill_depth_mm\")] outcomes_vec <- penguins$body_mass_g outcomes_df <- penguins[\"body_mass_g\"] # Vector outcome simple_lm(predictors, outcomes_vec) #> #> $coefs #> [1] 110.88151 -40.16918 #> #> $coef_names #> [1] \"bill_length_mm\" \"bill_depth_mm\" # 1 column data frame outcome simple_lm(predictors, outcomes_df) #> #> $coefs #> [1] 110.88151 -40.16918 #> #> $coef_names #> [1] \"bill_length_mm\" \"bill_depth_mm\" # Formula interface simple_lm(body_mass_g ~ bill_length_mm + bill_depth_mm, penguins) #> #> $coefs #> [1] 110.88151 -40.16918 #> #> $coef_names #> [1] \"bill_length_mm\" \"bill_depth_mm\" library(recipes) # - Log a predictor # - Generate dummy variables for factors simple_lm(body_mass_g ~ log(bill_length_mm) + species, penguins) #> #> $coefs #> [1] 3985.047 -10865.973 -11753.182 -10290.188 #> #> $coef_names #> [1] \"log(bill_length_mm)\" \"speciesAdelie\" \"speciesChinstrap\" #> [4] \"speciesGentoo\" # Same, but with a recipe rec <- recipe(body_mass_g ~ bill_length_mm + species, penguins) %>% step_log(bill_length_mm) %>% step_dummy(species, one_hot = TRUE) simple_lm(rec, penguins) #> #> $coefs #> [1] 3985.047 -10865.973 -11753.182 -10290.188 #> #> $coef_names #> [1] \"bill_length_mm\" \"species_Adelie\" \"species_Chinstrap\" #> [4] \"species_Gentoo\""},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"adding-an-intercept-option","dir":"Articles","previous_headings":"Model Fitting > User Facing Fitting Function","what":"Adding an Intercept Option","title":"Creating Modeling Packages With hardhat","text":"might noticed linear regression isn’t adding intercept. Generally, linear regression models want default intercept added . accomplish , can add intercept argument user facing function, use tweak blueprint otherwise created automatically. Note even formula method respects intercept argument. recap, default mold() automatically add intercept method, including formula method.","code":"simple_lm <- function(x, ...) { UseMethod(\"simple_lm\") } simple_lm.data.frame <- function(x, y, intercept = TRUE, ...) { blueprint <- default_xy_blueprint(intercept = intercept) processed <- mold(x, y, blueprint = blueprint) simple_lm_bridge(processed) } simple_lm.matrix <- function(x, y, intercept = TRUE,...) { blueprint <- default_xy_blueprint(intercept = intercept) processed <- mold(x, y, blueprint = blueprint) simple_lm_bridge(processed) } simple_lm.formula <- function(formula, data, intercept = TRUE, ...) { blueprint <- default_formula_blueprint(intercept = intercept) processed <- mold(formula, data, blueprint = blueprint) simple_lm_bridge(processed) } simple_lm.recipe <- function(x, data, intercept = TRUE, ...) 
{ blueprint <- default_recipe_blueprint(intercept = intercept) processed <- mold(x, data, blueprint = blueprint) simple_lm_bridge(processed) } # By default an intercept is included simple_lm(predictors, outcomes_df) #> #> $coefs #> [1] 3413.45185 74.81263 -145.50718 #> #> $coef_names #> [1] \"(Intercept)\" \"bill_length_mm\" \"bill_depth_mm\" # But the user can turn this off simple_lm(body_mass_g ~ log(bill_length_mm) + species, penguins, intercept = FALSE) #> #> $coefs #> [1] 3985.047 -10865.973 -11753.182 -10290.188 #> #> $coef_names #> [1] \"log(bill_length_mm)\" \"speciesAdelie\" \"speciesChinstrap\" #> [4] \"speciesGentoo\""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"prediction-implementation","dir":"Articles","previous_headings":"Model Prediction","what":"Prediction Implementation","title":"Creating Modeling Packages With hardhat","text":"prediction side, need implementation functions like fitting model. vary based \"type\" argument predict(). \"type\" might \"numeric\" numeric predictions use , \"class\" hard class predictions, \"prob\" class probabilities, . set recommended names \"type\" can found Model Predictions section implementation principles. model, focus returning numeric predictions. generally like name prediction implementation functions predict__(). arguments implementation functions include model object predictors form prediction algorithm expects (, matrix). Also used another hardhat function standardizing prediction output, spruce_numeric(). function tidies numeric response output, automatically standardizes match recommendations principles guide. output always tibble, \"numeric\" type 1 column, .pred. test , run model call forge() output manually. higher level user facing function automatically.","code":"predict_simple_lm_numeric <- function(object, predictors) { coefs <- object$coefs pred <- as.vector(predictors %*% coefs) out <- spruce_numeric(pred) out } model <- simple_lm(bill_length_mm ~ body_mass_g + species, penguins) predictors <- forge(penguins, model$blueprint)$predictors predictors <- as.matrix(predictors) predict_simple_lm_numeric(model, predictors) #> # A tibble: 333 × 1 #> .pred #> #> 1 39.0 #> 2 39.2 #> 3 37.1 #> 4 37.9 #> 5 38.6 #> 6 38.5 #> 7 42.5 #> 8 36.9 #> 9 39.2 #> 10 41.4 #> # ℹ 323 more rows"},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"prediction-bridge","dir":"Articles","previous_headings":"Model Prediction","what":"Prediction Bridge","title":"Creating Modeling Packages With hardhat","text":"prediction bridge converts standardized predictors lower level type prediction implementation functions expect. predictors always data frame, part return value call forge(). Since prediction implementation function takes matrix, convert . Additionally, switch() type argument decide prediction implementation functions call. , type == \"numeric\", predict_simple_lm_numeric() called. also like using rlang::arg_match() validate type one accepted prediction types. advantage match.arg() partial matches allowed, error messages bit nicer. 
Let’s test:","code":"predict_simple_lm_bridge <- function(type, object, predictors) { type <- rlang::arg_match(type, \"numeric\") predictors <- as.matrix(predictors) switch( type, numeric = predict_simple_lm_numeric(object, predictors) ) } model <- simple_lm(bill_length_mm ~ body_mass_g + species, penguins) # Pass in the data frame predictors <- forge(penguins, model$blueprint)$predictors predict_simple_lm_bridge(\"numeric\", model, predictors) #> # A tibble: 333 × 1 #> .pred #> #> 1 39.0 #> 2 39.2 #> 3 37.1 #> 4 37.9 #> 5 38.6 #> 6 38.5 #> 7 42.5 #> 8 36.9 #> 9 39.2 #> 10 41.4 #> # ℹ 323 more rows # Partial matches are an error predict_simple_lm_bridge(\"numer\", model, predictors) #> Error in `predict_simple_lm_bridge()`: #> ! `type` must be one of \"numeric\", not \"numer\". #> ℹ Did you mean \"numeric\"?"},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"user-facing-prediction-function","dir":"Articles","previous_headings":"Model Prediction","what":"User Facing Prediction Function","title":"Creating Modeling Packages With hardhat","text":"Finally, can create S3 method generic predict() function. match modeling principles, use new_data accept matrix data frame new predictors. first thing predict() method call forge() new_data blueprint attached simple_lm model fit time. performs required preprocessing new data, checks type data frame supplied new_data matches type data frame supplied fit time. one valuable features forge(), adds large amount robustness predict()ion function. see examples end vignette. calling forge(), pass bridge function call correct prediction function based type. Finally, good practice call hardhat function, validate_prediction_size(), return value original new_data ensure number rows output number rows input. prediction made row new_data, NA value placed instead. Mainly, validation function check model developer ensure always return output sane length.","code":"predict.simple_lm <- function(object, new_data, type = \"numeric\", ...) { # Enforces column order, type, column names, etc processed <- forge(new_data, object$blueprint) out <- predict_simple_lm_bridge(type, object, processed$predictors) validate_prediction_size(out, new_data) out }"},{"path":"https://hardhat.tidymodels.org/dev/articles/package.html","id":"final-testing","dir":"Articles","previous_headings":"","what":"Final Testing","title":"Creating Modeling Packages With hardhat","text":"Finally, can test top level modeling function along corresponding predict() method. using forge(), automatically get powerful type checking ensure new_data form expect.","code":"model <- simple_lm(bill_length_mm ~ log(body_mass_g) + species, penguins) predict(model, penguins) #> # A tibble: 333 × 1 #> .pred #> #> 1 39.1 #> 2 39.3 #> 3 36.9 #> 4 37.9 #> 5 38.7 #> 6 38.6 #> 7 42.5 #> 8 36.7 #> 9 39.3 #> 10 41.5 #> # ℹ 323 more rows # `new_data` isn't a data frame predict(model, penguins$species) #> Error in `forge()`: #> ! The class of `new_data`, 'factor', is not recognized. # Missing a required column predict(model, subset(penguins, select = -body_mass_g)) #> Error in `validate_column_names()`: #> ! The following required columns are missing: 'body_mass_g'. # In this case, 'species' is a character, # but can be losslessy converted to a factor. # That happens for you automatically and silently. 
penguins_chr_species <- transform(penguins, species = as.character(species)) predict(model, penguins_chr_species) #> # A tibble: 333 × 1 #> .pred #> #> 1 39.1 #> 2 39.3 #> 3 36.9 #> 4 37.9 #> 5 38.7 #> 6 38.6 #> 7 42.5 #> 8 36.7 #> 9 39.3 #> 10 41.5 #> # ℹ 323 more rows # Slightly different from above. Here, 'species' is a character, # AND has an extra unexpected factor level. It is # removed with a warning, but you still get a factor # with the correct levels penguins_chr_bad_species <- penguins_chr_species penguins_chr_bad_species$species[1] <- \"new_level\" predict(model, penguins_chr_bad_species) #> Warning: Novel levels found in column 'species': 'new_level'. The levels #> have been removed, and values have been coerced to 'NA'. #> # A tibble: 333 × 1 #> .pred #> #> 1 NA #> 2 39.3 #> 3 36.9 #> 4 37.9 #> 5 38.7 #> 6 38.6 #> 7 42.5 #> 8 36.7 #> 9 39.3 #> 10 41.5 #> # ℹ 323 more rows # This case throws an error. # Here, 'species' is a double and # when it should have been a factor. # You can't cast a double to a factor! penguins_dbl_species <- transform(penguins, species = 1) predict(model, penguins_dbl_species) #> Error in `scream()`: #> ! Can't convert `data$species` to match type of `species` >."},{"path":"https://hardhat.tidymodels.org/dev/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Hannah Frick. Author, maintainer. Davis Vaughan. Author. Max Kuhn. Author. . Copyright holder, funder.","code":""},{"path":"https://hardhat.tidymodels.org/dev/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Frick H, Vaughan D, Kuhn M (2024). hardhat: Construct Modeling Packages. R package version 1.4.0.9002, https://hardhat.tidymodels.org, https://github.com/tidymodels/hardhat.","code":"@Manual{, title = {hardhat: Construct Modeling Packages}, author = {Hannah Frick and Davis Vaughan and Max Kuhn}, year = {2024}, note = {R package version 1.4.0.9002, https://hardhat.tidymodels.org}, url = {https://github.com/tidymodels/hardhat}, }"},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/index.html","id":"introduction","dir":"","previous_headings":"","what":"Introduction","title":"Construct Modeling Packages","text":"hardhat developer focused package designed ease creation new modeling packages, simultaneously promoting good R modeling package standards laid set opinionated Conventions R Modeling Packages. hardhat four main goals: Easily, consistently, robustly preprocess data fit time prediction time mold() forge(). Provide one source truth common input validation functions, checking new data prediction time contains required columns used fit time. Provide extra utility functions additional common tasks, adding intercept columns, standardizing predict() output, extracting valuable class factor level information predictors. Reimagine base R preprocessing infrastructure stats::model.matrix() stats::model.frame() using stricter approaches found model_matrix() model_frame(). idea reduce burden creating good modeling interface much possible, instead let package developer focus writing core implementation new model. 
benefits developer, also user modeling package, standardization allows users build set “expectations” around modeling function return, interact .","code":""},{"path":"https://hardhat.tidymodels.org/dev/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Construct Modeling Packages","text":"can install released version hardhat CRAN : development version GitHub :","code":"install.packages(\"hardhat\") # install.packages(\"pak\") pak::pak(\"tidymodels/hardhat\")"},{"path":"https://hardhat.tidymodels.org/dev/index.html","id":"learning-more","dir":"","previous_headings":"","what":"Learning more","title":"Construct Modeling Packages","text":"learn use hardhat, check vignettes: vignette(\"mold\", \"hardhat\"): Learn preprocess data fit time mold(). vignette(\"forge\", \"hardhat\"): Learn preprocess new data prediction time forge(). vignette(\"package\", \"hardhat\"): Learn use mold() forge() help creating new modeling package. can also watch Max Kuhn discuss use hardhat build new modeling package scratch XI Jornadas de Usuarios de R conference .","code":""},{"path":"https://hardhat.tidymodels.org/dev/index.html","id":"contributing","dir":"","previous_headings":"","what":"Contributing","title":"Construct Modeling Packages","text":"project released Contributor Code Conduct. contributing project, agree abide terms. questions discussions tidymodels packages, modeling, machine learning, please post RStudio Community. think encountered bug, please submit issue. Either way, learn create share reprex (minimal, reproducible example), clearly communicate code. Check details contributing guidelines tidymodels packages get help.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":null,"dir":"Reference","previous_headings":"","what":"Add an intercept column to data — add_intercept_column","title":"Add an intercept column to data — add_intercept_column","text":"function adds integer column 1's data.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add an intercept column to data — add_intercept_column","text":"","code":"add_intercept_column(data, name = \"(Intercept)\")"},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add an intercept column to data — add_intercept_column","text":"data data frame matrix. name name intercept column. 
Defaults \"(Intercept)\", name stats::lm() uses.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add an intercept column to data — add_intercept_column","text":"data intercept column.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Add an intercept column to data — add_intercept_column","text":"column named name already exists data, data returned unchanged warning issued.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/add_intercept_column.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Add an intercept column to data — add_intercept_column","text":"","code":"add_intercept_column(mtcars) #> (Intercept) mpg cyl disp hp drat wt qsec vs am #> Mazda RX4 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 #> Mazda RX4 Wag 1 21.0 6 160.0 110 3.90 2.875 17.02 0 1 #> Datsun 710 1 22.8 4 108.0 93 3.85 2.320 18.61 1 1 #> Hornet 4 Drive 1 21.4 6 258.0 110 3.08 3.215 19.44 1 0 #> Hornet Sportabout 1 18.7 8 360.0 175 3.15 3.440 17.02 0 0 #> Valiant 1 18.1 6 225.0 105 2.76 3.460 20.22 1 0 #> Duster 360 1 14.3 8 360.0 245 3.21 3.570 15.84 0 0 #> Merc 240D 1 24.4 4 146.7 62 3.69 3.190 20.00 1 0 #> Merc 230 1 22.8 4 140.8 95 3.92 3.150 22.90 1 0 #> Merc 280 1 19.2 6 167.6 123 3.92 3.440 18.30 1 0 #> Merc 280C 1 17.8 6 167.6 123 3.92 3.440 18.90 1 0 #> Merc 450SE 1 16.4 8 275.8 180 3.07 4.070 17.40 0 0 #> Merc 450SL 1 17.3 8 275.8 180 3.07 3.730 17.60 0 0 #> Merc 450SLC 1 15.2 8 275.8 180 3.07 3.780 18.00 0 0 #> Cadillac Fleetwood 1 10.4 8 472.0 205 2.93 5.250 17.98 0 0 #> Lincoln Continental 1 10.4 8 460.0 215 3.00 5.424 17.82 0 0 #> Chrysler Imperial 1 14.7 8 440.0 230 3.23 5.345 17.42 0 0 #> Fiat 128 1 32.4 4 78.7 66 4.08 2.200 19.47 1 1 #> Honda Civic 1 30.4 4 75.7 52 4.93 1.615 18.52 1 1 #> Toyota Corolla 1 33.9 4 71.1 65 4.22 1.835 19.90 1 1 #> Toyota Corona 1 21.5 4 120.1 97 3.70 2.465 20.01 1 0 #> Dodge Challenger 1 15.5 8 318.0 150 2.76 3.520 16.87 0 0 #> AMC Javelin 1 15.2 8 304.0 150 3.15 3.435 17.30 0 0 #> Camaro Z28 1 13.3 8 350.0 245 3.73 3.840 15.41 0 0 #> Pontiac Firebird 1 19.2 8 400.0 175 3.08 3.845 17.05 0 0 #> Fiat X1-9 1 27.3 4 79.0 66 4.08 1.935 18.90 1 1 #> Porsche 914-2 1 26.0 4 120.3 91 4.43 2.140 16.70 0 1 #> Lotus Europa 1 30.4 4 95.1 113 3.77 1.513 16.90 1 1 #> Ford Pantera L 1 15.8 8 351.0 264 4.22 3.170 14.50 0 1 #> Ferrari Dino 1 19.7 6 145.0 175 3.62 2.770 15.50 0 1 #> Maserati Bora 1 15.0 8 301.0 335 3.54 3.570 14.60 0 1 #> Volvo 142E 1 21.4 4 121.0 109 4.11 2.780 18.60 1 1 #> gear carb #> Mazda RX4 4 4 #> Mazda RX4 Wag 4 4 #> Datsun 710 4 1 #> Hornet 4 Drive 3 1 #> Hornet Sportabout 3 2 #> Valiant 3 1 #> Duster 360 3 4 #> Merc 240D 4 2 #> Merc 230 4 2 #> Merc 280 4 4 #> Merc 280C 4 4 #> Merc 450SE 3 3 #> Merc 450SL 3 3 #> Merc 450SLC 3 3 #> Cadillac Fleetwood 3 4 #> Lincoln Continental 3 4 #> Chrysler Imperial 3 4 #> Fiat 128 4 1 #> Honda Civic 4 2 #> Toyota Corolla 4 1 #> Toyota Corona 3 1 #> Dodge Challenger 3 2 #> AMC Javelin 3 2 #> Camaro Z28 3 4 #> Pontiac Firebird 3 2 #> Fiat X1-9 4 1 #> Porsche 914-2 5 2 #> Lotus Europa 5 2 #> Ford Pantera L 5 4 #> Ferrari Dino 5 6 #> Maserati Bora 5 8 #> Volvo 142E 4 2 add_intercept_column(mtcars, \"intercept\") #> intercept mpg cyl disp hp drat wt qsec vs am #> Mazda RX4 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 #> Mazda RX4 Wag 1 21.0 6 
160.0 110 3.90 2.875 17.02 0 1 #> Datsun 710 1 22.8 4 108.0 93 3.85 2.320 18.61 1 1 #> Hornet 4 Drive 1 21.4 6 258.0 110 3.08 3.215 19.44 1 0 #> Hornet Sportabout 1 18.7 8 360.0 175 3.15 3.440 17.02 0 0 #> Valiant 1 18.1 6 225.0 105 2.76 3.460 20.22 1 0 #> Duster 360 1 14.3 8 360.0 245 3.21 3.570 15.84 0 0 #> Merc 240D 1 24.4 4 146.7 62 3.69 3.190 20.00 1 0 #> Merc 230 1 22.8 4 140.8 95 3.92 3.150 22.90 1 0 #> Merc 280 1 19.2 6 167.6 123 3.92 3.440 18.30 1 0 #> Merc 280C 1 17.8 6 167.6 123 3.92 3.440 18.90 1 0 #> Merc 450SE 1 16.4 8 275.8 180 3.07 4.070 17.40 0 0 #> Merc 450SL 1 17.3 8 275.8 180 3.07 3.730 17.60 0 0 #> Merc 450SLC 1 15.2 8 275.8 180 3.07 3.780 18.00 0 0 #> Cadillac Fleetwood 1 10.4 8 472.0 205 2.93 5.250 17.98 0 0 #> Lincoln Continental 1 10.4 8 460.0 215 3.00 5.424 17.82 0 0 #> Chrysler Imperial 1 14.7 8 440.0 230 3.23 5.345 17.42 0 0 #> Fiat 128 1 32.4 4 78.7 66 4.08 2.200 19.47 1 1 #> Honda Civic 1 30.4 4 75.7 52 4.93 1.615 18.52 1 1 #> Toyota Corolla 1 33.9 4 71.1 65 4.22 1.835 19.90 1 1 #> Toyota Corona 1 21.5 4 120.1 97 3.70 2.465 20.01 1 0 #> Dodge Challenger 1 15.5 8 318.0 150 2.76 3.520 16.87 0 0 #> AMC Javelin 1 15.2 8 304.0 150 3.15 3.435 17.30 0 0 #> Camaro Z28 1 13.3 8 350.0 245 3.73 3.840 15.41 0 0 #> Pontiac Firebird 1 19.2 8 400.0 175 3.08 3.845 17.05 0 0 #> Fiat X1-9 1 27.3 4 79.0 66 4.08 1.935 18.90 1 1 #> Porsche 914-2 1 26.0 4 120.3 91 4.43 2.140 16.70 0 1 #> Lotus Europa 1 30.4 4 95.1 113 3.77 1.513 16.90 1 1 #> Ford Pantera L 1 15.8 8 351.0 264 4.22 3.170 14.50 0 1 #> Ferrari Dino 1 19.7 6 145.0 175 3.62 2.770 15.50 0 1 #> Maserati Bora 1 15.0 8 301.0 335 3.54 3.570 14.60 0 1 #> Volvo 142E 1 21.4 4 121.0 109 4.11 2.780 18.60 1 1 #> gear carb #> Mazda RX4 4 4 #> Mazda RX4 Wag 4 4 #> Datsun 710 4 1 #> Hornet 4 Drive 3 1 #> Hornet Sportabout 3 2 #> Valiant 3 1 #> Duster 360 3 4 #> Merc 240D 4 2 #> Merc 230 4 2 #> Merc 280 4 4 #> Merc 280C 4 4 #> Merc 450SE 3 3 #> Merc 450SL 3 3 #> Merc 450SLC 3 3 #> Cadillac Fleetwood 3 4 #> Lincoln Continental 3 4 #> Chrysler Imperial 3 4 #> Fiat 128 4 1 #> Honda Civic 4 2 #> Toyota Corolla 4 1 #> Toyota Corona 3 1 #> Dodge Challenger 3 2 #> AMC Javelin 3 2 #> Camaro Z28 3 4 #> Pontiac Firebird 3 2 #> Fiat X1-9 4 1 #> Porsche 914-2 5 2 #> Lotus Europa 5 2 #> Ford Pantera L 5 4 #> Ferrari Dino 5 6 #> Maserati Bora 5 8 #> Volvo 142E 4 2 add_intercept_column(as.matrix(mtcars)) #> (Intercept) mpg cyl disp hp drat wt qsec vs am #> Mazda RX4 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 #> Mazda RX4 Wag 1 21.0 6 160.0 110 3.90 2.875 17.02 0 1 #> Datsun 710 1 22.8 4 108.0 93 3.85 2.320 18.61 1 1 #> Hornet 4 Drive 1 21.4 6 258.0 110 3.08 3.215 19.44 1 0 #> Hornet Sportabout 1 18.7 8 360.0 175 3.15 3.440 17.02 0 0 #> Valiant 1 18.1 6 225.0 105 2.76 3.460 20.22 1 0 #> Duster 360 1 14.3 8 360.0 245 3.21 3.570 15.84 0 0 #> Merc 240D 1 24.4 4 146.7 62 3.69 3.190 20.00 1 0 #> Merc 230 1 22.8 4 140.8 95 3.92 3.150 22.90 1 0 #> Merc 280 1 19.2 6 167.6 123 3.92 3.440 18.30 1 0 #> Merc 280C 1 17.8 6 167.6 123 3.92 3.440 18.90 1 0 #> Merc 450SE 1 16.4 8 275.8 180 3.07 4.070 17.40 0 0 #> Merc 450SL 1 17.3 8 275.8 180 3.07 3.730 17.60 0 0 #> Merc 450SLC 1 15.2 8 275.8 180 3.07 3.780 18.00 0 0 #> Cadillac Fleetwood 1 10.4 8 472.0 205 2.93 5.250 17.98 0 0 #> Lincoln Continental 1 10.4 8 460.0 215 3.00 5.424 17.82 0 0 #> Chrysler Imperial 1 14.7 8 440.0 230 3.23 5.345 17.42 0 0 #> Fiat 128 1 32.4 4 78.7 66 4.08 2.200 19.47 1 1 #> Honda Civic 1 30.4 4 75.7 52 4.93 1.615 18.52 1 1 #> Toyota Corolla 1 33.9 4 71.1 65 4.22 1.835 19.90 1 1 #> Toyota 
Corona 1 21.5 4 120.1 97 3.70 2.465 20.01 1 0 #> Dodge Challenger 1 15.5 8 318.0 150 2.76 3.520 16.87 0 0 #> AMC Javelin 1 15.2 8 304.0 150 3.15 3.435 17.30 0 0 #> Camaro Z28 1 13.3 8 350.0 245 3.73 3.840 15.41 0 0 #> Pontiac Firebird 1 19.2 8 400.0 175 3.08 3.845 17.05 0 0 #> Fiat X1-9 1 27.3 4 79.0 66 4.08 1.935 18.90 1 1 #> Porsche 914-2 1 26.0 4 120.3 91 4.43 2.140 16.70 0 1 #> Lotus Europa 1 30.4 4 95.1 113 3.77 1.513 16.90 1 1 #> Ford Pantera L 1 15.8 8 351.0 264 4.22 3.170 14.50 0 1 #> Ferrari Dino 1 19.7 6 145.0 175 3.62 2.770 15.50 0 1 #> Maserati Bora 1 15.0 8 301.0 335 3.54 3.570 14.60 0 1 #> Volvo 142E 1 21.4 4 121.0 109 4.11 2.780 18.60 1 1 #> gear carb #> Mazda RX4 4 4 #> Mazda RX4 Wag 4 4 #> Datsun 710 4 1 #> Hornet 4 Drive 3 1 #> Hornet Sportabout 3 2 #> Valiant 3 1 #> Duster 360 3 4 #> Merc 240D 4 2 #> Merc 230 4 2 #> Merc 280 4 4 #> Merc 280C 4 4 #> Merc 450SE 3 3 #> Merc 450SL 3 3 #> Merc 450SLC 3 3 #> Cadillac Fleetwood 3 4 #> Lincoln Continental 3 4 #> Chrysler Imperial 3 4 #> Fiat 128 4 1 #> Honda Civic 4 2 #> Toyota Corolla 4 1 #> Toyota Corona 3 1 #> Dodge Challenger 3 2 #> AMC Javelin 3 2 #> Camaro Z28 3 4 #> Pontiac Firebird 3 2 #> Fiat X1-9 4 1 #> Porsche 914-2 5 2 #> Lotus Europa 5 2 #> Ford Pantera L 5 4 #> Ferrari Dino 5 6 #> Maserati Bora 5 8 #> Volvo 142E 4 2"},{"path":"https://hardhat.tidymodels.org/dev/reference/check_quantile_levels.html","id":null,"dir":"Reference","previous_headings":"","what":"Check levels of quantiles — check_quantile_levels","title":"Check levels of quantiles — check_quantile_levels","text":"Check levels quantiles","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/check_quantile_levels.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check levels of quantiles — check_quantile_levels","text":"","code":"check_quantile_levels(levels, call = rlang::caller_env())"},{"path":"https://hardhat.tidymodels.org/dev/reference/check_quantile_levels.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check levels of quantiles — check_quantile_levels","text":"levels quantile levels. 
call Call shown error messages.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/check_quantile_levels.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check levels of quantiles — check_quantile_levels","text":"Invisible TRUE","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/check_quantile_levels.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Check levels of quantiles — check_quantile_levels","text":"Checks levels data type, range, uniqueness, order missingness.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/contr_one_hot.html","id":null,"dir":"Reference","previous_headings":"","what":"Contrast function for one-hot encodings — contr_one_hot","title":"Contrast function for one-hot encodings — contr_one_hot","text":"contrast function produces model matrix indicator columns level factor.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/contr_one_hot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Contrast function for one-hot encodings — contr_one_hot","text":"","code":"contr_one_hot(n, contrasts = TRUE, sparse = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/contr_one_hot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Contrast function for one-hot encodings — contr_one_hot","text":"n vector character factor levels number unique levels. contrasts argument backwards compatibility default TRUE supported. sparse argument backwards compatibility default FALSE supported.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/contr_one_hot.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Contrast function for one-hot encodings — contr_one_hot","text":"diagonal matrix n--n.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Default formula blueprint — default_formula_blueprint","title":"Default formula blueprint — default_formula_blueprint","text":"pages holds details formula preprocessing blueprint. blueprint used default mold() x formula.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Default formula blueprint — default_formula_blueprint","text":"","code":"default_formula_blueprint( intercept = FALSE, allow_novel_levels = FALSE, indicators = \"traditional\", composition = \"tibble\" ) # S3 method for class 'formula' mold(formula, data, ..., blueprint = NULL)"},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Default formula blueprint — default_formula_blueprint","text":"intercept logical. intercept included processed data? information used process function mold forge function list. allow_novel_levels logical. novel factor levels allowed prediction time? information used clean function forge function list, passed scream(). indicators single character string. Control factors expanded dummy variable indicator columns. One : \"traditional\" - default. Create dummy variables using traditional model.matrix() infrastructure. Generally creates K - 1 indicator columns factor, K number levels factor. \"none\" - Leave factor variables alone. expansion done. 
\"one_hot\" - Create dummy variables using one-hot encoding approach expands unordered factors K indicator columns, rather K - 1. composition Either \"tibble\", \"matrix\", \"dgCMatrix\" format processed predictors. \"matrix\" \"dgCMatrix\" chosen, predictors must numeric preprocessing method applied; otherwise error thrown. formula formula specifying predictors outcomes. data data frame matrix containing outcomes predictors. ... used. blueprint preprocessing blueprint. left NULL, default_formula_blueprint() used.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Default formula blueprint — default_formula_blueprint","text":"default_formula_blueprint(), formula blueprint.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Default formula blueprint — default_formula_blueprint","text":"different base R, behavior expanding factors dummy variables indicators = \"traditional\" intercept present always intuitive documented. intercept present, factors expanded K-1 new columns, K number levels factor. intercept present, first factor expanded K columns (one-hot encoding), remaining factors expanded K-1 columns. behavior ensures meaningful predictions can made reference level first factor, exact \"intercept\" model requested. Without behavior, predictions reference level first factor always forced 0 intercept. Offsets can included formula method use inline function stats::offset(). returned tibble 1 column named \".offset\" $extras$offset slot return value.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"mold","dir":"Reference","previous_headings":"","what":"Mold","title":"Default formula blueprint — default_formula_blueprint","text":"mold() used default formula blueprint: Predictors RHS formula isolated, converted 1 sided formula: ~ RHS. Runs stats::model.frame() RHS formula uses data. indicators = \"traditional\", runs stats::model.matrix() result. indicators = \"none\", factors removed model.matrix() run, added back afterwards. interactions inline functions involving factors allowed. indicators = \"one_hot\", runs stats::model.matrix() result using contrast function creates indicator columns levels factors. offsets present using offset(), extracted model_offset(). intercept = TRUE, adds intercept column. Coerces result steps tibble. Outcomes LHS formula isolated, converted 1 sided formula: ~ LHS. Runs stats::model.frame() LHS formula uses data. Coerces result steps tibble.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"forge","dir":"Reference","previous_headings":"","what":"Forge","title":"Default formula blueprint — default_formula_blueprint","text":"forge() used default formula blueprint: calls shrink() trim new_data required columns coerce new_data tibble. calls scream() perform validation structure columns new_data. Predictors runs stats::model.frame() new_data using stored terms object corresponding predictors. , original mold() call, indicators = \"traditional\" set, runs stats::model.matrix() result. , original mold() call, indicators = \"none\" set, runs stats::model.matrix() result without factor columns, adds afterwards. 
, original mold() call, indicators = \"one_hot\" set, runs stats::model.matrix() result contrast function includes indicators levels factor columns. offsets present using offset() original call mold(), extracted model_offset(). intercept = TRUE original call mold(), intercept column added. coerces result steps tibble. Outcomes runs stats::model.frame() new_data using stored terms object corresponding outcomes. Coerces result tibble.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"differences-from-base-r","dir":"Reference","previous_headings":"","what":"Differences From Base R","title":"Default formula blueprint — default_formula_blueprint","text":"number differences base R regarding formulas processed mold() require explanation. Multivariate outcomes can specified LHS using syntax similar RHS (.e. outcome_1 + outcome_2 ~ predictors). complex calculations done LHS return matrices (like stats::poly()), matrices flattened multiple columns tibble call model.frame(). possible, recommended, large amount preprocessing required outcomes, better using recipes::recipe(). Global variables allowed formula. error thrown included. terms formula come data. need use inline functions formula, safest way prefix package name, like pkg::fn(). ensures function always available mold() (fit) forge() (prediction) time. said, package attached (.e. library()), able use inline function without prefix. default, intercepts included predictor output formula. include intercept, set blueprint = default_formula_blueprint(intercept = TRUE). rationale many packages either always require never allow intercept (example, earth package), large amount extra work keep user supplying one removing . interface standardizes flexibility one place.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_formula_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Default formula blueprint — default_formula_blueprint","text":"","code":"# --------------------------------------------------------------------------- data(\"hardhat-example-data\") # --------------------------------------------------------------------------- # Formula Example # Call mold() with the training data processed <- mold( log(num_1) ~ num_2 + fac_1, example_train, blueprint = default_formula_blueprint(intercept = TRUE) ) # Then, call forge() with the blueprint and the test data # to have it preprocess the test data in the same way forge(example_test, processed$blueprint) #> $predictors #> # A tibble: 2 × 4 #> `(Intercept)` num_2 fac_1b fac_1c #> #> 1 1 0.967 0 0 #> 2 1 0.761 0 1 #> #> $outcomes #> NULL #> #> $extras #> $extras$offset #> NULL #> #> # Use `outcomes = TRUE` to also extract the preprocessed outcome forge(example_test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 2 × 4 #> `(Intercept)` num_2 fac_1b fac_1c #> #> 1 1 0.967 0 0 #> 2 1 0.761 0 1 #> #> $outcomes #> # A tibble: 2 × 1 #> `log(num_1)` #> #> 1 3.00 #> 2 3.04 #> #> $extras #> $extras$offset #> NULL #> #> # --------------------------------------------------------------------------- # Factors without an intercept # No intercept is added by default processed <- mold(num_1 ~ fac_1 + fac_2, example_train) # So, for factor columns, the first factor is completely expanded into all # `K` columns (the number of levels), and the subsequent factors are expanded # into `K - 1` columns. 
processed$predictors #> # A tibble: 12 × 4 #> fac_1a fac_1b fac_1c fac_2B #> #> 1 1 0 0 0 #> 2 1 0 0 1 #> 3 1 0 0 0 #> 4 1 0 0 1 #> 5 0 1 0 0 #> 6 0 1 0 1 #> 7 0 1 0 0 #> 8 0 1 0 1 #> 9 0 0 1 0 #> 10 0 0 1 1 #> 11 0 0 1 0 #> 12 0 0 1 1 # In the above example, `fac_1` is expanded into all three columns, # `fac_2` is not. This behavior comes from `model.matrix()`, and is somewhat # known in the R community, but can lead to a model that is difficult to # interpret since the corresponding p-values are testing wildly different # hypotheses. # To get all indicators for all columns (irrespective of the intercept), # use the `indicators = \"one_hot\"` option processed <- mold( num_1 ~ fac_1 + fac_2, example_train, blueprint = default_formula_blueprint(indicators = \"one_hot\") ) processed$predictors #> # A tibble: 12 × 5 #> fac_1a fac_1b fac_1c fac_2A fac_2B #> #> 1 1 0 0 1 0 #> 2 1 0 0 0 1 #> 3 1 0 0 1 0 #> 4 1 0 0 0 1 #> 5 0 1 0 1 0 #> 6 0 1 0 0 1 #> 7 0 1 0 1 0 #> 8 0 1 0 0 1 #> 9 0 0 1 1 0 #> 10 0 0 1 0 1 #> 11 0 0 1 1 0 #> 12 0 0 1 0 1 # It is not possible to construct a no-intercept model that expands all # factors into `K - 1` columns using the formula method. If required, a # recipe could be used to construct this model. # --------------------------------------------------------------------------- # Global variables y <- rep(1, times = nrow(example_train)) # In base R, global variables are allowed in a model formula frame <- model.frame(fac_1 ~ y + num_2, example_train) head(frame) #> fac_1 y num_2 #> 1 a 1 0.579 #> 2 a 1 0.338 #> 3 a 1 0.206 #> 4 a 1 0.546 #> 5 b 1 0.964 #> 6 b 1 0.631 # mold() does not allow them, and throws an error try(mold(fac_1 ~ y + num_2, example_train)) #> Error in get_all_predictors(formula, data) : #> The following predictors were not found in `data`: 'y'. # --------------------------------------------------------------------------- # Dummy variables and interactions # By default, factor columns are expanded # and interactions are created, both by # calling `model.matrix()`. Some models (like # tree based models) can take factors directly # but still might want to use the formula method. # In those cases, set `indicators = \"none\"` to not # run `model.matrix()` on factor columns. Interactions # are still allowed and are run on numeric columns. bp_no_indicators <- default_formula_blueprint(indicators = \"none\") processed <- mold( ~ fac_1 + num_1:num_2, example_train, blueprint = bp_no_indicators ) processed$predictors #> # A tibble: 12 × 2 #> `num_1:num_2` fac_1 #> #> 1 0.579 a #> 2 0.676 a #> 3 0.618 a #> 4 2.18 a #> 5 4.82 b #> 6 3.79 b #> 7 5.66 b #> 8 1.66 b #> 9 2.84 c #> 10 0.83 c #> 11 6.81 c #> 12 7.42 c # An informative error is thrown when `indicators = \"none\"` and # factors are present in interaction terms or in inline functions try(mold(num_1 ~ num_2:fac_1, example_train, blueprint = bp_no_indicators)) #> Error in mold_formula_default_process_predictors(blueprint = blueprint, : #> Interaction terms involving factors or characters have been #> detected on the RHS of `formula`. These are not allowed when `indicators #> = \"none\"`. #> ℹ Interactions terms involving factors were detected for \"fac_1\" in #> `num_2:fac_1`. try(mold(num_1 ~ paste0(fac_1), example_train, blueprint = bp_no_indicators)) #> Error in mold_formula_default_process_predictors(blueprint = blueprint, : #> Functions involving factors or characters have been detected on #> the RHS of `formula`. These are not allowed when `indicators = \"none\"`. 
#> ℹ Functions involving factors were detected for \"fac_1\" in #> `paste0(fac_1)`. # --------------------------------------------------------------------------- # Multivariate outcomes # Multivariate formulas can be specified easily processed <- mold(num_1 + log(num_2) ~ fac_1, example_train) processed$outcomes #> # A tibble: 12 × 2 #> num_1 `log(num_2)` #> #> 1 1 -0.546 #> 2 2 -1.08 #> 3 3 -1.58 #> 4 4 -0.605 #> 5 5 -0.0367 #> 6 6 -0.460 #> 7 7 -0.213 #> 8 8 -1.57 #> 9 9 -1.15 #> 10 10 -2.49 #> 11 11 -0.480 #> 12 12 -0.481 # Inline functions on the LHS are run, but any matrix # output is flattened (like what happens in `model.matrix()`) # (essentially this means you don't wind up with columns # in the tibble that are matrices) processed <- mold(poly(num_2, degree = 2) ~ fac_1, example_train) processed$outcomes #> # A tibble: 12 × 2 #> `poly(num_2, degree = 2).1` `poly(num_2, degree = 2).2` #> #> 1 0.0981 -0.254 #> 2 -0.177 -0.157 #> 3 -0.327 0.108 #> 4 0.0604 -0.270 #> 5 0.537 0.634 #> 6 0.157 -0.209 #> 7 0.359 0.120 #> 8 -0.325 0.103 #> 9 -0.202 -0.124 #> 10 -0.468 0.492 #> 11 0.144 -0.221 #> 12 0.143 -0.222 # TRUE ncol(processed$outcomes) == 2 #> [1] TRUE # Multivariate formulas specified in mold() # carry over into forge() forge(example_test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 2 × 3 #> fac_1a fac_1b fac_1c #> #> 1 1 0 0 #> 2 0 0 1 #> #> $outcomes #> # A tibble: 2 × 2 #> `poly(num_2, degree = 2).1` `poly(num_2, degree = 2).2` #> #> 1 0.541 0.646 #> 2 0.306 0.00619 #> #> $extras #> $extras$offset #> NULL #> #> # --------------------------------------------------------------------------- # Offsets # Offsets are handled specially in base R, so they deserve special # treatment here as well. You can add offsets using the inline function # `offset()` processed <- mold(num_1 ~ offset(num_2) + fac_1, example_train) processed$extras$offset #> # A tibble: 12 × 1 #> .offset #> #> 1 0.579 #> 2 0.338 #> 3 0.206 #> 4 0.546 #> 5 0.964 #> 6 0.631 #> 7 0.808 #> 8 0.208 #> 9 0.316 #> 10 0.083 #> 11 0.619 #> 12 0.618 # Multiple offsets can be included, and they get added together processed <- mold( num_1 ~ offset(num_2) + offset(num_3), example_train ) identical( processed$extras$offset$.offset, example_train$num_2 + example_train$num_3 ) #> [1] TRUE # Forging test data will also require # and include the offset forge(example_test, processed$blueprint) #> $predictors #> # A tibble: 2 × 0 #> #> $outcomes #> NULL #> #> $extras #> $extras$offset #> # A tibble: 2 × 1 #> .offset #> #> 1 1.06 #> 2 0.802 #> #> # --------------------------------------------------------------------------- # Intercept only # Because `1` and `0` are intercept modifying terms, they are # not allowed in the formula and are instead controlled by the # `intercept` argument of the blueprint. To use an intercept # only formula, you should supply `NULL` on the RHS of the formula. 
mold( ~NULL, example_train, blueprint = default_formula_blueprint(intercept = TRUE) ) #> $predictors #> # A tibble: 12 × 1 #> `(Intercept)` #> #> 1 1 #> 2 1 #> 3 1 #> 4 1 #> 5 1 #> 6 1 #> 7 1 #> 8 1 #> 9 1 #> 10 1 #> 11 1 #> 12 1 #> #> $outcomes #> # A tibble: 12 × 0 #> #> $blueprint #> Formula blueprint: #> #> # Predictors: 0 #> # Outcomes: 0 #> Intercept: TRUE #> Novel Levels: FALSE #> Composition: tibble #> Indicators: traditional #> #> $extras #> $extras$offset #> NULL #> #> # --------------------------------------------------------------------------- # Matrix output for predictors # You can change the `composition` of the predictor data set bp <- default_formula_blueprint(composition = \"dgCMatrix\") processed <- mold(log(num_1) ~ num_2 + fac_1, example_train, blueprint = bp) class(processed$predictors) #> [1] \"dgCMatrix\" #> attr(,\"package\") #> [1] \"Matrix\""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Default recipe blueprint — default_recipe_blueprint","title":"Default recipe blueprint — default_recipe_blueprint","text":"pages holds details recipe preprocessing blueprint. blueprint used default mold() x recipe.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Default recipe blueprint — default_recipe_blueprint","text":"","code":"default_recipe_blueprint( intercept = FALSE, allow_novel_levels = FALSE, fresh = TRUE, strings_as_factors = TRUE, composition = \"tibble\" ) # S3 method for class 'recipe' mold(x, data, ..., blueprint = NULL)"},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Default recipe blueprint — default_recipe_blueprint","text":"intercept logical. intercept included processed data? information used process function mold forge function list. allow_novel_levels logical. novel factor levels allowed prediction time? information used clean function forge function list, passed scream(). fresh already trained operations re-trained prep() called? strings_as_factors character columns converted factors prep() called? composition Either \"tibble\", \"matrix\", \"dgCMatrix\" format processed predictors. \"matrix\" \"dgCMatrix\" chosen, predictors must numeric preprocessing method applied; otherwise error thrown. x unprepped recipe created recipes::recipe(). data data frame matrix containing outcomes predictors. ... used. blueprint preprocessing blueprint. left NULL, default_recipe_blueprint() used.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Default recipe blueprint — default_recipe_blueprint","text":"default_recipe_blueprint(), recipe blueprint.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"mold","dir":"Reference","previous_headings":"","what":"Mold","title":"Default recipe blueprint — default_recipe_blueprint","text":"mold() used default recipe blueprint: calls recipes::prep() prep recipe. calls recipes::juice() extract outcomes predictors. returned tibbles. 
intercept = TRUE, adds intercept column predictors.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"forge","dir":"Reference","previous_headings":"","what":"Forge","title":"Default recipe blueprint — default_recipe_blueprint","text":"forge() used default recipe blueprint: calls shrink() trim new_data required columns coerce new_data tibble. calls scream() perform validation structure columns new_data. calls recipes::bake() new_data using prepped recipe used training. adds intercept column onto new_data intercept = TRUE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_recipe_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Default recipe blueprint — default_recipe_blueprint","text":"","code":"library(recipes) #> Loading required package: dplyr #> #> Attaching package: ‘dplyr’ #> The following objects are masked from ‘package:stats’: #> #> filter, lag #> The following objects are masked from ‘package:base’: #> #> intersect, setdiff, setequal, union #> #> Attaching package: ‘recipes’ #> The following object is masked from ‘package:stats’: #> #> step # --------------------------------------------------------------------------- # Setup train <- iris[1:100, ] test <- iris[101:150, ] # --------------------------------------------------------------------------- # Recipes example # Create a recipe that logs a predictor rec <- recipe(Species ~ Sepal.Length + Sepal.Width, train) %>% step_log(Sepal.Length) processed <- mold(rec, train) # Sepal.Length has been logged processed$predictors #> # A tibble: 100 × 2 #> Sepal.Length Sepal.Width #> #> 1 1.63 3.5 #> 2 1.59 3 #> 3 1.55 3.2 #> 4 1.53 3.1 #> 5 1.61 3.6 #> 6 1.69 3.9 #> 7 1.53 3.4 #> 8 1.61 3.4 #> 9 1.48 2.9 #> 10 1.59 3.1 #> # ℹ 90 more rows processed$outcomes #> # A tibble: 100 × 1 #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 90 more rows # The underlying blueprint is a prepped recipe processed$blueprint$recipe #> #> ── Recipe ──────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 2 #> #> ── Training information #> Training data contained 100 data points and no incomplete rows. #> #> ── Operations #> • Log transformation on: Sepal.Length | Trained # Call forge() with the blueprint and the test data # to have it preprocess the test data in the same way forge(test, processed$blueprint) #> $predictors #> # A tibble: 50 × 2 #> Sepal.Length Sepal.Width #> #> 1 1.84 3.3 #> 2 1.76 2.7 #> 3 1.96 3 #> 4 1.84 2.9 #> 5 1.87 3 #> 6 2.03 3 #> 7 1.59 2.5 #> 8 1.99 2.9 #> 9 1.90 2.5 #> 10 1.97 3.6 #> # ℹ 40 more rows #> #> $outcomes #> NULL #> #> $extras #> $extras$roles #> NULL #> #> # Use `outcomes = TRUE` to also extract the preprocessed outcome! 
# This logged the Sepal.Length column of `new_data` forge(test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 50 × 2 #> Sepal.Length Sepal.Width #> #> 1 1.84 3.3 #> 2 1.76 2.7 #> 3 1.96 3 #> 4 1.84 2.9 #> 5 1.87 3 #> 6 2.03 3 #> 7 1.59 2.5 #> 8 1.99 2.9 #> 9 1.90 2.5 #> 10 1.97 3.6 #> # ℹ 40 more rows #> #> $outcomes #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows #> #> $extras #> $extras$roles #> NULL #> #> # --------------------------------------------------------------------------- # With an intercept # You can add an intercept with `intercept = TRUE` processed <- mold(rec, train, blueprint = default_recipe_blueprint(intercept = TRUE)) processed$predictors #> # A tibble: 100 × 3 #> `(Intercept)` Sepal.Length Sepal.Width #> #> 1 1 1.63 3.5 #> 2 1 1.59 3 #> 3 1 1.55 3.2 #> 4 1 1.53 3.1 #> 5 1 1.61 3.6 #> 6 1 1.69 3.9 #> 7 1 1.53 3.4 #> 8 1 1.61 3.4 #> 9 1 1.48 2.9 #> 10 1 1.59 3.1 #> # ℹ 90 more rows # But you also could have used a recipe step rec2 <- step_intercept(rec) mold(rec2, iris)$predictors #> # A tibble: 150 × 3 #> intercept Sepal.Length Sepal.Width #> #> 1 1 1.63 3.5 #> 2 1 1.59 3 #> 3 1 1.55 3.2 #> 4 1 1.53 3.1 #> 5 1 1.61 3.6 #> 6 1 1.69 3.9 #> 7 1 1.53 3.4 #> 8 1 1.61 3.4 #> 9 1 1.48 2.9 #> 10 1 1.59 3.1 #> # ℹ 140 more rows # --------------------------------------------------------------------------- # Matrix output for predictors # You can change the `composition` of the predictor data set bp <- default_recipe_blueprint(composition = \"dgCMatrix\") processed <- mold(rec, train, blueprint = bp) class(processed$predictors) #> [1] \"dgCMatrix\" #> attr(,\"package\") #> [1] \"Matrix\" # --------------------------------------------------------------------------- # Non standard roles # If you have custom recipes roles, they are assumed to be required at # `bake()` time when passing in `new_data`. This is an assumption that both # recipes and hardhat makes, meaning that those roles are required at # `forge()` time as well. rec_roles <- recipe(train) %>% update_role(Sepal.Width, new_role = \"predictor\") %>% update_role(Species, new_role = \"outcome\") %>% update_role(Sepal.Length, new_role = \"id\") %>% update_role(Petal.Length, new_role = \"important\") processed_roles <- mold(rec_roles, train) # The custom roles will be in the `mold()` result in case you need # them for modeling. 
processed_roles$extras #> $roles #> $roles$id #> # A tibble: 100 × 1 #> Sepal.Length #> #> 1 5.1 #> 2 4.9 #> 3 4.7 #> 4 4.6 #> 5 5 #> 6 5.4 #> 7 4.6 #> 8 5 #> 9 4.4 #> 10 4.9 #> # ℹ 90 more rows #> #> $roles$important #> # A tibble: 100 × 1 #> Petal.Length #> #> 1 1.4 #> 2 1.4 #> 3 1.3 #> 4 1.5 #> 5 1.4 #> 6 1.7 #> 7 1.4 #> 8 1.5 #> 9 1.4 #> 10 1.5 #> # ℹ 90 more rows #> #> $roles$`NA` #> # A tibble: 100 × 1 #> Petal.Width #> #> 1 0.2 #> 2 0.2 #> 3 0.2 #> 4 0.2 #> 5 0.2 #> 6 0.4 #> 7 0.3 #> 8 0.2 #> 9 0.2 #> 10 0.1 #> # ℹ 90 more rows #> #> # And they are in the `forge()` result forge(test, processed_roles$blueprint)$extras #> $roles #> $roles$id #> # A tibble: 50 × 1 #> Sepal.Length #> #> 1 6.3 #> 2 5.8 #> 3 7.1 #> 4 6.3 #> 5 6.5 #> 6 7.6 #> 7 4.9 #> 8 7.3 #> 9 6.7 #> 10 7.2 #> # ℹ 40 more rows #> #> $roles$important #> # A tibble: 50 × 1 #> Petal.Length #> #> 1 6 #> 2 5.1 #> 3 5.9 #> 4 5.6 #> 5 5.8 #> 6 6.6 #> 7 4.5 #> 8 6.3 #> 9 5.8 #> 10 6.1 #> # ℹ 40 more rows #> #> $roles$`NA` #> # A tibble: 50 × 1 #> Petal.Width #> #> 1 2.5 #> 2 1.9 #> 3 2.1 #> 4 1.8 #> 5 2.2 #> 6 2.1 #> 7 1.7 #> 8 1.8 #> 9 1.8 #> 10 2.5 #> # ℹ 40 more rows #> #> # If you remove a column with a custom role from the test data, then you # won't be able to `forge()` even though this recipe technically didn't # use that column in any steps test2 <- test test2$Petal.Length <- NULL try(forge(test2, processed_roles$blueprint)) #> Error in validate_column_names(data, cols) : #> The following required columns are missing: 'Petal.Length'. # Most of the time, if you find yourself in the above scenario, then we # suggest that you remove `Petal.Length` from the data that is supplied to # the recipe. If that isn't an option, you can declare that that column # isn't required at `bake()` time by using `update_role_requirements()` rec_roles <- update_role_requirements(rec_roles, \"important\", bake = FALSE) processed_roles <- mold(rec_roles, train) forge(test2, processed_roles$blueprint) #> $predictors #> # A tibble: 50 × 1 #> Sepal.Width #> #> 1 3.3 #> 2 2.7 #> 3 3 #> 4 2.9 #> 5 3 #> 6 3 #> 7 2.5 #> 8 2.9 #> 9 2.5 #> 10 3.6 #> # ℹ 40 more rows #> #> $outcomes #> NULL #> #> $extras #> $extras$roles #> $extras$roles$id #> # A tibble: 50 × 1 #> Sepal.Length #> #> 1 6.3 #> 2 5.8 #> 3 7.1 #> 4 6.3 #> 5 6.5 #> 6 7.6 #> 7 4.9 #> 8 7.3 #> 9 6.7 #> 10 7.2 #> # ℹ 40 more rows #> #> $extras$roles$important #> # A tibble: 50 × 0 #> #> $extras$roles$`NA` #> # A tibble: 50 × 1 #> Petal.Width #> #> 1 2.5 #> 2 1.9 #> 3 2.1 #> 4 1.8 #> 5 2.2 #> 6 2.1 #> 7 1.7 #> 8 1.8 #> 9 1.8 #> 10 2.5 #> # ℹ 40 more rows #> #> #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Default XY blueprint — default_xy_blueprint","title":"Default XY blueprint — default_xy_blueprint","text":"pages holds details XY preprocessing blueprint. blueprint used default mold() x y provided separately (.e. 
XY interface used).","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Default XY blueprint — default_xy_blueprint","text":"","code":"default_xy_blueprint( intercept = FALSE, allow_novel_levels = FALSE, composition = \"tibble\" ) # S3 method for class 'data.frame' mold(x, y, ..., blueprint = NULL) # S3 method for class 'matrix' mold(x, y, ..., blueprint = NULL)"},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Default XY blueprint — default_xy_blueprint","text":"intercept logical. intercept included processed data? information used process function mold forge function list. allow_novel_levels logical. novel factor levels allowed prediction time? information used clean function forge function list, passed scream(). composition Either \"tibble\", \"matrix\", \"dgCMatrix\" format processed predictors. \"matrix\" \"dgCMatrix\" chosen, predictors must numeric preprocessing method applied; otherwise error thrown. x data frame matrix containing predictors. y data frame, matrix, vector containing outcomes. ... used. blueprint preprocessing blueprint. left NULL, default_xy_blueprint() used.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Default XY blueprint — default_xy_blueprint","text":"default_xy_blueprint(), XY blueprint.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Default XY blueprint — default_xy_blueprint","text":"documented standardize(), y vector, returned outcomes tibble 1 column standardized name \".outcome\". one special thing XY method's forge function behavior outcomes = TRUE vector y value provided original call mold(). case, mold() converts y tibble, default name .outcome. column forge() look new_data preprocess. See examples section demonstration .","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"mold","dir":"Reference","previous_headings":"","what":"Mold","title":"Default XY blueprint — default_xy_blueprint","text":"mold() used default xy blueprint: converts x tibble. adds intercept column x intercept = TRUE. runs standardize() y.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"forge","dir":"Reference","previous_headings":"","what":"Forge","title":"Default XY blueprint — default_xy_blueprint","text":"forge() used default xy blueprint: calls shrink() trim new_data required columns coerce new_data tibble. calls scream() perform validation structure columns new_data. 
adds intercept column onto new_data intercept = TRUE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/default_xy_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Default XY blueprint — default_xy_blueprint","text":"","code":"# --------------------------------------------------------------------------- # Setup train <- iris[1:100, ] test <- iris[101:150, ] train_x <- train[\"Sepal.Length\"] train_y <- train[\"Species\"] test_x <- test[\"Sepal.Length\"] test_y <- test[\"Species\"] # --------------------------------------------------------------------------- # XY Example # First, call mold() with the training data processed <- mold(train_x, train_y) # Then, call forge() with the blueprint and the test data # to have it preprocess the test data in the same way forge(test_x, processed$blueprint) #> $predictors #> # A tibble: 50 × 1 #> Sepal.Length #> #> 1 6.3 #> 2 5.8 #> 3 7.1 #> 4 6.3 #> 5 6.5 #> 6 7.6 #> 7 4.9 #> 8 7.3 #> 9 6.7 #> 10 7.2 #> # ℹ 40 more rows #> #> $outcomes #> NULL #> #> $extras #> NULL #> # --------------------------------------------------------------------------- # Intercept processed <- mold(train_x, train_y, blueprint = default_xy_blueprint(intercept = TRUE)) forge(test_x, processed$blueprint) #> $predictors #> # A tibble: 50 × 2 #> `(Intercept)` Sepal.Length #> #> 1 1 6.3 #> 2 1 5.8 #> 3 1 7.1 #> 4 1 6.3 #> 5 1 6.5 #> 6 1 7.6 #> 7 1 4.9 #> 8 1 7.3 #> 9 1 6.7 #> 10 1 7.2 #> # ℹ 40 more rows #> #> $outcomes #> NULL #> #> $extras #> NULL #> # --------------------------------------------------------------------------- # XY Method and forge(outcomes = TRUE) # You can request that the new outcome columns are preprocessed as well, but # they have to be present in `new_data`! processed <- mold(train_x, train_y) # Can't do this! try(forge(test_x, processed$blueprint, outcomes = TRUE)) #> Error in validate_column_names(data, cols) : #> The following required columns are missing: 'Species'. # Need to use the full test set, including `y` forge(test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 50 × 1 #> Sepal.Length #> #> 1 6.3 #> 2 5.8 #> 3 7.1 #> 4 6.3 #> 5 6.5 #> 6 7.6 #> 7 4.9 #> 8 7.3 #> 9 6.7 #> 10 7.2 #> # ℹ 40 more rows #> #> $outcomes #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows #> #> $extras #> NULL #> # With the XY method, if the Y value used in `mold()` is a vector, # then a column name of `.outcome` is automatically generated. # This name is what forge() looks for in `new_data`. # Y is a vector! y_vec <- train_y$Species processed_vec <- mold(train_x, y_vec) # This throws an informative error that tell you # to include an `\".outcome\"` column in `new_data`. try(forge(iris, processed_vec$blueprint, outcomes = TRUE)) #> Error in validate_missing_name_isnt_.outcome(check$missing_names) : #> The following required columns are missing: '.outcome'. #> #> (This indicates that `mold()` was called with a vector for `y`. When this is the case, and the outcome columns are requested in `forge()`, `new_data` must include a column with the automatically generated name, '.outcome', containing the outcome.) 
test2 <- test test2$.outcome <- test2$Species test2$Species <- NULL # This works, and returns a tibble in the $outcomes slot forge(test2, processed_vec$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 50 × 1 #> Sepal.Length #> #> 1 6.3 #> 2 5.8 #> 3 7.1 #> 4 6.3 #> 5 6.5 #> 6 7.6 #> 7 4.9 #> 8 7.3 #> 9 6.7 #> 10 7.2 #> # ℹ 40 more rows #> #> $outcomes #> # A tibble: 50 × 1 #> .outcome #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows #> #> $extras #> NULL #> # --------------------------------------------------------------------------- # Matrix output for predictors # You can change the `composition` of the predictor data set bp <- default_xy_blueprint(composition = \"dgCMatrix\") processed <- mold(train_x, train_y, blueprint = bp) class(processed$predictors) #> [1] \"dgCMatrix\" #> attr(,\"package\") #> [1] \"Matrix\""},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":null,"dir":"Reference","previous_headings":"","what":"Delete the response from a terms object — delete_response","title":"Delete the response from a terms object — delete_response","text":"delete_response() exactly delete.response(), except fixes long standing bug also removing part \"dataClasses\" attribute corresponding response, exists.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Delete the response from a terms object — delete_response","text":"","code":"delete_response(terms)"},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Delete the response from a terms object — delete_response","text":"terms terms object.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Delete the response from a terms object — delete_response","text":"terms response sections removed.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Delete the response from a terms object — delete_response","text":"bug described : https://stat.ethz.ch/pipermail/r-devel/2012-January/062942.html","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/delete_response.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Delete the response from a terms object — delete_response","text":"","code":"framed <- model_frame(Species ~ Sepal.Width, iris) attr(delete.response(framed$terms), \"dataClasses\") #> Species Sepal.Width #> \"factor\" \"numeric\" attr(delete_response(framed$terms), \"dataClasses\") #> Sepal.Width #> \"numeric\""},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract a prototype — extract_ptype","title":"Extract a prototype — extract_ptype","text":"extract_ptype() extracts tibble 0 rows data. 
contains required information column names, classes, factor levels required check structure new data prediction time.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract a prototype — extract_ptype","text":"","code":"extract_ptype(data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract a prototype — extract_ptype","text":"data data frame matrix.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract a prototype — extract_ptype","text":"0 row slice data converting tibble.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Extract a prototype — extract_ptype","text":"extract_ptype() useful creating new preprocessing blueprint. extracts required information used validation checks prediction time.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/extract_ptype.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract a prototype — extract_ptype","text":"","code":"hardhat:::extract_ptype(iris) #> # A tibble: 0 × 5 #> # ℹ 5 variables: Sepal.Length , Sepal.Width , #> # Petal.Length , Petal.Width , Species "},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":null,"dir":"Reference","previous_headings":"","what":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"fct_encode_one_hot() encodes factor one-hot indicator matrix. matrix consists length(x) rows length(levels(x)) columns. Every value row matrix filled 0L except column name x[[]], instead filled 1L.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"","code":"fct_encode_one_hot(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"x factor. x contain missing values. x allowed ordered factor.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"integer matrix length(x) rows length(levels(x)) columns.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"columns returned order levels(x). 
x names, names propagated onto result row names.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/fct_encode_one_hot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Encode a factor as a one-hot indicator matrix — fct_encode_one_hot","text":"","code":"fct_encode_one_hot(factor(letters)) #> a b c d e f g h i j k l m n o p q r s t u v w x y z #> [1,] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [2,] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [3,] 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [4,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [5,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [6,] 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [7,] 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [8,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [9,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [10,] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [11,] 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [12,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [13,] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [14,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 #> [15,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 #> [16,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 #> [17,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 #> [18,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 #> [19,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 #> [20,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 #> [21,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 #> [22,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 #> [23,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 #> [24,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 #> [25,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 #> [26,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 fct_encode_one_hot(factor(letters[1:2], levels = letters)) #> a b c d e f g h i j k l m n o p q r s t u v w x y z #> [1,] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #> [2,] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 set.seed(1234) fct_encode_one_hot(factor(sample(letters[1:4], 10, TRUE))) #> a b c d #> [1,] 0 0 0 1 #> [2,] 0 0 0 1 #> [3,] 0 1 0 0 #> [4,] 0 1 0 0 #> [5,] 1 0 0 0 #> [6,] 0 0 0 1 #> [7,] 0 0 1 0 #> [8,] 1 0 0 0 #> [9,] 1 0 0 0 #> [10,] 0 1 0 0"},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":null,"dir":"Reference","previous_headings":"","what":"Forge prediction-ready data — forge","title":"Forge prediction-ready data — forge","text":"forge() applies transformations requested specific blueprint set new_data. new_data contains new predictors (potentially outcomes) used generate predictions. blueprints consistent return values others, unique enough help page. Click learn use one conjunction forge(). 
XY Method - default_xy_blueprint() Formula Method - default_formula_blueprint() Recipes Method - default_recipe_blueprint()","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Forge prediction-ready data — forge","text":"","code":"forge(new_data, blueprint, ..., outcomes = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Forge prediction-ready data — forge","text":"new_data data frame matrix predictors process. outcomes = TRUE, also contain outcomes process. blueprint preprocessing blueprint. ... used. outcomes logical. outcomes processed returned well?","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Forge prediction-ready data — forge","text":"named list 3 elements: predictors: tibble containing preprocessed new_data predictors. outcomes: outcomes = TRUE, tibble containing preprocessed outcomes found new_data. Otherwise, NULL. extras: Either NULL blueprint returns extra information, named list containing extra information.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Forge prediction-ready data — forge","text":"outcomes present new_data, can optionally processed returned outcomes slot returned list setting outcomes = TRUE. useful cross validation need preprocess outcomes test set computing performance.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/forge.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Forge prediction-ready data — forge","text":"","code":"# See the blueprint specific documentation linked above # for various ways to call forge with different # blueprints. train <- iris[1:100, ] test <- iris[101:150, ] # Formula processed <- mold( log(Sepal.Width) ~ Species, train, blueprint = default_formula_blueprint(indicators = \"none\") ) forge(test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows #> #> $outcomes #> # A tibble: 50 × 1 #> `log(Sepal.Width)` #> #> 1 1.19 #> 2 0.993 #> 3 1.10 #> 4 1.06 #> 5 1.10 #> 6 1.10 #> 7 0.916 #> 8 1.06 #> 9 0.916 #> 10 1.28 #> # ℹ 40 more rows #> #> $extras #> $extras$offset #> NULL #> #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Frequency weights — frequency_weights","title":"Frequency weights — frequency_weights","text":"frequency_weights() creates vector frequency weights allow compactly repeat observation set number times. 
Frequency weights supplied non-negative integer vector, whole numbers allowed.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Frequency weights — frequency_weights","text":"","code":"frequency_weights(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Frequency weights — frequency_weights","text":"x integer vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Frequency weights — frequency_weights","text":"new frequency weights vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Frequency weights — frequency_weights","text":"Frequency weights integers denote many times particular row data observed. help compress redundant rows single entry. tidymodels, frequency weights used parts preprocessing, model fitting, performance estimation operations.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/frequency_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Frequency weights — frequency_weights","text":"","code":"# Record that the first observation has 10 replicates, the second has 12 # replicates, and so on frequency_weights(c(10, 12, 2, 1)) #> #> [1] 10 12 2 1 # Fractional values are not allowed try(frequency_weights(c(1.5, 2.3, 10))) #> Error in frequency_weights(c(1.5, 2.3, 10)) : #> Can't convert from `x` to due to loss of precision. #> • Locations: 1, 2"},{"path":"https://hardhat.tidymodels.org/dev/reference/get_data_classes.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract data classes from a data frame or matrix — get_data_classes","title":"Extract data classes from a data frame or matrix — get_data_classes","text":"predicting model, often important new_data classes original data used fit model. get_data_classes() extracts classes original training data.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/get_data_classes.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract data classes from a data frame or matrix — get_data_classes","text":"","code":"get_data_classes(data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/get_data_classes.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract data classes from a data frame or matrix — get_data_classes","text":"data data frame matrix.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/get_data_classes.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract data classes from a data frame or matrix — get_data_classes","text":"named list. 
names column names data values character vectors containing class column.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/get_data_classes.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract data classes from a data frame or matrix — get_data_classes","text":"","code":"get_data_classes(iris) #> $Sepal.Length #> [1] \"numeric\" #> #> $Sepal.Width #> [1] \"numeric\" #> #> $Petal.Length #> [1] \"numeric\" #> #> $Petal.Width #> [1] \"numeric\" #> #> $Species #> [1] \"factor\" #> get_data_classes(as.matrix(mtcars)) #> $mpg #> [1] \"numeric\" #> #> $cyl #> [1] \"numeric\" #> #> $disp #> [1] \"numeric\" #> #> $hp #> [1] \"numeric\" #> #> $drat #> [1] \"numeric\" #> #> $wt #> [1] \"numeric\" #> #> $qsec #> [1] \"numeric\" #> #> $vs #> [1] \"numeric\" #> #> $am #> [1] \"numeric\" #> #> $gear #> [1] \"numeric\" #> #> $carb #> [1] \"numeric\" #> # Unlike .MFclass(), the full class # vector is returned data <- data.frame(col = ordered(c(\"a\", \"b\"))) .MFclass(data$col) #> [1] \"ordered\" get_data_classes(data) #> $col #> [1] \"ordered\" \"factor\" #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/get_levels.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract factor levels from a data frame — get_levels","title":"Extract factor levels from a data frame — get_levels","text":"get_levels() extracts levels factor columns data. mainly useful extracting original factor levels predictors training set. get_outcome_levels() small wrapper around get_levels() extracting levels factor outcome first calls standardize() y.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/get_levels.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract factor levels from a data frame — get_levels","text":"","code":"get_levels(data) get_outcome_levels(y)"},{"path":"https://hardhat.tidymodels.org/dev/reference/get_levels.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract factor levels from a data frame — get_levels","text":"data data.frame extract levels . y outcome. can : factor vector numeric vector 1D numeric array numeric matrix column names 2D numeric array column names data frame numeric factor columns","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/get_levels.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract factor levels from a data frame — get_levels","text":"named list many elements factor columns data y. names names factor columns, values character vectors levels. 
factor columns, NULL returned.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/get_levels.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract factor levels from a data frame — get_levels","text":"","code":"# Factor columns are returned with their levels get_levels(iris) #> $Species #> [1] \"setosa\" \"versicolor\" \"virginica\" #> # No factor columns get_levels(mtcars) #> NULL # standardize() is first run on `y` # which converts the input to a data frame # with an automatically named column, `\".outcome\"` get_outcome_levels(y = factor(letters[1:5])) #> $.outcome #> [1] \"a\" \"b\" \"c\" \"d\" \"e\" #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-example-data.html","id":null,"dir":"Reference","previous_headings":"","what":"Example data for hardhat — hardhat-example-data","title":"Example data for hardhat — hardhat-example-data","text":"Example data hardhat","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-example-data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Example data for hardhat — hardhat-example-data","text":"example_train,example_test tibbles","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-example-data.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Example data for hardhat — hardhat-example-data","text":"Data objects training test set variables: three numeric two factor columns.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-example-data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Example data for hardhat — hardhat-example-data","text":"","code":"data(\"hardhat-example-data\")"},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-extract.html","id":null,"dir":"Reference","previous_headings":"","what":"Generics for object extraction — hardhat-extract","title":"Generics for object extraction — hardhat-extract","text":"generics used extract elements various model objects. Methods defined packages, tune, workflows, workflowsets, returned object always . extract_fit_engine() returns engine specific fit embedded within parsnip model fit. example, using parsnip::linear_reg() \"lm\" engine, returns underlying lm object. extract_fit_parsnip() returns parsnip model fit. extract_mold() returns preprocessed \"mold\" object returned mold(). contains information preprocessing, including either prepped recipe, formula terms object, variable selectors. extract_spec_parsnip() returns parsnip model specification. extract_preprocessor() returns formula, recipe, variable expressions used preprocessing. extract_recipe() returns recipe, possibly estimated. extract_workflow() returns workflow, possibly fit. extract_parameter_dials() returns single dials parameter object. extract_parameter_set_dials() returns set dials parameter objects. extract_fit_time() returns tibble fit times.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-extract.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generics for object extraction — hardhat-extract","text":"","code":"extract_workflow(x, ...) extract_recipe(x, ...) extract_spec_parsnip(x, ...) extract_fit_parsnip(x, ...) extract_fit_engine(x, ...) extract_mold(x, ...) extract_preprocessor(x, ...) extract_postprocessor(x, ...) extract_parameter_dials(x, ...) extract_parameter_set_dials(x, ...) 
extract_fit_time(x, ...)"},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-extract.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Generics for object extraction — hardhat-extract","text":"x object. ... Extra arguments passed methods.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-extract.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Generics for object extraction — hardhat-extract","text":"","code":"# See packages where methods are defined for examples, such as `parsnip` or # `workflows`."},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-package.html","id":null,"dir":"Reference","previous_headings":"","what":"hardhat: Construct Modeling Packages — hardhat-package","title":"hardhat: Construct Modeling Packages — hardhat-package","text":"Building modeling packages hard. large amount effort generally goes providing implementation new method efficient, fast, correct, often less emphasis put user interface. good interface requires specialized knowledge S3 methods formulas, average package developer might . goal 'hardhat' reduce burden around building new modeling packages providing functionality preprocessing, predicting, validating input.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/hardhat-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"hardhat: Construct Modeling Packages — hardhat-package","text":"Maintainer: Hannah Frick hannah@posit.co (ORCID) Authors: Davis Vaughan davis@posit.co Max Kuhn max@posit.co contributors: Posit Software, PBC [copyright holder, funder]","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Importance weights — importance_weights","title":"Importance weights — importance_weights","text":"importance_weights() creates vector importance weights allow apply context dependent weight observations. Importance weights supplied non-negative double vector, fractional values allowed.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Importance weights — importance_weights","text":"","code":"importance_weights(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Importance weights — importance_weights","text":"x double vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Importance weights — importance_weights","text":"new importance weights vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Importance weights — importance_weights","text":"Importance weights focus much row data set influence model estimation. can based data arbitrarily set achieve goal. tidymodels, importance weights affect model estimation supervised recipes steps. 
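Supplementing the extract_*() generics entry above: the hardhat reference defers examples to the packages that register methods (such as parsnip or workflows). The following is an illustrative sketch only, not part of the hardhat documentation; it assumes the parsnip package is installed and uses its built-in "lm" engine for linear_reg().

# Hypothetical usage of the extraction generics via parsnip, which
# registers methods for its model fits.
library(parsnip)
spec <- linear_reg()                                  # parsnip model specification
fitted <- fit(spec, mpg ~ cyl + disp, data = mtcars)  # fit with the default "lm" engine
extract_fit_engine(fitted)                            # the underlying lm object
extract_spec_parsnip(fitted)                          # the original linear_reg() specification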
used yardstick functions calculating measures model performance.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/importance_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Importance weights — importance_weights","text":"","code":"importance_weights(c(1.5, 2.3, 10)) #> #> [1] 1.5 2.3 10.0"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Is x a preprocessing blueprint? — is_blueprint","title":"Is x a preprocessing blueprint? — is_blueprint","text":"is_blueprint() checks x inherits \"hardhat_blueprint\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is x a preprocessing blueprint? — is_blueprint","text":"","code":"is_blueprint(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Is x a preprocessing blueprint? — is_blueprint","text":"x object.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is x a preprocessing blueprint? — is_blueprint","text":"","code":"is_blueprint(default_xy_blueprint()) #> [1] TRUE"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_case_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Is x a case weights vector? — is_case_weights","title":"Is x a case weights vector? — is_case_weights","text":"is_case_weights() checks x inherits \"hardhat_case_weights\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_case_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is x a case weights vector? — is_case_weights","text":"","code":"is_case_weights(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_case_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Is x a case weights vector? — is_case_weights","text":"x object.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_case_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is x a case weights vector? — is_case_weights","text":"single TRUE FALSE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_case_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is x a case weights vector? — is_case_weights","text":"","code":"is_case_weights(1) #> [1] FALSE is_case_weights(frequency_weights(1)) #> [1] TRUE"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_frequency_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Is x a frequency weights vector? — is_frequency_weights","title":"Is x a frequency weights vector? — is_frequency_weights","text":"is_frequency_weights() checks x inherits \"hardhat_frequency_weights\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_frequency_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is x a frequency weights vector? 
— is_frequency_weights","text":"","code":"is_frequency_weights(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_frequency_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Is x a frequency weights vector? — is_frequency_weights","text":"x object.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_frequency_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is x a frequency weights vector? — is_frequency_weights","text":"single TRUE FALSE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_frequency_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is x a frequency weights vector? — is_frequency_weights","text":"","code":"is_frequency_weights(1) #> [1] FALSE is_frequency_weights(frequency_weights(1)) #> [1] TRUE is_frequency_weights(importance_weights(1)) #> [1] FALSE"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_importance_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Is x an importance weights vector? — is_importance_weights","title":"Is x an importance weights vector? — is_importance_weights","text":"is_importance_weights() checks x inherits \"hardhat_importance_weights\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_importance_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is x an importance weights vector? — is_importance_weights","text":"","code":"is_importance_weights(x)"},{"path":"https://hardhat.tidymodels.org/dev/reference/is_importance_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Is x an importance weights vector? — is_importance_weights","text":"x object.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_importance_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is x an importance weights vector? — is_importance_weights","text":"single TRUE FALSE.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/is_importance_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is x an importance weights vector? — is_importance_weights","text":"","code":"is_importance_weights(1) #> [1] FALSE is_importance_weights(frequency_weights(1)) #> [1] FALSE is_importance_weights(importance_weights(1)) #> [1] TRUE"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":null,"dir":"Reference","previous_headings":"","what":"Construct a model frame — model_frame","title":"Construct a model frame — model_frame","text":"model_frame() stricter version stats::model.frame(). number differences, main rows never dropped return value list frame terms separated two distinct objects.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Construct a model frame — model_frame","text":"","code":"model_frame(formula, data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Construct a model frame — model_frame","text":"formula formula terms object representing terms model frame. 
data data frame matrix containing terms formula.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Construct a model frame — model_frame","text":"named list two elements: \"data\": tibble containing model frame. \"terms\": terms object containing terms model frame.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Construct a model frame — model_frame","text":"following explains rationale difference arguments compared stats::model.frame(): subset: allowed number rows model_frame() run always . na.action: allowed forced \"na.pass\" number rows model_frame() run always . drop.unused.levels: allowed seems inconsistent data result model_frame() ever factor column different levels, unless specified though original_levels. required, done recipe step explicitly. xlev: allowed check done ahead time. Use scream() check integrity data training set required. ...: exposed offsets handled separately, necessary pass weights rows never dropped (weights subset alongside rest design matrix). non-predictor columns required, use \"roles\" features recipes. important always use results model_frame() model_matrix() rather stats::model.matrix() tibble result model_frame() terms object attached. model.matrix(, ) called directly, call model.frame() made automatically, can give faulty results.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_frame.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Construct a model frame — model_frame","text":"","code":"# --------------------------------------------------------------------------- # Example usage framed <- model_frame(Species ~ Sepal.Width, iris) framed$data #> # A tibble: 150 × 2 #> Species Sepal.Width #> #> 1 setosa 3.5 #> 2 setosa 3 #> 3 setosa 3.2 #> 4 setosa 3.1 #> 5 setosa 3.6 #> 6 setosa 3.9 #> 7 setosa 3.4 #> 8 setosa 3.4 #> 9 setosa 2.9 #> 10 setosa 3.1 #> # ℹ 140 more rows framed$terms #> Species ~ Sepal.Width #> attr(,\"variables\") #> list(Species, Sepal.Width) #> attr(,\"factors\") #> Sepal.Width #> Species 0 #> Sepal.Width 1 #> attr(,\"term.labels\") #> [1] \"Sepal.Width\" #> attr(,\"order\") #> [1] 1 #> attr(,\"intercept\") #> [1] 1 #> attr(,\"response\") #> [1] 1 #> attr(,\".Environment\") #> #> attr(,\"predvars\") #> list(Species, Sepal.Width) #> attr(,\"dataClasses\") #> Species Sepal.Width #> \"factor\" \"numeric\" # --------------------------------------------------------------------------- # Missing values never result in dropped rows iris2 <- iris iris2$Sepal.Width[1] <- NA framed2 <- model_frame(Species ~ Sepal.Width, iris2) head(framed2$data) #> # A tibble: 6 × 2 #> Species Sepal.Width #> #> 1 setosa NA #> 2 setosa 3 #> 3 setosa 3.2 #> 4 setosa 3.1 #> 5 setosa 3.6 #> 6 setosa 3.9 nrow(framed2$data) == nrow(iris2) #> [1] TRUE"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Construct a design matrix — model_matrix","title":"Construct a design matrix — model_matrix","text":"model_matrix() stricter version stats::model.matrix(). 
Notably, model_matrix() never drop rows, result tibble.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Construct a design matrix — model_matrix","text":"","code":"model_matrix(terms, data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Construct a design matrix — model_matrix","text":"terms terms object construct model matrix . typically terms object returned corresponding call model_frame(). data tibble construct design matrix . typically tibble returned corresponding call model_frame().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Construct a design matrix — model_matrix","text":"tibble containing design matrix.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Construct a design matrix — model_matrix","text":"following explains rationale difference arguments compared stats::model.matrix(): contrasts.arg: Set contrasts argument, options(\"contrasts\") globally, assign contrast factor interest directly using stats::contrasts(). See examples section. xlev: allowed model.frame() never called, unnecessary. ...: allowed default method model.matrix() use , lm method uses pass potential offsets weights , handled differently hardhat.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_matrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Construct a design matrix — model_matrix","text":"","code":"# --------------------------------------------------------------------------- # Example usage framed <- model_frame(Sepal.Width ~ Species, iris) model_matrix(framed$terms, framed$data) #> # A tibble: 150 × 3 #> `(Intercept)` Speciesversicolor Speciesvirginica #> #> 1 1 0 0 #> 2 1 0 0 #> 3 1 0 0 #> 4 1 0 0 #> 5 1 0 0 #> 6 1 0 0 #> 7 1 0 0 #> 8 1 0 0 #> 9 1 0 0 #> 10 1 0 0 #> # ℹ 140 more rows # --------------------------------------------------------------------------- # Missing values never result in dropped rows iris2 <- iris iris2$Species[1] <- NA framed2 <- model_frame(Sepal.Width ~ Species, iris2) model_matrix(framed2$terms, framed2$data) #> # A tibble: 150 × 3 #> `(Intercept)` Speciesversicolor Speciesvirginica #> #> 1 1 NA NA #> 2 1 0 0 #> 3 1 0 0 #> 4 1 0 0 #> 5 1 0 0 #> 6 1 0 0 #> 7 1 0 0 #> 8 1 0 0 #> 9 1 0 0 #> 10 1 0 0 #> # ℹ 140 more rows # --------------------------------------------------------------------------- # Contrasts # Default contrasts y <- factor(c(\"a\", \"b\")) x <- data.frame(y = y) framed <- model_frame(~y, x) # Setting contrasts directly y_with_contrast <- y contrasts(y_with_contrast) <- contr.sum(2) x2 <- data.frame(y = y_with_contrast) framed2 <- model_frame(~y, x2) # Compare! 
model_matrix(framed$terms, framed$data) #> # A tibble: 2 × 2 #> `(Intercept)` yb #> #> 1 1 0 #> 2 1 1 model_matrix(framed2$terms, framed2$data) #> # A tibble: 2 × 2 #> `(Intercept)` y1 #> #> 1 1 1 #> 2 1 -1 # Also, can set the contrasts globally global_override <- c(unordered = \"contr.sum\", ordered = \"contr.poly\") rlang::with_options( .expr = { model_matrix(framed$terms, framed$data) }, contrasts = global_override ) #> # A tibble: 2 × 2 #> `(Intercept)` y1 #> #> 1 1 1 #> 2 1 -1"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract a model offset — model_offset","title":"Extract a model offset — model_offset","text":"model_offset() extracts numeric offset model frame. inspired stats::model.offset(), nicer error messages slightly stricter.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract a model offset — model_offset","text":"","code":"model_offset(terms, data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract a model offset — model_offset","text":"terms \"terms\" object corresponding data, returned call model_frame(). data data frame returned call model_frame().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract a model offset — model_offset","text":"numeric vector representing offset.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Extract a model offset — model_offset","text":"column tagged offset numeric, nice error message thrown telling exactly column problematic. stats::model.offset() also allows column named \"(offset)\" considered offset along others tagged stats::offset(). However, stats::model.matrix() recognize columns offsets (remove ). 
inconsistency, columns named \"(offset)\" treated specially model_offset().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/model_offset.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract a model offset — model_offset","text":"","code":"x <- model.frame(Species ~ offset(Sepal.Width), iris) model_offset(terms(x), x) #> [1] 3.5 3.0 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 3.7 3.4 3.0 3.0 4.0 4.4 3.9 #> [18] 3.5 3.8 3.8 3.4 3.7 3.6 3.3 3.4 3.0 3.4 3.5 3.4 3.2 3.1 3.4 4.1 4.2 #> [35] 3.1 3.2 3.5 3.6 3.0 3.4 3.5 2.3 3.2 3.5 3.8 3.0 3.8 3.2 3.7 3.3 3.2 #> [52] 3.2 3.1 2.3 2.8 2.8 3.3 2.4 2.9 2.7 2.0 3.0 2.2 2.9 2.9 3.1 3.0 2.7 #> [69] 2.2 2.5 3.2 2.8 2.5 2.8 2.9 3.0 2.8 3.0 2.9 2.6 2.4 2.4 2.7 2.7 3.0 #> [86] 3.4 3.1 2.3 3.0 2.5 2.6 3.0 2.6 2.3 2.7 3.0 2.9 2.9 2.5 2.8 3.3 2.7 #> [103] 3.0 2.9 3.0 3.0 2.5 2.9 2.5 3.6 3.2 2.7 3.0 2.5 2.8 3.2 3.0 3.8 2.6 #> [120] 2.2 3.2 2.8 2.8 2.7 3.3 3.2 2.8 3.0 2.8 3.0 2.8 3.8 2.8 2.8 2.6 3.0 #> [137] 3.4 3.1 3.0 3.1 3.1 3.1 2.7 3.2 3.3 3.0 2.5 3.0 3.4 3.0 xx <- model.frame(Species ~ offset(Sepal.Width) + offset(Sepal.Length), iris) model_offset(terms(xx), xx) #> [1] 8.6 7.9 7.9 7.7 8.6 9.3 8.0 8.4 7.3 8.0 9.1 8.2 7.8 #> [14] 7.3 9.8 10.1 9.3 8.6 9.5 8.9 8.8 8.8 8.2 8.4 8.2 8.0 #> [27] 8.4 8.7 8.6 7.9 7.9 8.8 9.3 9.7 8.0 8.2 9.0 8.5 7.4 #> [40] 8.5 8.5 6.8 7.6 8.5 8.9 7.8 8.9 7.8 9.0 8.3 10.2 9.6 #> [53] 10.0 7.8 9.3 8.5 9.6 7.3 9.5 7.9 7.0 8.9 8.2 9.0 8.5 #> [66] 9.8 8.6 8.5 8.4 8.1 9.1 8.9 8.8 8.9 9.3 9.6 9.6 9.7 #> [79] 8.9 8.3 7.9 7.9 8.5 8.7 8.4 9.4 9.8 8.6 8.6 8.0 8.1 #> [92] 9.1 8.4 7.3 8.3 8.7 8.6 9.1 7.6 8.5 9.6 8.5 10.1 9.2 #> [105] 9.5 10.6 7.4 10.2 9.2 10.8 9.7 9.1 9.8 8.2 8.6 9.6 9.5 #> [118] 11.5 10.3 8.2 10.1 8.4 10.5 9.0 10.0 10.4 9.0 9.1 9.2 10.2 #> [131] 10.2 11.7 9.2 9.1 8.7 10.7 9.7 9.5 9.0 10.0 9.8 10.0 8.5 #> [144] 10.0 10.0 9.7 8.8 9.5 9.6 8.9 # Problematic columns are caught with intuitive errors tryCatch( expr = { x <- model.frame(~ offset(Species), iris) model_offset(terms(x), x) }, error = function(e) { print(e$message) } ) #> Column, 'offset(Species)', is tagged as an offset, but is not numeric. All offsets must be numeric."},{"path":"https://hardhat.tidymodels.org/dev/reference/modeling-usethis.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a modeling package — modeling-usethis","title":"Create a modeling package — modeling-usethis","text":"create_modeling_package() : Call usethis::create_package() set new R package. Call use_modeling_deps(). Call use_modeling_files(). use_modeling_deps() : Add hardhat, rlang, stats Imports Add recipes Suggests roxygen2 available, use roxygen markdown use_modeling_files() : Add package documentation file Generate populate 3 files R/: {{model}}-constructor.R {{model}}-fit.R {{model}}-predict.R","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/modeling-usethis.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a modeling package — modeling-usethis","text":"","code":"create_modeling_package(path, model, fields = NULL, open = interactive()) use_modeling_deps() use_modeling_files(model)"},{"path":"https://hardhat.tidymodels.org/dev/reference/modeling-usethis.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a modeling package — modeling-usethis","text":"path path. exists, used. exist, created, provided parent path exists. model string. name high level modeling function users call. example, \"linear_regression\". 
used populate skeleton. Spaces allowed. fields named list fields add DESCRIPTION, potentially overriding default values. See usethis::use_description() can set personalized defaults using package options. open TRUE, activates new project: RStudio desktop, package opened new session. RStudio server, current RStudio project activated. Otherwise, working directory active project changed.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/modeling-usethis.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a modeling package — modeling-usethis","text":"create_modeling_package() returns project path invisibly. use_modeling_deps() returns invisibly. use_modeling_files() return model invisibly.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/mold.html","id":null,"dir":"Reference","previous_headings":"","what":"Mold data for modeling — mold","title":"Mold data for modeling — mold","text":"mold() applies appropriate processing steps required get training data ready fed model. use various blueprints understand preprocess data come various forms, formula recipe. blueprints consistent return values others, unique enough help page. Click learn use one conjunction mold(). XY Method - default_xy_blueprint() Formula Method - default_formula_blueprint() Recipes Method - default_recipe_blueprint()","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/mold.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mold data for modeling — mold","text":"","code":"mold(x, ...)"},{"path":"https://hardhat.tidymodels.org/dev/reference/mold.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mold data for modeling — mold","text":"x object. See method specific implementations linked Description information. ... used.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/mold.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mold data for modeling — mold","text":"named list containing 4 elements: predictors: tibble containing molded predictors used model. outcomes: tibble containing molded outcomes used model. blueprint: method specific \"hardhat_blueprint\" object use making predictions. extras: Either NULL blueprint returns extra information, named list containing extra information.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/mold.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Mold data for modeling — mold","text":"","code":"# See the method specific documentation linked in Description # for the details of each blueprint, and more examples. 
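Supplementing the create_modeling_package()/use_modeling_files() entry above, which has no example of its own: a minimal sketch, assuming usethis is available and that writing to a temporary directory is acceptable. The path and the "linreg" directory name are illustrative only; "linear_regression" is the model name suggested in the entry's own argument description.

# Hypothetical call: scaffold a package for a `linear_regression()` model
# in a temporary directory, without opening a new RStudio session.
create_modeling_package(
  path = file.path(tempdir(), "linreg"),
  model = "linear_regression",
  open = FALSE
)
# Internally this calls use_modeling_deps() and use_modeling_files(), which
# generate the three R/ files: {{model}}-constructor.R, {{model}}-fit.R,
# and {{model}}-predict.R.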
# XY mold(iris[\"Sepal.Width\"], iris$Species) #> $predictors #> # A tibble: 150 × 1 #> Sepal.Width #> #> 1 3.5 #> 2 3 #> 3 3.2 #> 4 3.1 #> 5 3.6 #> 6 3.9 #> 7 3.4 #> 8 3.4 #> 9 2.9 #> 10 3.1 #> # ℹ 140 more rows #> #> $outcomes #> # A tibble: 150 × 1 #> .outcome #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 140 more rows #> #> $blueprint #> XY blueprint: #> #> # Predictors: 1 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble #> #> $extras #> NULL #> # Formula mold(Species ~ Sepal.Width, iris) #> $predictors #> # A tibble: 150 × 1 #> Sepal.Width #> #> 1 3.5 #> 2 3 #> 3 3.2 #> 4 3.1 #> 5 3.6 #> 6 3.9 #> 7 3.4 #> 8 3.4 #> 9 2.9 #> 10 3.1 #> # ℹ 140 more rows #> #> $outcomes #> # A tibble: 150 × 1 #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 140 more rows #> #> $blueprint #> Formula blueprint: #> #> # Predictors: 1 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble #> Indicators: traditional #> #> $extras #> $extras$offset #> NULL #> #> # Recipe library(recipes) mold(recipe(Species ~ Sepal.Width, iris), iris) #> $predictors #> # A tibble: 150 × 1 #> Sepal.Width #> #> 1 3.5 #> 2 3 #> 3 3.2 #> 4 3.1 #> 5 3.6 #> 6 3.9 #> 7 3.4 #> 8 3.4 #> 9 2.9 #> 10 3.1 #> # ℹ 140 more rows #> #> $outcomes #> # A tibble: 150 × 1 #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 140 more rows #> #> $blueprint #> Recipe blueprint: #> #> # Predictors: 1 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble #> #> $extras #> $extras$roles #> NULL #> #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/new-blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a new preprocessing blueprint — new_formula_blueprint","title":"Create a new preprocessing blueprint — new_formula_blueprint","text":"base classes creating new preprocessing blueprints. blueprints inherit one created new_blueprint(), default method specific blueprints inherit three . want create processing blueprint specific method, generally subclass one method specific blueprints . want create completely new preprocessing blueprint totally new preprocessing method (.e. formula, xy, recipe method) subclass new_blueprint(). 
addition creating blueprint subclass, likely also need provide S3 methods run_mold() run_forge() subclass.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new-blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a new preprocessing blueprint — new_formula_blueprint","text":"","code":"new_formula_blueprint( intercept = FALSE, allow_novel_levels = FALSE, ptypes = NULL, formula = NULL, indicators = \"traditional\", composition = \"tibble\", ..., subclass = character() ) new_recipe_blueprint( intercept = FALSE, allow_novel_levels = FALSE, fresh = TRUE, strings_as_factors = TRUE, composition = \"tibble\", ptypes = NULL, recipe = NULL, ..., subclass = character() ) new_xy_blueprint( intercept = FALSE, allow_novel_levels = FALSE, composition = \"tibble\", ptypes = NULL, ..., subclass = character() ) new_blueprint( intercept = FALSE, allow_novel_levels = FALSE, composition = \"tibble\", ptypes = NULL, ..., subclass = character() )"},{"path":"https://hardhat.tidymodels.org/dev/reference/new-blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a new preprocessing blueprint — new_formula_blueprint","text":"intercept logical. intercept included processed data? information used process function mold forge function list. allow_novel_levels logical. novel factor levels allowed prediction time? information used clean function forge function list, passed scream(). ptypes Either NULL, named list 2 elements, predictors outcomes, 0-row tibbles. ptypes generated automatically mold() time used validate new_data prediction time. formula Either NULL, formula specifies predictors outcomes preprocessed. argument set automatically mold() time. indicators single character string. Control factors expanded dummy variable indicator columns. One : \"traditional\" - default. Create dummy variables using traditional model.matrix() infrastructure. Generally creates K - 1 indicator columns factor, K number levels factor. \"none\" - Leave factor variables alone. expansion done. \"one_hot\" - Create dummy variables using one-hot encoding approach expands unordered factors K indicator columns, rather K - 1. composition Either \"tibble\", \"matrix\", \"dgCMatrix\" format processed predictors. \"matrix\" \"dgCMatrix\" chosen, predictors must numeric preprocessing method applied; otherwise error thrown. ... Name-value pairs additional elements blueprints subclass blueprint. subclass character vector. subclasses blueprint. fresh already trained operations re-trained prep() called? strings_as_factors character columns converted factors prep() called? recipe Either NULL, unprepped recipe. argument set automatically mold() time.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new-blueprint.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a new preprocessing blueprint — new_formula_blueprint","text":"preprocessing blueprint, list containing inputs used arguments function, along class specific type blueprint created.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new-default-blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a new default blueprint — new_default_formula_blueprint","title":"Create a new default blueprint — new_default_formula_blueprint","text":"page contains constructors default blueprints. 
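Supplementing the new_blueprint() family entry above, which describes subclassing in prose only: a minimal sketch, assuming the constructor accepts arbitrary named elements through `...` as documented. The subclass name "my_xy_blueprint" and the extra `tuning_info` element are made up for illustration.

# Hypothetical subclass of the XY blueprint carrying one extra element.
# A real extension would also register run_mold() and run_forge() methods
# for the "my_xy_blueprint" class.
bp <- new_xy_blueprint(
  intercept = TRUE,
  tuning_info = list(grid_size = 5),  # made-up extra element stored on the blueprint
  subclass = "my_xy_blueprint"
)
is_blueprint(bp)  # expected: TRUE, since the result still inherits "hardhat_blueprint"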
can extended want add extra behavior top default blueprints already , generally extend non-default versions constructors found documentation new_blueprint().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new-default-blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a new default blueprint — new_default_formula_blueprint","text":"","code":"new_default_formula_blueprint( intercept = FALSE, allow_novel_levels = FALSE, ptypes = NULL, formula = NULL, indicators = \"traditional\", composition = \"tibble\", terms = list(predictors = NULL, outcomes = NULL), levels = NULL, ..., subclass = character() ) new_default_recipe_blueprint( intercept = FALSE, allow_novel_levels = FALSE, fresh = TRUE, strings_as_factors = TRUE, composition = \"tibble\", ptypes = NULL, recipe = NULL, extra_role_ptypes = NULL, ..., subclass = character() ) new_default_xy_blueprint( intercept = FALSE, allow_novel_levels = FALSE, composition = \"tibble\", ptypes = NULL, ..., subclass = character() )"},{"path":"https://hardhat.tidymodels.org/dev/reference/new-default-blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a new default blueprint — new_default_formula_blueprint","text":"intercept logical. intercept included processed data? information used process function mold forge function list. allow_novel_levels logical. novel factor levels allowed prediction time? information used clean function forge function list, passed scream(). ptypes Either NULL, named list 2 elements, predictors outcomes, 0-row tibbles. ptypes generated automatically mold() time used validate new_data prediction time. formula Either NULL, formula specifies predictors outcomes preprocessed. argument set automatically mold() time. indicators single character string. Control factors expanded dummy variable indicator columns. One : \"traditional\" - default. Create dummy variables using traditional model.matrix() infrastructure. Generally creates K - 1 indicator columns factor, K number levels factor. \"none\" - Leave factor variables alone. expansion done. \"one_hot\" - Create dummy variables using one-hot encoding approach expands unordered factors K indicator columns, rather K - 1. composition Either \"tibble\", \"matrix\", \"dgCMatrix\" format processed predictors. \"matrix\" \"dgCMatrix\" chosen, predictors must numeric preprocessing method applied; otherwise error thrown. terms named list two elements, predictors outcomes. elements terms objects describe terms outcomes predictors separately. argument set automatically mold() time. levels Either NULL named list character vectors correspond levels observed converting character predictor columns factors mold(). argument set automatically mold() time. ... Name-value pairs additional elements blueprints subclass blueprint. subclass character vector. subclasses blueprint. fresh already trained operations re-trained prep() called? strings_as_factors character columns converted factors prep() called? recipe Either NULL, unprepped recipe. argument set automatically mold() time. extra_role_ptypes named list. names unique non-standard recipe roles (.e. everything except \"predictors\" \"outcomes\"). values prototypes original columns role. 
used validation forge().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_case_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Extend case weights — new_case_weights","title":"Extend case weights — new_case_weights","text":"new_case_weights() developer oriented function constructing new case weights type. type abstract type little functionality. , class required argument.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_case_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extend case weights — new_case_weights","text":"","code":"new_case_weights(x, ..., class)"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_case_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extend case weights — new_case_weights","text":"x integer double vector. ... Name-value pairs defining attributes class Name subclass.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_case_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extend case weights — new_case_weights","text":"new subclassed case weights vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_case_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extend case weights — new_case_weights","text":"","code":"new_case_weights(1:5, class = \"my_weights\") #> #> [1] 1 2 3 4 5"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_frequency_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Construct a frequency weights vector — new_frequency_weights","title":"Construct a frequency weights vector — new_frequency_weights","text":"new_frequency_weights() developer oriented function constructing new frequency weights vector. Generally, use frequency_weights() instead.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_frequency_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Construct a frequency weights vector — new_frequency_weights","text":"","code":"new_frequency_weights(x = integer(), ..., class = character())"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_frequency_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Construct a frequency weights vector — new_frequency_weights","text":"x integer vector. ... 
Name-value pairs defining attributes class Name subclass.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_frequency_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Construct a frequency weights vector — new_frequency_weights","text":"new frequency weights vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_frequency_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Construct a frequency weights vector — new_frequency_weights","text":"","code":"new_frequency_weights() #> new_frequency_weights(1:5) #> #> [1] 1 2 3 4 5"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_importance_weights.html","id":null,"dir":"Reference","previous_headings":"","what":"Construct an importance weights vector — new_importance_weights","title":"Construct an importance weights vector — new_importance_weights","text":"new_importance_weights() developer oriented function constructing new importance weights vector. Generally, use importance_weights() instead.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_importance_weights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Construct an importance weights vector — new_importance_weights","text":"","code":"new_importance_weights(x = double(), ..., class = character())"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_importance_weights.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Construct an importance weights vector — new_importance_weights","text":"x double vector. ... Name-value pairs defining attributes class Name subclass.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_importance_weights.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Construct an importance weights vector — new_importance_weights","text":"new importance weights vector.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_importance_weights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Construct an importance weights vector — new_importance_weights","text":"","code":"new_importance_weights() #> new_importance_weights(c(1.5, 2.3, 10)) #> #> [1] 1.5 2.3 10.0"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":null,"dir":"Reference","previous_headings":"","what":"Constructor for a base model — new_model","title":"Constructor for a base model — new_model","text":"model scalar object, classified Advanced R. , takes uniquely named elements ... combines list class class. entire object represent single model.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Constructor for a base model — new_model","text":"","code":"new_model(..., blueprint = default_xy_blueprint(), class = character())"},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Constructor for a base model — new_model","text":"... Name-value pairs elements specific model defined class. blueprint preprocessing blueprint returned call mold(). 
class character vector representing class model.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Constructor for a base model — new_model","text":"new scalar model object, represented classed list named elements specified ....","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Constructor for a base model — new_model","text":"every model multiple interfaces, including formula recipes interfaces, models blueprint can process new data predict() called. easiest way generate blueprint information required prediction time use one returned call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/new_model.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Constructor for a base model — new_model","text":"","code":"new_model( custom_element = \"my-elem\", blueprint = default_xy_blueprint(), class = \"custom_model\" ) #> #> $custom_element #> [1] \"my-elem\" #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/quantile_pred.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a vector containing sets of quantiles — quantile_pred","title":"Create a vector containing sets of quantiles — quantile_pred","text":"quantile_pred() special vector class used efficiently store predictions quantile regression model. requires quantile levels row predicted.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/quantile_pred.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a vector containing sets of quantiles — quantile_pred","text":"","code":"quantile_pred(values, quantile_levels = double()) extract_quantile_levels(x) # S3 method for class 'quantile_pred' as_tibble(x, ..., .rows = NULL, .name_repair = \"minimal\", rownames = NULL) # S3 method for class 'quantile_pred' as.matrix(x, ...)"},{"path":"https://hardhat.tidymodels.org/dev/reference/quantile_pred.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a vector containing sets of quantiles — quantile_pred","text":"values matrix values. column correspond one quantile levels. quantile_levels vector probabilities corresponding values. x object produced quantile_pred(). ... currently used. .rows, .name_repair, rownames Arguments used required original S3 method.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/quantile_pred.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a vector containing sets of quantiles — quantile_pred","text":"quantile_pred() returns vector values associated quantile levels. extract_quantile_levels() returns numeric vector levels. as_tibble() returns tibble rows \".pred_quantile\", \".quantile_levels\", \".row\". 
.matrix() returns unnamed matrix rows samples, columns quantile levels, entries predictions.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/quantile_pred.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a vector containing sets of quantiles — quantile_pred","text":"","code":".pred_quantile <- quantile_pred(matrix(rnorm(20), 5), c(.2, .4, .6, .8)) unclass(.pred_quantile) #> [[1]] #> [1] 0.5060559 -0.4771927 -0.1102855 0.1340882 #> #> [[2]] #> [1] -0.5747400 -0.9983864 -0.5110095 -0.4906859 #> #> [[3]] #> [1] -0.5466319 -0.7762539 -0.9111954 -0.4405479 #> #> [[4]] #> [1] -0.56445200 0.06445882 -0.83717168 0.45958944 #> #> [[5]] #> [1] -0.8900378 0.9594941 2.4158352 -0.6937202 #> #> attr(,\"quantile_levels\") #> [1] 0.2 0.4 0.6 0.8 # Access the underlying information extract_quantile_levels(.pred_quantile) #> [1] 0.2 0.4 0.6 0.8 # Matrix format as.matrix(.pred_quantile) #> [,1] [,2] [,3] [,4] #> [1,] 0.5060559 -0.47719270 -0.1102855 0.1340882 #> [2,] -0.5747400 -0.99838644 -0.5110095 -0.4906859 #> [3,] -0.5466319 -0.77625389 -0.9111954 -0.4405479 #> [4,] -0.5644520 0.06445882 -0.8371717 0.4595894 #> [5,] -0.8900378 0.95949406 2.4158352 -0.6937202 # Tidy format library(tibble) as_tibble(.pred_quantile) #> # A tibble: 20 × 3 #> .pred_quantile .quantile_levels .row #> #> 1 0.506 0.2 1 #> 2 -0.477 0.4 1 #> 3 -0.110 0.6 1 #> 4 0.134 0.8 1 #> 5 -0.575 0.2 2 #> 6 -0.998 0.4 2 #> 7 -0.511 0.6 2 #> 8 -0.491 0.8 2 #> 9 -0.547 0.2 3 #> 10 -0.776 0.4 3 #> 11 -0.911 0.6 3 #> 12 -0.441 0.8 3 #> 13 -0.564 0.2 4 #> 14 0.0645 0.4 4 #> 15 -0.837 0.6 4 #> 16 0.460 0.8 4 #> 17 -0.890 0.2 5 #> 18 0.959 0.4 5 #> 19 2.42 0.6 5 #> 20 -0.694 0.8 5"},{"path":"https://hardhat.tidymodels.org/dev/reference/recompose.html","id":null,"dir":"Reference","previous_headings":"","what":"Recompose a data frame into another form — recompose","title":"Recompose a data frame into another form — recompose","text":"recompose() takes data frame converts one : tibble data frame matrix sparse matrix (using Matrix package) internal function used hardhat recipes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/recompose.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Recompose a data frame into another form — recompose","text":"","code":"recompose(data, ..., composition = \"tibble\")"},{"path":"https://hardhat.tidymodels.org/dev/reference/recompose.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Recompose a data frame into another form — recompose","text":"data data frame. ... dots future extensions must empty. composition One : \"tibble\" convert tibble. \"data.frame\" convert base data frame. \"matrix\" convert matrix. columns must numeric. \"dgCMatrix\" convert sparse matrix. 
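As a small hedged addition to the quantile_pred() examples above, the matrix form pairs naturally with extract_quantile_levels() when a single level is wanted; the toy values are made up for illustration.

library(hardhat)
# Two rows of toy predictions at four quantile levels.
qp <- quantile_pred(matrix(1:8, nrow = 2), c(0.25, 0.5, 0.75, 0.9))
m <- as.matrix(qp)
# Columns follow the order of the quantile levels, so this selects the medians.
m[, extract_quantile_levels(qp) == 0.5]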
columns must numeric, Matrix package must installed.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/recompose.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Recompose a data frame into another form — recompose","text":"output type determined composition.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/recompose.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Recompose a data frame into another form — recompose","text":"","code":"df <- vctrs::data_frame(x = 1) recompose(df) #> # A tibble: 1 × 1 #> x #> #> 1 1 recompose(df, composition = \"matrix\") #> x #> [1,] 1 # All columns must be numeric to convert to a matrix df <- vctrs::data_frame(x = 1, y = \"a\") try(recompose(df, composition = \"matrix\")) #> Error in recompose(df, composition = \"matrix\") : #> `data` must only contain numeric columns. #> ℹ These columns aren't numeric: \"y\"."},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Refresh a preprocessing blueprint — refresh_blueprint","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"refresh_blueprint() developer facing generic function called end update_blueprint(). simply wrapper around method specific new_*_blueprint() function runs updated blueprint constructor ensure elements blueprint still valid update.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"","code":"refresh_blueprint(blueprint)"},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"blueprint preprocessing blueprint.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"blueprint returned call corresponding constructor.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"implement custom blueprint, export refresh_blueprint() method just calls constructor blueprint passes elements blueprint constructor.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/refresh_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Refresh a preprocessing blueprint — refresh_blueprint","text":"","code":"blueprint <- default_xy_blueprint() # This should never be done manually, but is essentially # what `update_blueprint(blueprint, intercept = TRUE)` does for you blueprint$intercept <- TRUE # Then update_blueprint() will call refresh_blueprint() # to ensure that the structure is correct refresh_blueprint(blueprint) #> XY blueprint: #> #> # Predictors: 0 #> # Outcomes: 0 #> Intercept: TRUE #> Novel Levels: FALSE #> Composition: tibble # So you can't do something like... 
blueprint_bad <- blueprint blueprint_bad$intercept <- 1 # ...because the constructor will catch it try(refresh_blueprint(blueprint_bad)) #> Error in new_blueprint(intercept = intercept, allow_novel_levels = allow_novel_levels, : #> `intercept` must be `TRUE` or `FALSE`, not the number 1. # And update_blueprint() catches this automatically try(update_blueprint(blueprint, intercept = 1)) #> Error in new_blueprint(intercept = intercept, allow_novel_levels = allow_novel_levels, : #> `intercept` must be `TRUE` or `FALSE`, not the number 1."},{"path":"https://hardhat.tidymodels.org/dev/reference/run-forge.html","id":null,"dir":"Reference","previous_headings":"","what":"forge() according to a blueprint — run-forge","title":"forge() according to a blueprint — run-forge","text":"developer facing function used creating blueprint subclass. called forge() dispatches S3 class blueprint. gives opportunity forge new data way specific blueprint. run_forge() always called forge() arguments, unlike run_mold(), different interfaces calling forge(). run_forge() always called : run_forge(blueprint, new_data = new_data, outcomes = outcomes) write blueprint subclass new_xy_blueprint(), new_recipe_blueprint(), new_formula_blueprint(), new_blueprint(), run_forge() method signature must match .","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-forge.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"forge() according to a blueprint — run-forge","text":"","code":"run_forge(blueprint, new_data, ..., outcomes = FALSE) # S3 method for class 'default_formula_blueprint' run_forge(blueprint, new_data, ..., outcomes = FALSE) # S3 method for class 'default_recipe_blueprint' run_forge(blueprint, new_data, ..., outcomes = FALSE) # S3 method for class 'default_xy_blueprint' run_forge(blueprint, new_data, ..., outcomes = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/run-forge.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"forge() according to a blueprint — run-forge","text":"blueprint preprocessing blueprint. new_data data frame matrix predictors process. outcomes = TRUE, also contain outcomes process. ... used. outcomes logical. outcomes processed returned well?","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-forge.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"forge() according to a blueprint — run-forge","text":"run_forge() methods return object immediately returned forge(). See return value section forge() understand structure return value look like.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-forge.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"forge() according to a blueprint — run-forge","text":"","code":"bp <- default_xy_blueprint() outcomes <- mtcars[\"mpg\"] predictors <- mtcars predictors$mpg <- NULL mold <- run_mold(bp, x = predictors, y = outcomes) run_forge(mold$blueprint, new_data = predictors) #> $predictors #> # A tibble: 32 × 10 #> cyl disp hp drat wt qsec vs am gear carb #> #> 1 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> #> $outcomes #> NULL #> #> $extras #> NULL #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/run-mold.html","id":null,"dir":"Reference","previous_headings":"","what":"mold() according to a blueprint — run-mold","title":"mold() according to a blueprint — run-mold","text":"developer facing function used creating blueprint subclass. called mold() dispatches S3 class blueprint. gives opportunity mold data way specific blueprint. run_mold() called different arguments depending interface mold() used: XY interface: run_mold(blueprint, x = x, y = y) Formula interface: run_mold(blueprint, data = data) Additionally, blueprint updated contain formula. Recipe interface: run_mold(blueprint, data = data) Additionally, blueprint updated contain recipe. write blueprint subclass new_xy_blueprint(), new_recipe_blueprint(), new_formula_blueprint() run_mold() method signature must match whichever interface listed used. write completely new blueprint inheriting new_blueprint() write new mold() method (using xy, formula, recipe interface), full control run_mold() called.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-mold.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"mold() according to a blueprint — run-mold","text":"","code":"run_mold(blueprint, ...) # S3 method for class 'default_formula_blueprint' run_mold(blueprint, ..., data) # S3 method for class 'default_recipe_blueprint' run_mold(blueprint, ..., data) # S3 method for class 'default_xy_blueprint' run_mold(blueprint, ..., x, y)"},{"path":"https://hardhat.tidymodels.org/dev/reference/run-mold.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"mold() according to a blueprint — run-mold","text":"blueprint preprocessing blueprint. ... used. Required extensibility. data data frame matrix containing outcomes predictors. x data frame matrix containing predictors. y data frame, matrix, vector containing outcomes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-mold.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"mold() according to a blueprint — run-mold","text":"run_mold() methods return object immediately returned mold(). See return value section mold() understand structure return value look like.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/run-mold.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"mold() according to a blueprint — run-mold","text":"","code":"bp <- default_xy_blueprint() outcomes <- mtcars[\"mpg\"] predictors <- mtcars predictors$mpg <- NULL run_mold(bp, x = predictors, y = outcomes) #> $predictors #> # A tibble: 32 × 10 #> cyl disp hp drat wt qsec vs am gear carb #> #> 1 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> #> $outcomes #> # A tibble: 32 × 1 #> mpg #> #> 1 21 #> 2 21 #> 3 22.8 #> 4 21.4 #> 5 18.7 #> 6 18.1 #> 7 14.3 #> 8 24.4 #> 9 22.8 #> 10 19.2 #> # ℹ 22 more rows #> #> $blueprint #> XY blueprint: #> #> # Predictors: 10 #> # Outcomes: 1 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble #> #> $extras #> NULL #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":null,"dir":"Reference","previous_headings":"","what":"Scream — scream","title":"Scream — scream","text":"scream() ensures structure data prototype, ptype. hood, vctrs::vec_cast() used, casts column data type corresponding column ptype. casting enforces number important structural checks, including limited : Data Classes - Checks class column data corresponding column ptype. Novel Levels - Checks factor columns data new levels compared ptype columns. new levels, warning issued coerced NA. check optional, can turned allow_novel_levels = TRUE. Level Recovery - Checks factor columns data missing factor levels compared ptype columns. missing levels, restored.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Scream — scream","text":"","code":"scream(data, ptype, allow_novel_levels = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Scream — scream","text":"data data frame containing new data check structure . ptype data frame prototype cast data . commonly 0-row slice training set. allow_novel_levels novel factor levels data allowed? safest approach default, throws warning novel levels found, coerces NA values. Setting argument TRUE ignore novel levels. argument apply ordered factors. Novel levels allowed ordered factors level ordering critical part type.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Scream — scream","text":"tibble containing required columns required structural modifications made.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Scream — scream","text":"scream() called forge() shrink() actual processing done. Generally, need call scream() directly, forge() . scream() used standalone function, good practice call shrink() right checks scream() ensure required column names actually exist data. checks exist shrink().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"factor-levels","dir":"Reference","previous_headings":"","what":"Factor Levels","title":"Scream — scream","text":"scream() tries helpful recovering missing factor levels warning novel levels. following graphic outlines scream() handles factor levels coercing column data column ptype. Note ordered factor handing much stricter factor handling. 
Ordered factors data must exactly levels ordered factors ptype.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/scream.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Scream — scream","text":"","code":"# --------------------------------------------------------------------------- # Setup train <- iris[1:100, ] test <- iris[101:150, ] # mold() is run at model fit time # and a formula preprocessing blueprint is recorded x <- mold(log(Sepal.Width) ~ Species, train) # Inside the result of mold() are the prototype tibbles # for the predictors and the outcomes ptype_pred <- x$blueprint$ptypes$predictors ptype_out <- x$blueprint$ptypes$outcomes # --------------------------------------------------------------------------- # shrink() / scream() # Pass the test data, along with a prototype, to # shrink() to extract the prototype columns test_shrunk <- shrink(test, ptype_pred) # Now pass that to scream() to perform validation checks # If no warnings / errors are thrown, the checks were # successful! scream(test_shrunk, ptype_pred) #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows # --------------------------------------------------------------------------- # Outcomes # To also extract the outcomes, use the outcome prototype test_outcome <- shrink(test, ptype_out) scream(test_outcome, ptype_out) #> # A tibble: 50 × 1 #> Sepal.Width #> #> 1 3.3 #> 2 2.7 #> 3 3 #> 4 2.9 #> 5 3 #> 6 3 #> 7 2.5 #> 8 2.9 #> 9 2.5 #> 10 3.6 #> # ℹ 40 more rows # --------------------------------------------------------------------------- # Casting # scream() uses vctrs::vec_cast() to intelligently convert # new data to the prototype automatically. This means # it can automatically perform certain conversions, like # coercing character columns to factors. test2 <- test test2$Species <- as.character(test2$Species) test2_shrunk <- shrink(test2, ptype_pred) scream(test2_shrunk, ptype_pred) #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows # It can also recover missing factor levels. # For example, it is plausible that the test data only had the # \"virginica\" level test3 <- test test3$Species <- factor(test3$Species, levels = \"virginica\") test3_shrunk <- shrink(test3, ptype_pred) test3_fixed <- scream(test3_shrunk, ptype_pred) # scream() recovered the missing levels levels(test3_fixed$Species) #> [1] \"setosa\" \"versicolor\" \"virginica\" # --------------------------------------------------------------------------- # Novel levels # When novel levels with any data are present in `data`, the default # is to coerce them to `NA` values with a warning. test4 <- test test4$Species <- as.character(test4$Species) test4$Species[1] <- \"new_level\" test4$Species <- factor( test4$Species, levels = c(levels(test$Species), \"new_level\") ) test4 <- shrink(test4, ptype_pred) # Warning is thrown test4_removed <- scream(test4, ptype_pred) #> Warning: Novel levels found in column 'Species': 'new_level'. The levels have been removed, and values have been coerced to 'NA'. 
# Novel level is removed levels(test4_removed$Species) #> [1] \"setosa\" \"versicolor\" \"virginica\" # No warning is thrown test4_kept <- scream(test4, ptype_pred, allow_novel_levels = TRUE) # Novel level is kept levels(test4_kept$Species) #> [1] \"setosa\" \"versicolor\" \"virginica\" \"new_level\""},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":null,"dir":"Reference","previous_headings":"","what":"Subset only required columns — shrink","title":"Subset only required columns — shrink","text":"shrink() subsets data contain required columns specified prototype, ptype.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Subset only required columns — shrink","text":"","code":"shrink(data, ptype)"},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Subset only required columns — shrink","text":"data data frame containing data subset. ptype data frame prototype containing required columns.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Subset only required columns — shrink","text":"tibble containing required columns.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Subset only required columns — shrink","text":"shrink() called forge() scream() actual processing done.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/shrink.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Subset only required columns — shrink","text":"","code":"# --------------------------------------------------------------------------- # Setup train <- iris[1:100, ] test <- iris[101:150, ] # --------------------------------------------------------------------------- # shrink() # mold() is run at model fit time # and a formula preprocessing blueprint is recorded x <- mold(log(Sepal.Width) ~ Species, train) # Inside the result of mold() are the prototype tibbles # for the predictors and the outcomes ptype_pred <- x$blueprint$ptypes$predictors ptype_out <- x$blueprint$ptypes$outcomes # Pass the test data, along with a prototype, to # shrink() to extract the prototype columns shrink(test, ptype_pred) #> # A tibble: 50 × 1 #> Species #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows # To extract the outcomes, just use the # outcome prototype shrink(test, ptype_out) #> # A tibble: 50 × 1 #> Sepal.Width #> #> 1 3.3 #> 2 2.7 #> 3 3 #> 4 2.9 #> 5 3 #> 6 3 #> 7 2.5 #> 8 2.9 #> 9 2.5 #> 10 3.6 #> # ℹ 40 more rows # shrink() makes sure that the columns # required by `ptype` actually exist in the data # and errors nicely when they don't test2 <- subset(test, select = -Species) try(shrink(test2, ptype_pred)) #> Error in validate_column_names(data, cols) : #> The following required columns are missing: 'Species'."},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce-multiple.html","id":null,"dir":"Reference","previous_headings":"","what":"Spruce up multi-outcome predictions — spruce-multiple","title":"Spruce up multi-outcome predictions — spruce-multiple","text":"family spruce_*_multiple() functions 
converts multi-outcome predictions standardized format. generally called prediction implementation function specific type prediction return.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce-multiple.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Spruce up multi-outcome predictions — spruce-multiple","text":"","code":"spruce_numeric_multiple(...) spruce_class_multiple(...) spruce_prob_multiple(...)"},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce-multiple.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Spruce up multi-outcome predictions — spruce-multiple","text":"... Multiple vectors predictions: spruce_numeric_multiple(), numeric vectors equal size. spruce_class_multiple(), factors \"hard\" class predictions equal size. spruce_prob_multiple(), tibbles equal size, result calling spruce_prob() matrix prediction probabilities. ... named, name used suffix resulting column name, otherwise positional index used.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce-multiple.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Spruce up multi-outcome predictions — spruce-multiple","text":"spruce_numeric_multiple(), tibble numeric columns named pattern .pred_*. spruce_class_multiple(), tibble factor columns named pattern .pred_class_*. spruce_prob_multiple(), tibble data frame columns named pattern .pred_*.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce-multiple.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Spruce up multi-outcome predictions — spruce-multiple","text":"","code":"spruce_numeric_multiple(1:3, foo = 2:4) #> # A tibble: 3 × 2 #> .pred_1 .pred_foo #> #> 1 1 2 #> 2 2 3 #> 3 3 4 spruce_class_multiple( one_step = factor(c(\"a\", \"b\", \"c\")), two_step = factor(c(\"a\", \"c\", \"c\")) ) #> # A tibble: 3 × 2 #> .pred_class_one_step .pred_class_two_step #> #> 1 a a #> 2 b c #> 3 c c one_step <- matrix(c(.3, .7, .0, .1, .3, .6), nrow = 2, byrow = TRUE) two_step <- matrix(c(.2, .7, .1, .2, .4, .4), nrow = 2, byrow = TRUE) binary <- matrix(c(.5, .5, .4, .6), nrow = 2, byrow = TRUE) spruce_prob_multiple( one_step = spruce_prob(c(\"a\", \"b\", \"c\"), one_step), two_step = spruce_prob(c(\"a\", \"b\", \"c\"), two_step), binary = spruce_prob(c(\"yes\", \"no\"), binary) ) #> # A tibble: 2 × 3 #> .pred_one_step$.pred_a .pred_two_step$.pred_a .pred_binary$.pred_yes #> #> 1 0.3 0.2 0.5 #> 2 0.1 0.2 0.4 #> # ℹ 5 more variables: .pred_one_step$.pred_b , $.pred_c , #> # .pred_two_step$.pred_b , $.pred_c , #> # .pred_binary$.pred_no "},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce.html","id":null,"dir":"Reference","previous_headings":"","what":"Spruce up predictions — spruce","title":"Spruce up predictions — spruce","text":"family spruce_*() functions convert predictions standardized format. 
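As a hedged sketch of where these helpers usually sit, a predict() method for a hypothetical two-outcome regression model might finish like this; the model object and the raw predictions are stand-ins, not hardhat code.

library(hardhat)
# Hypothetical predict method for a model with numeric outcomes "a" and "b".
predict_two_outcomes <- function(object, new_data) {
  raw_a <- rep(0, nrow(new_data))  # stand-in for real predictions
  raw_b <- rep(1, nrow(new_data))
  spruce_numeric_multiple(a = raw_a, b = raw_b)
}
predict_two_outcomes(object = NULL, new_data = mtcars[1:3, ])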
generally called prediction implementation function specific type prediction return.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Spruce up predictions — spruce","text":"","code":"spruce_numeric(pred) spruce_class(pred_class) spruce_prob(pred_levels, prob_matrix)"},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Spruce up predictions — spruce","text":"pred (type = \"numeric\") numeric vector predictions. pred_class (type = \"class\") factor \"hard\" class predictions. pred_levels, prob_matrix (type = \"prob\") pred_levels character vector original levels outcome used training. prob_matrix numeric matrix class probabilities many columns levels pred_levels, order.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Spruce up predictions — spruce","text":"tibble, ideally number rows new_data passed predict(). column names number columns vary based function used, standardized.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/spruce.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Spruce up predictions — spruce","text":"running spruce_*() function, always use validation function validate_prediction_size() ensure number rows returned number rows input (new_data).","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":null,"dir":"Reference","previous_headings":"","what":"Standardize the outcome — standardize","title":"Standardize the outcome — standardize","text":"time, input model flexible enough capture number different input types user. standardize() focuses capturing flexibility outcome.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Standardize the outcome — standardize","text":"","code":"standardize(y)"},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Standardize the outcome — standardize","text":"y outcome. can : factor vector numeric vector 1D numeric array numeric matrix column names 2D numeric array column names data frame numeric factor columns","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Standardize the outcome — standardize","text":"possible values y transformed tibble standardization. Vectors transformed tibble single column named \".outcome\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Standardize the outcome — standardize","text":"standardize() called mold() using XY interface (.e. 
y argument supplied).","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/standardize.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Standardize the outcome — standardize","text":"","code":"standardize(1:5) #> # A tibble: 5 × 1 #> .outcome #> #> 1 1 #> 2 2 #> 3 3 #> 4 4 #> 5 5 standardize(factor(letters[1:5])) #> # A tibble: 5 × 1 #> .outcome #> #> 1 a #> 2 b #> 3 c #> 4 d #> 5 e mat <- matrix(1:10, ncol = 2) colnames(mat) <- c(\"a\", \"b\") standardize(mat) #> # A tibble: 5 × 2 #> a b #> #> 1 1 6 #> 2 2 7 #> 3 3 8 #> 4 4 9 #> 5 5 10 df <- data.frame(x = 1:5, y = 6:10) standardize(df) #> # A tibble: 5 × 2 #> x y #> #> 1 1 6 #> 2 2 7 #> 3 3 8 #> 4 4 9 #> 5 5 10"},{"path":"https://hardhat.tidymodels.org/dev/reference/tune.html","id":null,"dir":"Reference","previous_headings":"","what":"Mark arguments for tuning — tune","title":"Mark arguments for tuning — tune","text":"tune() argument placeholder used recipes, parsnip, tune packages. marks recipes step parsnip model arguments tuning.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/tune.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mark arguments for tuning — tune","text":"","code":"tune(id = \"\")"},{"path":"https://hardhat.tidymodels.org/dev/reference/tune.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mark arguments for tuning — tune","text":"id single character value can used differentiate parameters used multiple places name, user wants add note specified parameter.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/tune.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mark arguments for tuning — tune","text":"call object echos user's input.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/tune.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Mark arguments for tuning — tune","text":"","code":"tune() #> tune() tune(\"your name here\") #> tune(\"your name here\") # In practice, `tune()` is used alongside recipes or parsnip to mark # specific arguments for tuning library(recipes) recipe(mpg ~ ., data = mtcars) %>% step_normalize(all_numeric_predictors()) %>% step_pca(all_numeric_predictors, num_comp = tune()) #> #> ── Recipe ──────────────────────────────────────────────────────────────── #> #> ── Inputs #> Number of variables by role #> outcome: 1 #> predictor: 10 #> #> ── Operations #> • Centering and scaling for: all_numeric_predictors() #> • PCA extraction with: all_numeric_predictors"},{"path":"https://hardhat.tidymodels.org/dev/reference/update_blueprint.html","id":null,"dir":"Reference","previous_headings":"","what":"Update a preprocessing blueprint — update_blueprint","title":"Update a preprocessing blueprint — update_blueprint","text":"update_blueprint() correct way alter elements existing blueprint object. two benefits just blueprint$elem <- new_elem. name updating must already exist blueprint. prevents accidentally updating non-existent elements. 
constructor blueprint automatically run update refresh_blueprint() ensure blueprint still valid.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/update_blueprint.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Update a preprocessing blueprint — update_blueprint","text":"","code":"update_blueprint(blueprint, ...)"},{"path":"https://hardhat.tidymodels.org/dev/reference/update_blueprint.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Update a preprocessing blueprint — update_blueprint","text":"blueprint preprocessing blueprint. ... Name-value pairs existing elements blueprint updated.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/update_blueprint.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Update a preprocessing blueprint — update_blueprint","text":"","code":"blueprint <- default_xy_blueprint() # `intercept` defaults to FALSE blueprint #> XY blueprint: #> #> # Predictors: 0 #> # Outcomes: 0 #> Intercept: FALSE #> Novel Levels: FALSE #> Composition: tibble update_blueprint(blueprint, intercept = TRUE) #> XY blueprint: #> #> # Predictors: 0 #> # Outcomes: 0 #> Intercept: TRUE #> Novel Levels: FALSE #> Composition: tibble # Can't update non-existent elements try(update_blueprint(blueprint, intercpt = TRUE)) #> Error in update_blueprint(blueprint, intercpt = TRUE) : #> All elements of `...` must already exist. #> ℹ The following fields are new: \"intercpt\". # Can't add non-valid elements try(update_blueprint(blueprint, intercept = 1)) #> Error in new_blueprint(intercept = intercept, allow_novel_levels = allow_novel_levels, : #> `intercept` must be `TRUE` or `FALSE`, not the number 1."},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure that data contains required column names — validate_column_names","title":"Ensure that data contains required column names — validate_column_names","text":"validate - asserts following: column names data must contain original_names. check - returns following: ok logical. check pass? missing_names character vector. missing column names.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure that data contains required column names — validate_column_names","text":"","code":"validate_column_names(data, original_names) check_column_names(data, original_names)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure that data contains required column names — validate_column_names","text":"data data frame check. original_names character vector. original column names.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure that data contains required column names — validate_column_names","text":"validate_column_names() returns data invisibly. 
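A brief hedged extra for update_blueprint(): the blueprint's allow_novel_levels field (printed as "Novel Levels" above) can be flipped the same way as intercept, assuming the default XY blueprint is used.

library(hardhat)
bp <- default_xy_blueprint()
# Update an existing logical field; refresh_blueprint() re-validates it.
bp <- update_blueprint(bp, allow_novel_levels = TRUE)
bp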
check_column_names() returns named list two components, ok, missing_names.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure that data contains required column names — validate_column_names","text":"special error thrown missing column named \".outcome\". happens case mold() called using xy-method, vector y value supplied rather data frame matrix. case, y coerced data frame, automatic name \".outcome\" added, looked forge(). happens, user tries request outcomes using forge(..., outcomes = TRUE) supplied new_data contain required \".outcome\" column, special error thrown telling . See examples!","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure that data contains required column names — validate_column_names","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_column_names.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure that data contains required column names — validate_column_names","text":"","code":"# --------------------------------------------------------------------------- original_names <- colnames(mtcars) test <- mtcars bad_test <- test[, -c(3, 4)] # All good check_column_names(test, original_names) #> $ok #> [1] TRUE #> #> $missing_names #> character(0) #> # Missing 2 columns check_column_names(bad_test, original_names) #> $ok #> [1] FALSE #> #> $missing_names #> [1] \"disp\" \"hp\" #> # Will error try(validate_column_names(bad_test, original_names)) #> Error in validate_column_names(bad_test, original_names) : #> The following required columns are missing: 'disp', 'hp'. 
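# As the Validation section above suggests, a hedged sketch of using the
# check function to build a custom message instead of the default error;
# `my_model()` in the message is purely illustrative.
chk <- check_column_names(bad_test, original_names)
if (!chk$ok) {
  message("my_model() also needs: ", toString(chk$missing_names))
}
#> my_model() also needs: disp, hp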
# --------------------------------------------------------------------------- # Special error when `.outcome` is missing train <- iris[1:100, ] test <- iris[101:150, ] train_x <- subset(train, select = -Species) train_y <- train$Species # Here, y is a vector processed <- mold(train_x, train_y) # So the default column name is `\".outcome\"` processed$outcomes #> # A tibble: 100 × 1 #> .outcome #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 90 more rows # It doesn't affect forge() normally forge(test, processed$blueprint) #> $predictors #> # A tibble: 50 × 4 #> Sepal.Length Sepal.Width Petal.Length Petal.Width #> #> 1 6.3 3.3 6 2.5 #> 2 5.8 2.7 5.1 1.9 #> 3 7.1 3 5.9 2.1 #> 4 6.3 2.9 5.6 1.8 #> 5 6.5 3 5.8 2.2 #> 6 7.6 3 6.6 2.1 #> 7 4.9 2.5 4.5 1.7 #> 8 7.3 2.9 6.3 1.8 #> 9 6.7 2.5 5.8 1.8 #> 10 7.2 3.6 6.1 2.5 #> # ℹ 40 more rows #> #> $outcomes #> NULL #> #> $extras #> NULL #> # But if the outcome is requested, and `\".outcome\"` # is not present in `new_data`, an error is thrown # with very specific instructions try(forge(test, processed$blueprint, outcomes = TRUE)) #> Error in validate_missing_name_isnt_.outcome(check$missing_names) : #> The following required columns are missing: '.outcome'. #> #> (This indicates that `mold()` was called with a vector for `y`. When this is the case, and the outcome columns are requested in `forge()`, `new_data` must include a column with the automatically generated name, '.outcome', containing the outcome.) # To get this to work, just create an .outcome column in new_data test$.outcome <- test$Species forge(test, processed$blueprint, outcomes = TRUE) #> $predictors #> # A tibble: 50 × 4 #> Sepal.Length Sepal.Width Petal.Length Petal.Width #> #> 1 6.3 3.3 6 2.5 #> 2 5.8 2.7 5.1 1.9 #> 3 7.1 3 5.9 2.1 #> 4 6.3 2.9 5.6 1.8 #> 5 6.5 3 5.8 2.2 #> 6 7.6 3 6.6 2.1 #> 7 4.9 2.5 4.5 1.7 #> 8 7.3 2.9 6.3 1.8 #> 9 6.7 2.5 5.8 1.8 #> 10 7.2 3.6 6.1 2.5 #> # ℹ 40 more rows #> #> $outcomes #> # A tibble: 50 × 1 #> .outcome #> #> 1 virginica #> 2 virginica #> 3 virginica #> 4 virginica #> 5 virginica #> 6 virginica #> 7 virginica #> 8 virginica #> 9 virginica #> 10 virginica #> # ℹ 40 more rows #> #> $extras #> NULL #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"validate - asserts following: formula must duplicates terms left right hand side formula. check - returns following: ok logical. check pass? duplicates character vector. duplicate terms.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"","code":"validate_no_formula_duplication(formula, original = FALSE) check_no_formula_duplication(formula, original = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"formula formula check. original logical. original names checked, names processing used? 
FALSE, y ~ log(y) allowed names \"y\" \"log(y)\", TRUE, y ~ log(y) allowed original names \"y\".","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"validate_no_formula_duplication() returns formula invisibly. check_no_formula_duplication() returns named list two components, ok duplicates.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_no_formula_duplication.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure no duplicate terms appear in formula — validate_no_formula_duplication","text":"","code":"# All good check_no_formula_duplication(y ~ x) #> $ok #> [1] TRUE #> #> $duplicates #> character(0) #> # Not good! check_no_formula_duplication(y ~ y) #> $ok #> [1] FALSE #> #> $duplicates #> [1] \"y\" #> # This is generally okay check_no_formula_duplication(y ~ log(y)) #> $ok #> [1] TRUE #> #> $duplicates #> character(0) #> # But you can be more strict check_no_formula_duplication(y ~ log(y), original = TRUE) #> $ok #> [1] FALSE #> #> $duplicates #> [1] \"y\" #> # This would throw an error try(validate_no_formula_duplication(log(y) ~ log(y))) #> Error in validate_no_formula_duplication(log(y) ~ log(y)) : #> The following terms are duplicated on the left and right hand side of the `formula`: 'log(y)'."},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"validate - asserts following: outcomes must binary factor columns. check - returns following: ok logical. check pass? bad_cols character vector. names columns problems. num_levels integer vector. 
actual number levels columns problems.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"","code":"validate_outcomes_are_binary(outcomes) check_outcomes_are_binary(outcomes)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"outcomes object check.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"validate_outcomes_are_binary() returns outcomes invisibly. check_outcomes_are_binary() returns named list three components, ok, bad_cols, num_levels.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"expected way use validation function supply $outcomes element result call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_binary.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure that the outcome has binary factors — validate_outcomes_are_binary","text":"","code":"# Not a binary factor. 0 levels check_outcomes_are_binary(data.frame(x = 1)) #> $ok #> [1] FALSE #> #> $bad_cols #> [1] \"x\" #> #> $num_levels #> [1] 0 #> # Not a binary factor. 1 level check_outcomes_are_binary(data.frame(x = factor(\"A\"))) #> $ok #> [1] FALSE #> #> $bad_cols #> [1] \"x\" #> #> $num_levels #> [1] 1 #> # All good check_outcomes_are_binary(data.frame(x = factor(c(\"A\", \"B\")))) #> $ok #> [1] TRUE #> #> $bad_cols #> character(0) #> #> $num_levels #> integer(0) #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"validate - asserts following: outcomes must factor columns. check - returns following: ok logical. check pass? bad_classes named list. 
names names problematic columns, values classes matching column.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"","code":"validate_outcomes_are_factors(outcomes) check_outcomes_are_factors(outcomes)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"outcomes object check.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"validate_outcomes_are_factors() returns outcomes invisibly. check_outcomes_are_factors() returns named list two components, ok bad_classes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"expected way use validation function supply $outcomes element result call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_factors.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure that the outcome has only factor columns — validate_outcomes_are_factors","text":"","code":"# Not a factor column. check_outcomes_are_factors(data.frame(x = 1)) #> $ok #> [1] FALSE #> #> $bad_classes #> $bad_classes$x #> [1] \"numeric\" #> #> # All good check_outcomes_are_factors(data.frame(x = factor(c(\"A\", \"B\")))) #> $ok #> [1] TRUE #> #> $bad_classes #> list() #>"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"validate - asserts following: outcomes must numeric columns. check - returns following: ok logical. check pass? bad_classes named list. 
names names problematic columns, values classes matching column.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"","code":"validate_outcomes_are_numeric(outcomes) check_outcomes_are_numeric(outcomes)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"outcomes object check.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"validate_outcomes_are_numeric() returns outcomes invisibly. check_outcomes_are_numeric() returns named list two components, ok bad_classes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"expected way use validation function supply $outcomes element result call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_numeric.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure outcomes are all numeric — validate_outcomes_are_numeric","text":"","code":"# All good check_outcomes_are_numeric(mtcars) #> $ok #> [1] TRUE #> #> $bad_classes #> list() #> # Species is not numeric check_outcomes_are_numeric(iris) #> $ok #> [1] FALSE #> #> $bad_classes #> $bad_classes$Species #> [1] \"factor\" #> #> # This gives an intelligent error message try(validate_outcomes_are_numeric(iris)) #> Error in validate_outcomes_are_numeric(iris) : #> All outcomes must be numeric, but the following are not: #> 'Species': 'factor'"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"validate - asserts following: outcomes must 1 column. Atomic vectors treated 1 column matrices. check - returns following: ok logical. check pass? n_cols single numeric. 
actual number columns.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"","code":"validate_outcomes_are_univariate(outcomes) check_outcomes_are_univariate(outcomes)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"outcomes object check.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"validate_outcomes_are_univariate() returns outcomes invisibly. check_outcomes_are_univariate() returns named list two components, ok n_cols.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"expected way use validation function supply $outcomes element result call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_outcomes_are_univariate.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure that the outcome is univariate — validate_outcomes_are_univariate","text":"","code":"validate_outcomes_are_univariate(data.frame(x = 1)) try(validate_outcomes_are_univariate(mtcars)) #> Error in validate_outcomes_are_univariate(mtcars) : #> The outcome must be univariate, but 11 columns were found."},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure that predictions have the correct number of rows — validate_prediction_size","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"validate - asserts following: size pred must size new_data. check - returns following: ok logical. check pass? size_new_data single numeric. size new_data. size_pred single numeric. 
size pred.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"","code":"validate_prediction_size(pred, new_data) check_prediction_size(pred, new_data)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"pred tibble. predictions return prediction type. often created using one spruce functions, like spruce_numeric(). new_data data frame new predictors possibly outcomes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"validate_prediction_size() returns pred invisibly. check_prediction_size() returns named list three components, ok, size_new_data, size_pred.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"validation function one developer focused rather user focused. final check used right value returned specific predict() method, mainly \"good practice\" sanity check ensure prediction blueprint always returns number rows new_data, one modeling conventions package tries promote.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. 
, developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_prediction_size.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure that predictions have the correct number of rows — validate_prediction_size","text":"","code":"# Say new_data has 5 rows new_data <- mtcars[1:5, ] # And somehow you generate predictions # for those 5 rows pred_vec <- 1:5 # Then you use `spruce_numeric()` to clean # up these numeric predictions pred <- spruce_numeric(pred_vec) pred #> # A tibble: 5 × 1 #> .pred #> #> 1 1 #> 2 2 #> 3 3 #> 4 4 #> 5 5 # Use this check to ensure that # the number of rows or pred match new_data check_prediction_size(pred, new_data) #> $ok #> [1] TRUE #> #> $size_new_data #> [1] 5 #> #> $size_pred #> [1] 5 #> # An informative error message is thrown # if the rows are different try(validate_prediction_size(spruce_numeric(1:4), new_data)) #> Error in validate_prediction_size(spruce_numeric(1:4), new_data) : #> The size of `new_data` (5) must match the size of `pred` (4)."},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":null,"dir":"Reference","previous_headings":"","what":"Ensure predictors are all numeric — validate_predictors_are_numeric","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"validate - asserts following: predictors must numeric columns. check - returns following: ok logical. check pass? bad_classes named list. names names problematic columns, values classes matching column.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"","code":"validate_predictors_are_numeric(predictors) check_predictors_are_numeric(predictors)"},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"predictors object check.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"validate_predictors_are_numeric() returns predictors invisibly. check_predictors_are_numeric() returns named list two components, ok, bad_classes.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"expected way use validation function supply $predictors element result call mold().","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"validation","dir":"Reference","previous_headings":"","what":"Validation","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"hardhat provides validation functions two levels. check_*(): check condition, return list. list always contains least one element, ok, logical specifies check passed. 
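The Details text for `validate_prediction_size()` frames it as a final sanity check placed right before a `predict()` method returns. A brief sketch of that placement; `my_model` and `predict.my_model()` are assumed names standing in for a real model class, and the prediction logic is a placeholder.

```r
library(hardhat)

# `my_model` / `predict.my_model()` are illustrative names only.
predict.my_model <- function(object, new_data, ...) {
  forged <- forge(new_data, object$blueprint)

  # Stand-in for real prediction logic: one numeric prediction per row
  pred <- spruce_numeric(rep(0, nrow(forged$predictors)))

  # Final sanity check before returning: one prediction row per row of `new_data`
  validate_prediction_size(pred, new_data)

  pred
}

# A toy object carrying a blueprint from mold(), just to exercise the method
object <- structure(
  list(blueprint = mold(mpg ~ cyl + disp, mtcars)$blueprint),
  class = "my_model"
)

predict(object, mtcars[1:5, ])
```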
check also check specific elements returned list can used construct meaningful error messages. validate_*(): check condition, error pass. functions call corresponding check function, provide default error message. , developer, want different error message, call check_*() function , provide validation function.","code":""},{"path":[]},{"path":"https://hardhat.tidymodels.org/dev/reference/validate_predictors_are_numeric.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ensure predictors are all numeric — validate_predictors_are_numeric","text":"","code":"# All good check_predictors_are_numeric(mtcars) #> $ok #> [1] TRUE #> #> $bad_classes #> list() #> # Species is not numeric check_predictors_are_numeric(iris) #> $ok #> [1] FALSE #> #> $bad_classes #> $bad_classes$Species #> [1] \"factor\" #> #> # This gives an intelligent error message try(validate_predictors_are_numeric(iris)) #> Error in validate_predictors_are_numeric(iris) : #> All predictors must be numeric, but the following are not: #> 'Species': 'factor'"},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":null,"dir":"Reference","previous_headings":"","what":"Weighted table — weighted_table","title":"Weighted table — weighted_table","text":"weighted_table() computes weighted contingency table based factors provided ... double vector weights provided weights. can seen weighted extension base::table() alternative stats::xtabs(). weighted_table() always uses exact set levels returned levels() constructing table. results following properties: Missing values found factors never included table unless explicit NA factor level. needed, can added factor base::addNA() forcats::fct_expand(x, NA). Levels found factors actually used underlying data included table value 0. needed, can drop unused factor levels re-running factor factor(), calling forcats::fct_drop(). See examples section information properties.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Weighted table — weighted_table","text":"","code":"weighted_table(..., weights, na_remove = FALSE)"},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Weighted table — weighted_table","text":"... Factors equal length use weighted table. ... named, names propagate onto \"dimnames names\" resulting table. least one factor must provided. weights double vector weights used fill cells weighted table. must length factors provided .... na_remove single TRUE FALSE handling whether missing values weights removed summing weights.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Weighted table — weighted_table","text":"weighted table array double values.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Weighted table — weighted_table","text":"result weighted_table() \"table\" class attached . double array. 
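The Validation section above notes that a developer who wants a different error message can call the `check_*()` function directly instead of the `validate_*()` one. A small sketch of that, using `check_predictors_are_numeric()`; the wrapper name and the wording of the custom error are assumptions.

```r
library(hardhat)

# `validate_predictors_my_way()` is a made-up wrapper that swaps the default
# error from `validate_predictors_are_numeric()` for a package-specific one.
validate_predictors_my_way <- function(predictors) {
  check <- check_predictors_are_numeric(predictors)

  if (!check$ok) {
    bad <- paste(names(check$bad_classes), collapse = ", ")
    stop("This model only supports numeric predictors. Offending columns: ", bad)
  }

  invisible(predictors)
}

validate_predictors_my_way(mtcars)     # passes silently
try(validate_predictors_my_way(iris))  # custom error naming `Species`
```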
\"table\" objects defined containing integer counts, weighted tables can utilize fractional weights.","code":""},{"path":"https://hardhat.tidymodels.org/dev/reference/weighted_table.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Weighted table — weighted_table","text":"","code":"x <- factor(c(\"x\", \"y\", \"z\", \"x\", \"x\", \"y\")) y <- factor(c(\"a\", \"b\", \"a\", \"a\", \"b\", \"b\")) w <- c(1.5, 2, 1.1, .5, 3, 2) weighted_table(x = x, y = y, weights = w) #> y #> x a b #> x 2.0 3 #> y 0.0 4 #> z 1.1 0 # --------------------------------------------------------------------------- # If `weights` contains missing values, then missing values will be # propagated into the weighted table x <- factor(c(\"x\", \"y\", \"y\")) y <- factor(c(\"a\", \"b\", \"b\")) w <- c(1, NA, 3) weighted_table(x = x, y = y, weights = w) #> y #> x a b #> x 1 0 #> y 0 NA # You can remove the missing values while summing up the weights with # `na_remove = TRUE` weighted_table(x = x, y = y, weights = w, na_remove = TRUE) #> y #> x a b #> x 1 0 #> y 0 3 # --------------------------------------------------------------------------- # If there are missing values in the factors, those typically don't show # up in the weighted table x <- factor(c(\"x\", NA, \"y\", \"x\")) y <- factor(c(\"a\", \"b\", \"a\", NA)) w <- 1:4 weighted_table(x = x, y = y, weights = w) #> y #> x a b #> x 1 0 #> y 3 0 # This is because the missing values aren't considered explicit levels levels(x) #> [1] \"x\" \"y\" # You can force them to show up in the table by using `addNA()` ahead of time # (or `forcats::fct_expand(x, NA)`) x <- addNA(x, ifany = TRUE) y <- addNA(y, ifany = TRUE) levels(x) #> [1] \"x\" \"y\" NA weighted_table(x = x, y = y, weights = w) #> y #> x a b #> x 1 0 4 #> y 3 0 0 #> 0 2 0 # --------------------------------------------------------------------------- # If there are levels in your factors that aren't actually used in the # underlying data, then they will still show up in the table with a `0` value x <- factor(c(\"x\", \"y\", \"x\"), levels = c(\"x\", \"y\", \"z\")) y <- factor(c(\"a\", \"b\", \"a\"), levels = c(\"a\", \"b\", \"c\")) w <- 1:3 weighted_table(x = x, y = y, weights = w) #> y #> x a b c #> x 4 0 0 #> y 0 2 0 #> z 0 0 0 # If you want to drop these empty factor levels from the result, you can # rerun `factor()` ahead of time to drop them (or `forcats::fct_drop()`) x <- factor(x) y <- factor(y) levels(x) #> [1] \"x\" \"y\" weighted_table(x = x, y = y, weights = w) #> y #> x a b #> x 4 0 #> y 0 2"},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-development-version","dir":"Changelog","previous_headings":"","what":"hardhat (development version)","title":"hardhat (development version)","text":"Added new vector class called quantile_pred() house predictions made quantile regression model (tidymodels/parsnip#1191, @dajmcdon).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-140","dir":"Changelog","previous_headings":"","what":"hardhat 1.4.0","title":"hardhat 1.4.0","text":"CRAN release: 2024-06-02 Added extract_postprocessor() generic (#247). 
Added extract_fit_time() generic (#218).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-131","dir":"Changelog","previous_headings":"","what":"hardhat 1.3.1","title":"hardhat 1.3.1","text":"CRAN release: 2024-02-02 Changed Rd name modeling-package -> modeling-usethis request CRAN.","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-130","dir":"Changelog","previous_headings":"","what":"hardhat 1.3.0","title":"hardhat 1.3.0","text":"CRAN release: 2023-03-30 New family spruce_*_multiple() functions support standardizing multi-outcome predictions (#223, contributions @cregouby). New fct_encode_one_hot() encodes factor one-hot indicator matrix (#215). default_recipe_blueprint() gained strings_as_factors argument, passed recipes::prep() (#212). Using formula blueprint indicators = \"none\" character predictors now works properly provide character column contains single value (#213). Using formula blueprint indicators = \"traditional\" indicators = \"one_hot\" character predictors now properly enforces factor levels generated predictors new_data forge() (#213). Using formula blueprint indicators = \"none\" now works correctly variable formula space name (#217). mold() forge() generally less overhead (#235, #236). Added documentation importance frequency weights ?importance_weights() ?frequency_weights() (#214). New internal recompose() helper (#220).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-120","dir":"Changelog","previous_headings":"","what":"hardhat 1.2.0","title":"hardhat 1.2.0","text":"CRAN release: 2022-06-30 reverted change made hardhat 1.0.0 caused recipe preprocessors drop non-standard roles default calling forge(). Determining roles required bake() time really something controlled within recipes, hardhat. results following changes (#207): new argument, bake_dependent_roles, added default_recipe_blueprint() 1.0.0 removed. longer needed new behavior. default, forge() pass columns new_data bake() except roles \"outcome\" \"case_weights\". outcomes = TRUE, also pass \"outcome\" role. essentially pre-1.0.0 behavior, means , default, non-standard roles required bake() time. assumption now also enforced recipes 1.0.0, even aren’t using hardhat workflow. development version recipes, become recipes 1.0.0, new update_role_requirements() function can used declare role required bake() time. hardhat now knows respect feature, forge() won’t pass columns new_data bake() roles aren’t required bake() time.","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-110","dir":"Changelog","previous_headings":"","what":"hardhat 1.1.0","title":"hardhat 1.1.0","text":"CRAN release: 2022-06-10 Fixed bug results calling mold() using hardhat < 1.0.0 longer compatible calling forge() hardhat >= 1.0.0. occur save workflow object fitting , load R session uses newer version hardhat (#200). Internal details related blueprints work alongside mold() forge() heavily re-factored support fix #200. changes mostly internal developer focused. include: Blueprints longer store clean/process functions used calling mold() forge(). stored blueprint$mold$clean(), blueprint$mold$process(), blueprint$forge$clean(), blueprint$forge$process() strictly internal use. Storing blueprint caused problems blueprints created old versions hardhat unlikely compatible newer versions hardhat. change means new_blueprint() blueprint constructors longer mold forge arguments. run_mold() repurposed. 
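Of the hardhat 1.3.0 items listed above, `fct_encode_one_hot()` is the easiest to picture with a tiny example. This is only a sketch based on the changelog description (a factor in, a one-hot indicator matrix out); the exact output shape is not reproduced here.

```r
library(hardhat)

x <- factor(c("a", "b", "a", "c"))

# One row per element of `x`, one indicator column per factor level
fct_encode_one_hot(x)
```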
Rather calling $clean() $process() functions (, mentioned , longer blueprint), methods S3 generic rewritten directly call current versions clean process functions live hardhat. result less accidental breaking changes. New run_forge() forge() equivalent run_mold(). handles clean/process steps previously handled $clean() $process() functions stored directly blueprint.","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-100","dir":"Changelog","previous_headings":"","what":"hardhat 1.0.0","title":"hardhat 1.0.0","text":"CRAN release: 2022-06-01 Recipe preprocessors now ignore non-standard recipe roles (.e. \"outcome\" \"predictor\") default calling forge(). Previously, assumed non-standard role columns present original training data also required test data forge() called. seems often case columns actually required bake() new data, often won’t even present making predictions new data. example, custom \"case_weights\" role might required computing case-weighted estimates prep() time, won’t necessary bake() time (since estimates already pre-computed stored). account case require specific non-standard role present bake() time, default_recipe_blueprint() gained new argument, bake_dependent_roles, can set character vector non-standard roles required. New weighted_table() generating weighted contingency table, similar table() (#191). New experimental family functions working case weights. particular, frequency_weights() importance_weights() (#190). use_modeling_files() create_modeling_package() longer open package documentation file current RStudio session (#192). rlang >=1.0.2 vctrs >=0.4.1 now required. Bumped required R version >= 3.4.0 reflect tidyverse standards.","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-020","dir":"Changelog","previous_headings":"","what":"hardhat 0.2.0","title":"hardhat 0.2.0","text":"CRAN release: 2022-01-24 Moved tune() tune hardhat (#181). Added extract_parameter_dials() extract_parameter_set_dials() generics extend family extract_*() generics. mold() longer misinterprets :: interaction term (#174). indicators = \"none\", mold() longer misinterprets factor columns part inline function similarly named non-factor column also present (#182).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-016","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.6","title":"hardhat 0.1.6","text":"CRAN release: 2021-07-14 Added new family extract_*() S3 generics extracting important components various tidymodels objects. S3 methods defined tidymodels packages. example, tune register extract_workflow() method easily extract workflow embedded within result tune::last_fit(). logical indicators argument longer allowed default_formula_blueprint(). soft-deprecated hardhat 0.1.4, now result error (#144).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-015","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.5","title":"hardhat 0.1.5","text":"CRAN release: 2020-11-09 use_modeling_files() (therefore, create_modeling_package()) now ensures generated functions templated model name. makes easier add multiple models package (#152). 
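The hardhat 1.0.0 entry above introduces the experimental case weight helpers `frequency_weights()` and `importance_weights()`. A minimal sketch of what they look like in use; the weight vectors here are made up.

```r
library(hardhat)

# Frequency weights: integer counts, e.g. "this row stands for 3 observations"
fw <- frequency_weights(c(1L, 3L, 2L))

# Importance weights: non-negative doubles expressing relative influence
iw <- importance_weights(c(0.5, 1, 2))

fw
iw
```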
preprocessors can now mold() forge() predictors one three output formats (either tibble, matrix, dgCMatrix sparse matrix) via composition argument blueprint (#100, #150).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-014","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.4","title":"hardhat 0.1.4","text":"CRAN release: 2020-07-02 Setting indicators = \"none\" default_formula_blueprint() longer accidentally expands character columns dummy variable columns. now left completely untouched pass characters. indicators = \"traditional\" indicators = \"one_hot\", character columns treated unordered factors (#139). indicators argument default_formula_blueprint() now takes character input rather logical. update: Logical input indicators continue work, warning, hardhat 0.1.6, formally deprecated. also new indicators = \"one_hot\" option expands factor columns K dummy variable columns corresponding K levels factor, rather traditional K - 1 expansion.","code":"indicators = TRUE -> indicators = \"traditional\" indicators = FALSE -> indicators = \"none\""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-013","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.3","title":"hardhat 0.1.3","text":"CRAN release: 2020-05-20 Updated stay current latest vctrs 0.3.0 conventions. scream() now stricter checking ordered factor levels new data ptype used training time. Ordered factors must now exactly set levels training prediction time. See ?scream new graphic outlining factor levels handled (#132). novel factor level check scream() longer throws novel level warning NA values (#131).","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-012","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.2","title":"hardhat 0.1.2","text":"CRAN release: 2020-02-28 default_recipe_blueprint() now defaults prepping recipes fresh = TRUE. safer default, guards user accidentally skipping preprocessing step tuning (#122). model_matrix() now correctly strips attributes result internal call model.matrix().","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-011","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.1","title":"hardhat 0.1.1","text":"CRAN release: 2020-01-08 forge() now works correctly used recipe predictor multiple roles (#120). Require recipes 0.1.8 incorporate important bug fix juice() 0-column selections.","code":""},{"path":"https://hardhat.tidymodels.org/dev/news/index.html","id":"hardhat-010","dir":"Changelog","previous_headings":"","what":"hardhat 0.1.0","title":"hardhat 0.1.0","text":"CRAN release: 2019-12-16 Added NEWS.md file track changes package.","code":""}]
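Two of the older changelog items above, the `composition` argument from hardhat 0.1.5 and `indicators = "one_hot"` from hardhat 0.1.4, are easy to demonstrate together. A brief sketch under the documented blueprint arguments; the data sets are just convenient built-ins.

```r
library(hardhat)

# `composition` (hardhat 0.1.5): ask mold() for matrix predictors instead of a tibble
bp_matrix <- default_xy_blueprint(composition = "matrix")
xy <- mold(mtcars[c("cyl", "disp")], mtcars$mpg, blueprint = bp_matrix)
class(xy$predictors)

# `indicators = "one_hot"` (hardhat 0.1.4): K dummy columns for a K-level factor
bp_one_hot <- default_formula_blueprint(indicators = "one_hot")
fm <- mold(Sepal.Width ~ Species, iris, blueprint = bp_one_hot)
colnames(fm$predictors)
```

The same `composition` value can also be requested from `default_formula_blueprint()` and `default_recipe_blueprint()`, which is the point of the 0.1.5 entry: the output format is a property of the blueprint, not of the preprocessor.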