New Upstream Release - r-cran-themis
Ready changes
Summary
Merged new upstream version: 1.0.1 (was: 1.0.0).
Diff
diff --git a/DESCRIPTION b/DESCRIPTION
index 5032c67..584ae65 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,9 +1,11 @@
Package: themis
Title: Extra Recipes Steps for Dealing with Unbalanced Data
-Version: 1.0.0
-Authors@R:
- person("Emil", "Hvitfeldt", , "emilhhvitfeldt@gmail.com", role = c("aut", "cre"),
- comment = c(ORCID = "0000-0002-0679-1945"))
+Version: 1.0.1
+Authors@R: c(
+ person("Emil", "Hvitfeldt", , "emil.hvitfeldt@posit.co", role = c("aut", "cre"),
+ comment = c(ORCID = "0000-0002-0679-1945")),
+ person(given = "Posit Software, PBC", role = c("cph", "fnd"))
+ )
Description: A dataset with an uneven number of cases in each class is
said to be unbalanced. Many models produce a subpar performance on
unbalanced datasets. A dataset can be balanced by increasing the
@@ -15,20 +17,22 @@ Description: A dataset with an uneven number of cases in each class is
link removal 1976 <https://ieeexplore.ieee.org/document/4309452>.
License: MIT + file LICENSE
URL: https://github.com/tidymodels/themis,
- https://themis.tidymodels.org, https://themis.tidymodels.org/
+ https://themis.tidymodels.org
BugReports: https://github.com/tidymodels/themis/issues
-Depends: R (>= 3.4), recipes (>= 1.0.0)
-Imports: gower, lifecycle, dplyr, generics (>= 0.1.0), purrr, RANN,
- rlang, ROSE, tibble, withr, glue, hardhat
-Suggests: covr, ggplot2, modeldata, testthat (>= 3.0.0)
+Depends: R (>= 3.4), recipes (>= 1.0.4)
+Imports: gower, lifecycle (>= 1.0.3), dplyr, generics (>= 0.1.0),
+ purrr, RANN, rlang, ROSE, tibble, withr, glue, hardhat
+Suggests: covr, dials (>= 1.2.0), ggplot2, modeldata, testthat (>=
+ 3.0.0)
Config/Needs/website: tidyverse/tidytemplate
Config/testthat/edition: 3
Encoding: UTF-8
LazyData: true
-RoxygenNote: 7.2.0.9000
+RoxygenNote: 7.2.3
NeedsCompilation: no
-Packaged: 2022-07-02 00:07:01 UTC; emilhvitfeldt
-Author: Emil Hvitfeldt [aut, cre] (<https://orcid.org/0000-0002-0679-1945>)
-Maintainer: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
+Packaged: 2023-04-14 22:27:43 UTC; emilhvitfeldt
+Author: Emil Hvitfeldt [aut, cre] (<https://orcid.org/0000-0002-0679-1945>),
+ Posit Software, PBC [cph, fnd]
+Maintainer: Emil Hvitfeldt <emil.hvitfeldt@posit.co>
Repository: CRAN
-Date/Publication: 2022-07-02 06:40:02 UTC
+Date/Publication: 2023-04-14 23:00:02 UTC
diff --git a/LICENSE b/LICENSE
index caed670..8f14b5c 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,2 +1,2 @@
-YEAR: 2020
-COPYRIGHT HOLDER: Emil Hvitfeldt
+YEAR: 2023
+COPYRIGHT HOLDER: themis authors
diff --git a/MD5 b/MD5
index 3217b48..02661b6 100644
--- a/MD5
+++ b/MD5
@@ -1,29 +1,29 @@
-fb11057860e6fe7f43e41672feb2ebdd *DESCRIPTION
-8fc0d8e1cf835e1670787e63b4801f78 *LICENSE
-7d62e840a81f54b2dae68493bdc366b2 *NAMESPACE
-2d1ade599cb77b18452a86b020d92e56 *NEWS.md
-c263a331dcf2602871d72969d1ce2534 *R/adasyn_impl.R
-c9b099a7258ce8928a3cdd0e940a6980 *R/bsmote_impl.R
+0d9a86c5e8879e5a18153d545145b07b *DESCRIPTION
+29df898535b62cc49ed47ab48878dc4b *LICENSE
+2ce202e98dc673f7d276916e50b2a356 *NAMESPACE
+9bb88aedc1d737c78e39e03421c34645 *NEWS.md
+6db308f940b931feddb50cd8d41c035a *R/adasyn_impl.R
+61be14acf476300b3ed92bc2bdc158a8 *R/bsmote_impl.R
ef0f647228746e52320ee138166ab572 *R/data.R
-c41496165ab626d34f613517594f4ca9 *R/misc.R
-88258e6ed269f88eab94746967742f67 *R/nearmiss_impl.R
+e4dd4001fa1c5373f0fe57a7139d8ca7 *R/misc.R
+3af9cd0a12d2c81259c857d212a5015c *R/nearmiss_impl.R
d2b1815f5245db4092de343b19c83452 *R/reexports.R
-d179019a984d149a9100f5f89c251e66 *R/smote_impl.R
-b4f801ee094ab931636bb14c486756b3 *R/smotenc_impl.R
-86b2382f20ee88e4ca6628e4ed0e84d9 *R/step_adasyn.R
-d3ab00980ac0680dce85a4f8706844f5 *R/step_bsmote.R
-cfc21034c9a1a0338d7fbcc25681c7b0 *R/step_downsample.R
-80b0ac8d0dc281f4a0c3392229f0ca04 *R/step_nearmiss.R
-dec503bb390a143b22052ad540a712fe *R/step_rose.R
-dda95a331226b40f9924843bbcf0bd8c *R/step_smote.R
-442519c4ba5d5f69e1e189f1dc350ab0 *R/step_smotenc.R
-9320a584a92e7809d69d7ba68c2a0aaf *R/step_tomek.R
-7ba9889423c8c783663c772a5cd54213 *R/step_upsample.R
-a15b436510266b7e2e89f107384f1d34 *R/themis-package.R
+78cb1bf0f25841f5eb1dd97a3160a4fe *R/smote_impl.R
+7d104d40226bdc5c59880a6717b65c76 *R/smotenc_impl.R
+aa1d1f4ec16bbe90786794762d692a28 *R/step_adasyn.R
+444146dc3afc7e5d56c52883c0457cc7 *R/step_bsmote.R
+54934e2044725ab20bd8409bb77de489 *R/step_downsample.R
+850aa0b4cb95e909fe4131c8b73de1f1 *R/step_nearmiss.R
+4a40c6760ebd1a3886046f3a29a565d0 *R/step_rose.R
+acc1ffa324486b6dfa0acec85e574834 *R/step_smote.R
+ab533fcbfc88f33c5c852b2969458438 *R/step_smotenc.R
+c05022673839bf7cfb0e01595b883b4d *R/step_tomek.R
+10f49d6526e5c86fdd7af04b7edc0444 *R/step_upsample.R
+158831b2b6fda7c6af2a6e51c3322531 *R/themis-package.R
21c0f1af5919f727d8a3181c9cb891be *R/tidy.R
-e5af6231bce3d557862c38aa290c7c3a *R/tomek_impl.R
-dbaf7ed434f5fb3ba7af53f37d0fa9c0 *R/tuneable.R
-c41046269c329c843edead87e754c688 *README.md
+c91a731729c7d06c4b2bee7c3786bf54 *R/tomek_impl.R
+36b132dbaedaccc289d3f6745408a0d0 *R/tunable.R
+41bf97923296313974d4f349dd85bd67 *README.md
0c6ad5b3446721162c89589e8077ab1c *build/partial.rdb
ba1c10f7bd10d47c7ab960187a72f23c *data/circle_example.rda
4cc818112c005d26ed6faf245720860f *man/adasyn.Rd
@@ -42,50 +42,54 @@ c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg
27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg
53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg
1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg
+fbd893b0f8f27e457fe118a86586c25d *man/figures/logo.png
80a4f19b7c42d76efd52b0fcf97088b8 *man/nearmiss.Rd
1eea1b22cfe9205ab3a593ec452ca30b *man/reexports.Rd
2a57ec3ebdedc2e2a2abd5c4961c3da6 *man/required_pkgs.step.Rd
+74aa7f707377f995177812e12526b2a8 *man/rmd/tunable-args.Rmd
ff5053a7c9221519fe74defabd7ef129 *man/smote.Rd
74047b2a3d3dac02482aea9708e2ccfe *man/smotenc.Rd
-dc4e93c61444b459a0d4a3b8e1d6158e *man/step_adasyn.Rd
-5f7f91c9736a56f601c71e337a4615e8 *man/step_bsmote.Rd
-e585a04089293ad6ec5e7fc4e1e00ab3 *man/step_downsample.Rd
-ab0cec36c2e06fa569de1c1e1b3f16e1 *man/step_nearmiss.Rd
-05e3b9459b2aeff7462a48868a77a4d2 *man/step_rose.Rd
-1b28367c2ce1d8c395df770cbe411603 *man/step_smote.Rd
-dd2204364a27dae8e087dfedfff90a0c *man/step_smotenc.Rd
+dab9ef9dbff053ee37cfeb0f1a89514a *man/step_adasyn.Rd
+a4604a833b26ea2a948ea4dc6f89932f *man/step_bsmote.Rd
+8455fa46001d481c8dd46232b8ed0698 *man/step_downsample.Rd
+4b8d04dd01043ac12816598b817f22b6 *man/step_nearmiss.Rd
+26e3f34d88177e886dc8b051bc8cd5be *man/step_rose.Rd
+1d05d0e797f0b10f10467d7e213f0222 *man/step_smote.Rd
+0f82a33463305b2e8a0f410d0e50385c *man/step_smotenc.Rd
1edd2eb97151d6ac65001072dc29b503 *man/step_tomek.Rd
-ef21d63209f8d0f9064b821d6b18a205 *man/step_upsample.Rd
-6bbd466de264d498586bce46e592ead7 *man/themis-package.Rd
+f62caed8e3f96568dcbfbe603506f367 *man/step_upsample.Rd
+6c0de6c24416b26be14d64dcd833fd04 *man/themis-package.Rd
4a60f02f6e49733f3c7c15e88b992f96 *man/tidy.recipe.Rd
d5a92c5be824219de2badc64fb9e24b6 *man/tomek.Rd
-ce0c4763d3531623be46c7c19750814e *man/tunable.step_adasyn.Rd
+fa37f5f5a25dc26e73dd48a07b8bd0eb *man/tunable_themis.Rd
19c296e16132ecd6d694bde113e15f41 *tests/testthat.R
-4674482884b522f83b6664958945c717 *tests/testthat/_snaps/adasyn_impl.md
-10c487f0a1e657796cfbb5125e474347 *tests/testthat/_snaps/bsmote_impl.md
-12d49b05356f19d98f4d89ba8339759e *tests/testthat/_snaps/smote_impl.md
-0cbbaaedb1dd1711d27d7d4533d4312f *tests/testthat/_snaps/smotenc.md
-03a5f7e678e6bac7b661476429093507 *tests/testthat/_snaps/step_adasyn.md
-fc2d61543aa96473b0b6d5c0d26ae162 *tests/testthat/_snaps/step_bsmote.md
-62f16671d159e1f220c357fc3000d8c6 *tests/testthat/_snaps/step_downsample.md
-2c7c849c0dff12e2385393373712b668 *tests/testthat/_snaps/step_nearmiss.md
-87f23e6ba0dfada822434dc56ac69b2d *tests/testthat/_snaps/step_rose.md
-d8306f31ba46711a2be8e065bbb64554 *tests/testthat/_snaps/step_smote.md
-7402b89a1e3d0803443a4ec17875710c *tests/testthat/_snaps/step_tomek.md
-553872eef71fd4c5b7cf03e99f10a481 *tests/testthat/_snaps/step_upsample.md
-cff42e06348a02e3a73b602d1a60887e *tests/testthat/_snaps/tomek_impl.md
+b0420c528c484afac000b215960b565a *tests/testthat/_snaps/adasyn_impl.md
+cbcbc6ebec2b20085298e23953ea2558 *tests/testthat/_snaps/bsmote_impl.md
+bdc2e8810ab3ce8b4ef20274858baee9 *tests/testthat/_snaps/extension_check.md
+b3aa85b1614d649b9e913021aeb4dafe *tests/testthat/_snaps/smote_impl.md
+d1205ae89977295ee2764f5ebdbdba83 *tests/testthat/_snaps/smotenc.md
+62c3a3cb8a4863c50f7323ab381eafa6 *tests/testthat/_snaps/step_adasyn.md
+2c5442b7ed1f37374ce59dba3b16d262 *tests/testthat/_snaps/step_bsmote.md
+3163d50fb1a8fce740da4c14fc009dc3 *tests/testthat/_snaps/step_downsample.md
+dd56a33e864df7d96df13a30595cf212 *tests/testthat/_snaps/step_nearmiss.md
+56516f8250baa57464054360b0c27731 *tests/testthat/_snaps/step_rose.md
+cf27847761a150a5f42aa0c38f16cccd *tests/testthat/_snaps/step_smote.md
+c7bcf1bf4b4254f3e326e97a228d3bc1 *tests/testthat/_snaps/step_tomek.md
+16922318b8225c20c21dde6a44bf1a31 *tests/testthat/_snaps/step_upsample.md
+0fa351619a065140117d200b6c5a8535 *tests/testthat/_snaps/tomek_impl.md
9d5ad4b8bc9b71449702eb5bb6138afc *tests/testthat/test-S3-methods.R
6310eb8ff56c00380abfefe4976989bb *tests/testthat/test-adasyn_impl.R
bbb892e3bdb8211a3edd3b4f97023140 *tests/testthat/test-bsmote_impl.R
-e4b37a0d6853e5c0fc5bc807769d00a1 *tests/testthat/test-smote_impl.R
-c36533fef70c8193312b108698c492ad *tests/testthat/test-smotenc.R
-aaf3ddbfa7d4608e5e0191a249d0f1a6 *tests/testthat/test-step_adasyn.R
-7a2c5217d5f505a4810df079137fb049 *tests/testthat/test-step_bsmote.R
-d6ec6a799bc327ec3b06cddd020a1338 *tests/testthat/test-step_downsample.R
-83568f2ec741b775f0cbdd2ca38376dc *tests/testthat/test-step_nearmiss.R
-4594bb6bfa926387f9d33ee0f3033feb *tests/testthat/test-step_rose.R
-9a1e6de30e150bd0638427c3f95fb68b *tests/testthat/test-step_smote.R
+b12dfcd23fb2b35cf9b7813d6b6dbeb7 *tests/testthat/test-extension_check.R
+964f73d8941a8ada90575561cc510594 *tests/testthat/test-smote_impl.R
+ece535a8df6fb6d85c22a9f9b05ebffe *tests/testthat/test-smotenc.R
+f81d8c6e97561a4df3568780249e95e2 *tests/testthat/test-step_adasyn.R
+9fd33e1cef14d60b1757227bb22fde4c *tests/testthat/test-step_bsmote.R
+a535f4549e413eb58fb2eacf97886f4a *tests/testthat/test-step_downsample.R
+7c72f24aa0b01b810fe6ee71a93ec91d *tests/testthat/test-step_nearmiss.R
+276d7ce41b53ba84e611a9d4b1a8e8cb *tests/testthat/test-step_rose.R
+6d85ce8fe6173f10d0eeb782319722c0 *tests/testthat/test-step_smote.R
3c7fbf6e85ec7a490ac68134a635962f *tests/testthat/test-step_tomek.R
-c559694d0c698fbb5b65a054a2547bef *tests/testthat/test-step_upsample.R
+83f00a08efa85f9a873ce8a3623dbde2 *tests/testthat/test-step_upsample.R
5cb85d943122c92c7ffd2d88679f9ae5 *tests/testthat/test-tomek_impl.R
14d8a299f8c2d850b4036c353f2d6a0b *tests/testthat/testthat-problems.rds
diff --git a/NAMESPACE b/NAMESPACE
index 82306e4..c2c0c25 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -97,6 +97,7 @@ importFrom(recipes,recipes_eval_select)
importFrom(recipes,sel2char)
importFrom(recipes,step)
importFrom(rlang,":=")
+importFrom(rlang,caller_env)
importFrom(rlang,enquos)
importFrom(tibble,as_tibble)
importFrom(tibble,tibble)
diff --git a/NEWS.md b/NEWS.md
index 79a4113..2db7913 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,11 @@
+# themis 1.0.1
+
+## Improvements
+
+* Fixed bug where some upsampling functions would error if no upsampling was needed. (#119)
+
+* Steps with tunable arguments now have those arguments listed in the documentation.
+
# themis 1.0.0
* Added case weights support for `step_upsample()` and `step_downsample()`
diff --git a/R/adasyn_impl.R b/R/adasyn_impl.R
index d6c1c80..f27c6df 100644
--- a/R/adasyn_impl.R
+++ b/R/adasyn_impl.R
@@ -52,12 +52,12 @@ adasyn <- function(df, var, k = 5, over_ratio = 1) {
predictors <- setdiff(colnames(df), var)
check_numeric(df[, predictors])
- check_na(select(df, -all_of(var)), "adasyn")
+ check_na(select(df, -all_of(var)))
adasyn_impl(df, var, k, over_ratio)
}
-adasyn_impl <- function(df, var, k = 5, over_ratio = 1) {
+adasyn_impl <- function(df, var, k = 5, over_ratio = 1, call = caller_env()) {
majority_count <- max(table(df[[var]]))
ratio_target <- majority_count * over_ratio
which_upsample <- which(table(df[[var]]) < ratio_target)
@@ -86,7 +86,8 @@ adasyn_impl <- function(df, var, k = 5, over_ratio = 1) {
rlang::abort(
glue(
"Not enough observations of '{min_names[i]}' to perform ADASYN."
- )
+ ),
+ call = call
)
}
diff --git a/R/bsmote_impl.R b/R/bsmote_impl.R
index 636c1e2..94f5cf4 100644
--- a/R/bsmote_impl.R
+++ b/R/bsmote_impl.R
@@ -77,7 +77,7 @@ bsmote <- function(df, var, k = 5, over_ratio = 1, all_neighbors = FALSE) {
predictors <- setdiff(colnames(df), var)
check_numeric(df[, predictors])
- check_na(select(df, -all_of(var)), "bsmote")
+ check_na(select(df, -all_of(var)))
bsmote_impl(df, var, k, over_ratio)
}
diff --git a/R/misc.R b/R/misc.R
index b0a9730..0ede487 100644
--- a/R/misc.R
+++ b/R/misc.R
@@ -4,19 +4,22 @@ string2formula <- function(x) {
out
}
-check_na <- function(data, step) {
+check_na <- function(data, step, call = caller_env()) {
na_cols <- vapply(data, function(x) any(is.na(x)), FUN.VALUE = logical(1))
if (any(na_cols)) {
cols <- paste(names(na_cols)[na_cols], collapse = ", ")
- rlang::abort(glue(
- "`{step}` cannot have any missing values. NAs found ind: {cols}."
- ))
+ rlang::abort(
+ glue(
+ "Cannot have any missing values. NAs found ind: {cols}."
+ ),
+ call = call
+ )
}
}
-check_2_levels_only <- function(data, col_name) {
+check_2_levels_only <- function(data, col_name, call = caller_env()) {
if (length(levels(data[[col_name]])) != 2) {
- rlang::abort(glue("`{col_name}` must only have 2 levels."))
+ rlang::abort(glue("`{col_name}` must only have 2 levels."), call = call)
}
}
@@ -30,9 +33,9 @@ check_numeric <- function(dat) {
invisible(all_good)
}
-check_column_factor <- function(data, column) {
+check_column_factor <- function(data, column, call = caller_env()) {
if (!is.factor(data[[column]])) {
- rlang::abort(glue("`{column}` should be a factor variable."))
+ rlang::abort(glue("`{column}` should be a factor variable."), call = call)
}
}
diff --git a/R/nearmiss_impl.R b/R/nearmiss_impl.R
index 884dbe2..69e159b 100644
--- a/R/nearmiss_impl.R
+++ b/R/nearmiss_impl.R
@@ -52,7 +52,7 @@ nearmiss <- function(df, var, k = 5, under_ratio = 1) {
predictors <- setdiff(colnames(df), var)
check_numeric(df[, predictors])
- check_na(select(df, -all_of(var)), "nearmiss")
+ check_na(select(df, -all_of(var)))
nearmiss_impl(df, var, ignore_vars = character(), k, under_ratio)
}
diff --git a/R/smote_impl.R b/R/smote_impl.R
index 059bd7a..6e21325 100644
--- a/R/smote_impl.R
+++ b/R/smote_impl.R
@@ -60,12 +60,12 @@ smote <- function(df, var, k = 5, over_ratio = 1) {
predictors <- setdiff(colnames(df), var)
check_numeric(df[, predictors])
- check_na(select(df, -all_of(var)), "smote")
+ check_na(select(df, -all_of(var)))
smote_impl(df, var, k, over_ratio)
}
-smote_impl <- function(df, var, k, over_ratio) {
+smote_impl <- function(df, var, k, over_ratio, call = caller_env()) {
data <- split(df, df[[var]])
majority_count <- max(table(df[[var]]))
ratio_target <- majority_count * over_ratio
@@ -83,14 +83,16 @@ smote_impl <- function(df, var, k, over_ratio) {
rlang::abort(
glue(
"Not enough observations of '{min_names[i]}' to perform SMOTE."
- )
+ ),
+ call = call
)
}
synthetic <- smote_data(minority, k = k, n_samples = samples_needed[i])
out_df <- as.data.frame(synthetic)
names(out_df) <- setdiff(names(df), var)
- out_df[var] <- data[[names(samples_needed)[i]]][[var]][1]
+ out_df_nrow <- min(nrow(out_df), 1)
+ out_df[var] <- data[[names(samples_needed)[i]]][[var]][out_df_nrow]
out_df <- out_df[names(df)]
out_dfs[[i]] <- out_df
}
diff --git a/R/smotenc_impl.R b/R/smotenc_impl.R
index 1782011..8dd645d 100644
--- a/R/smotenc_impl.R
+++ b/R/smotenc_impl.R
@@ -64,7 +64,7 @@ smotenc <- function(df, var, k = 5, over_ratio = 1) {
rlang::abort("`k` must be non-negative.")
}
- check_na(select(df, -all_of(var)), "smotenc")
+ check_na(select(df, -all_of(var)))
smotenc_impl(df, var, k, over_ratio)
}
@@ -134,7 +134,9 @@ smotenc_data <- function(data, k, n_samples, smotenc_ids = seq_len(nrow(data)))
# Runs a nearest neighbor search
# outputs a matrix, each row is a minority instance and each column is a nearest neighbor
# k is +1 because the sample is always a nearest neighbor to itself
- ids <- t(gower::gower_topn(x = data, y = data, n = k + 1)$index)
+ suppressWarnings(
+ ids <- t(gower::gower_topn(x = data, y = data, n = k + 1, )$index)
+ )
# shuffles minority indicies and repeats that shuffling until the desired number of samples is reached
indexes <- rep(sample(smotenc_ids), length.out = n_samples)
diff --git a/R/step_adasyn.R b/R/step_adasyn.R
index c87ada9..a1cd909 100644
--- a/R/step_adasyn.R
+++ b/R/step_adasyn.R
@@ -38,6 +38,12 @@
#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
#' (the selectors or variables selected) will be returned.
#'
+#' ```{r, echo = FALSE, results="asis"}
+#' step <- "step_adasyn"
+#' result <- knitr::knit_child("man/rmd/tunable-args.Rmd")
+#' cat(result)
+#' ```
+#'
#' @template case-weights-not-supported
#'
#' @references He, H., Bai, Y., Garcia, E. and Li, S. 2008. ADASYN: Adaptive
@@ -148,8 +154,9 @@ prep.step_adasyn <- function(x, training, info = NULL, ...) {
}
predictors <- setdiff(get_from_info(info, "predictor"), col_name)
- check_type(training[, predictors], TRUE)
- check_na(select(training, all_of(c(col_name, predictors))), "step_adasyn")
+
+ check_type(training[, predictors], types = c("double", "integer"))
+ check_na(select(training, all_of(c(col_name, predictors))))
step_adasyn_new(
terms = x$terms,
@@ -219,6 +226,20 @@ tidy.step_adasyn <- function(x, ...) {
res
}
+#' @export
+#' @rdname tunable_themis
+tunable.step_adasyn <- function(x, ...) {
+ tibble::tibble(
+ name = c("over_ratio", "neighbors"),
+ call_info = list(
+ list(pkg = "dials", fun = "over_ratio"),
+ list(pkg = "dials", fun = "neighbors", range = c(1, 10))
+ ),
+ source = "recipe",
+ component = "step_adasyn",
+ component_id = x$id
+ )
+}
#' S3 methods for tracking which additional packages are needed for steps.
#'
diff --git a/R/step_bsmote.R b/R/step_bsmote.R
index 3c3742a..d7eb62f 100644
--- a/R/step_bsmote.R
+++ b/R/step_bsmote.R
@@ -60,6 +60,12 @@
#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
#' (the selectors or variables selected) will be returned.
#'
+#' ```{r, echo = FALSE, results="asis"}
+#' step <- "step_bsmote"
+#' result <- knitr::knit_child("man/rmd/tunable-args.Rmd")
+#' cat(result)
+#' ```
+#'
#' @template case-weights-not-supported
#'
#' @references Hui Han, Wen-Yuan Wang, and Bing-Huan Mao. Borderline-smote:
@@ -181,8 +187,8 @@ prep.step_bsmote <- function(x, training, info = NULL, ...) {
predictors <- setdiff(get_from_info(info, "predictor"), col_name)
- check_type(training[, predictors], TRUE)
- check_na(select(training, all_of(c(col_name, predictors))), "step_bsmote")
+ check_type(training[, predictors], types = c("double", "integer"))
+ check_na(select(training, all_of(c(col_name, predictors))))
step_bsmote_new(
terms = x$terms,
@@ -253,6 +259,22 @@ tidy.step_bsmote <- function(x, ...) {
res
}
+#' @export
+#' @rdname tunable_themis
+tunable.step_bsmote <- function(x, ...) {
+ tibble::tibble(
+ name = c("over_ratio", "neighbors", "all_neighbors"),
+ call_info = list(
+ list(pkg = "dials", fun = "over_ratio"),
+ list(pkg = "dials", fun = "neighbors"),
+ list(pkg = "dials", fun = "all_neighbors")
+ ),
+ source = "recipe",
+ component = "step_bsmote",
+ component_id = x$id
+ )
+}
+
#' @rdname required_pkgs.step
#' @export
required_pkgs.step_bsmote <- function(x, ...) {
diff --git a/R/step_downsample.R b/R/step_downsample.R
index 3e5bc60..2b179c6 100644
--- a/R/step_downsample.R
+++ b/R/step_downsample.R
@@ -54,6 +54,12 @@
#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
#' (the selectors or variables selected) will be returned.
#'
+#' ```{r, echo = FALSE, results="asis"}
+#' step <- "step_downsample"
+#' result <- knitr::knit_child("man/rmd/tunable-args.Rmd")
+#' cat(result)
+#' ```
+#'
#' @template case-weights-unsupervised
#'
#' @family Steps for under-sampling
@@ -179,7 +185,7 @@ prep.step_downsample <- function(x, training, info = NULL, ...) {
minority <- min(obs_freq)
}
- check_na(select(training, all_of(col_name)), "step_downsample")
+ check_na(select(training, all_of(col_name)))
step_downsample_new(
terms = x$terms,
@@ -276,6 +282,19 @@ tidy.step_downsample <- function(x, ...) {
res
}
+#' @export
+#' @rdname tunable_themis
+tunable.step_downsample <- function(x, ...) {
+ tibble::tibble(
+ name = "under_ratio",
+ call_info = list(
+ list(pkg = "dials", fun = "under_ratio")
+ ),
+ source = "recipe",
+ component = "step_downsample",
+ component_id = x$id
+ )
+}
#' @rdname required_pkgs.step
#' @export
diff --git a/R/step_nearmiss.R b/R/step_nearmiss.R
index d7bbd44..bdc2e90 100644
--- a/R/step_nearmiss.R
+++ b/R/step_nearmiss.R
@@ -25,8 +25,8 @@
#' the variable used to sample.
#'
#' @details
-#' This methods retained the points form the majority classes which has the
-#' smallest mean distance to the k nearest points in the other classes.
+#' This method retains the points from the majority class which have the
+#' smallest mean distance to the k nearest points in the minority class.
#'
#' All columns in the data are sampled and returned by [juice()]
#' and [bake()].
@@ -42,6 +42,12 @@
#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
#' (the selectors or variables selected) will be returned.
#'
+#' ```{r, echo = FALSE, results="asis"}
+#' step <- "step_nearmiss"
+#' result <- knitr::knit_child("man/rmd/tunable-args.Rmd")
+#' cat(result)
+#' ```
+#'
#' @template case-weights-not-supported
#'
#' @references Inderjeet Mani and I Zhang. knn approach to unbalanced data
@@ -158,8 +164,8 @@ prep.step_nearmiss <- function(x, training, info = NULL, ...) {
predictors <- setdiff(get_from_info(info, "predictor"), col_name)
- check_type(training[, predictors], TRUE)
- check_na(select(training, all_of(c(col_name, predictors))), "step_nearmiss")
+ check_type(training[, predictors], types = c("double", "integer"))
+ check_na(select(training, all_of(c(col_name, predictors))))
step_nearmiss_new(
terms = x$terms,
@@ -231,7 +237,20 @@ tidy.step_nearmiss <- function(x, ...) {
res
}
-
+#' @export
+#' @rdname tunable_themis
+tunable.step_nearmiss <- function(x, ...) {
+ tibble::tibble(
+ name = c("under_ratio", "neighbors"),
+ call_info = list(
+ list(pkg = "dials", fun = "under_ratio"),
+ list(pkg = "dials", fun = "neighbors", range = c(1, 10))
+ ),
+ source = "recipe",
+ component = "step_nearmiss",
+ component_id = x$id
+ )
+}
#' @rdname required_pkgs.step
#' @export
diff --git a/R/step_rose.R b/R/step_rose.R
index 7c55e18..380409b 100644
--- a/R/step_rose.R
+++ b/R/step_rose.R
@@ -53,6 +53,12 @@
#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
#' (the selectors or variables selected) will be returned.
#'
+#' ```{r, echo = FALSE, results="asis"}
+#' step <- "step_rose"
+#' result <- knitr::knit_child("man/rmd/tunable-args.Rmd")
+#' cat(result)
+#' ```
+#'
#' @template case-weights-not-supported
#'
#' @references Lunardon, N., Menardi, G., and Torelli, N. (2014). ROSE: a
@@ -165,7 +171,7 @@ prep.step_rose <- function(x, training, info = NULL, ...) {
}
predictors <- setdiff(get_from_info(info, "predictor"), col_name)
- check_na(select(training, all_of(col_name)), "step_rose")
+ check_na(select(training, all_of(col_name)))
step_rose_new(
terms = x$terms,
@@ -253,6 +259,20 @@ tidy.step_rose <- function(x, ...) {
res
}
+#' @export
+#' @rdname tunable_themis
+tunable.step_rose <- function(x, ...) {
+ tibble::tibble(
+ name = c("over_ratio"),
+ call_info = list(
+ list(pkg = "dials", fun = "over_ratio")
+ ),
+ source = "recipe",
+ component = "step_rose",
+ component_id = x$id
+ )
+}
+
#' @rdname required_pkgs.step
#' @export
required_pkgs.step_rose <- function(x, ...) {
diff --git a/R/step_smote.R b/R/step_smote.R
index a96801c..e67b95c 100644
--- a/R/step_smote.R
+++ b/R/step_smote.R
@@ -46,6 +46,12 @@
#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
#' (the selectors or variables selected) will be returned.
#'
+#' ```{r, echo = FALSE, results="asis"}
+#' step <- "step_smote"
+#' result <- knitr::knit_child("man/rmd/tunable-args.Rmd")
+#' cat(result)
+#' ```
+#'
#' @template case-weights-not-supported
#'
#' @references Chawla, N. V., Bowyer, K. W., Hall, L. O., and Kegelmeyer,
@@ -155,8 +161,9 @@ prep.step_smote <- function(x, training, info = NULL, ...) {
}
predictors <- setdiff(get_from_info(info, "predictor"), col_name)
- check_type(training[, predictors], TRUE)
- check_na(select(training, all_of(c(col_name, predictors))), "step_smote")
+
+ check_type(training[, predictors], types = c("double", "integer"))
+ check_na(select(training, all_of(c(col_name, predictors))))
step_smote_new(
terms = x$terms,
@@ -226,6 +233,20 @@ tidy.step_smote <- function(x, ...) {
res
}
+#' @export
+#' @rdname tunable_themis
+tunable.step_smote <- function(x, ...) {
+ tibble::tibble(
+ name = c("over_ratio", "neighbors"),
+ call_info = list(
+ list(pkg = "dials", fun = "over_ratio"),
+ list(pkg = "dials", fun = "neighbors", range = c(1, 10))
+ ),
+ source = "recipe",
+ component = "step_smote",
+ component_id = x$id
+ )
+}
#' @rdname required_pkgs.step
#' @export
diff --git a/R/step_smotenc.R b/R/step_smotenc.R
index 7d09b5f..a6e5dd3 100644
--- a/R/step_smotenc.R
+++ b/R/step_smotenc.R
@@ -47,6 +47,12 @@
#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
#' (the selectors or variables selected) will be returned.
#'
+#' ```{r, echo = FALSE, results="asis"}
+#' step <- "step_smotenc"
+#' result <- knitr::knit_child("man/rmd/tunable-args.Rmd")
+#' cat(result)
+#' ```
+#'
#' @template case-weights-not-supported
#'
#' @references Chawla, N. V., Bowyer, K. W., Hall, L. O., and Kegelmeyer,
@@ -140,7 +146,7 @@ prep.step_smotenc <- function(x, training, info = NULL, ...) {
}
predictors <- setdiff(get_from_info(info, "predictor"), col_name)
- check_na(select(training, all_of(c(col_name, predictors))), "step_smotenc")
+ check_na(select(training, all_of(c(col_name, predictors))))
step_smotenc_new(
terms = x$terms,
@@ -195,7 +201,6 @@ print.step_smotenc <-
invisible(x)
}
-
#' @rdname tidy.recipe
#' @param x A `step_smotenc` object.
#' @export
@@ -210,6 +215,20 @@ tidy.step_smotenc <- function(x, ...) {
res
}
+#' @export
+#' @rdname tunable_themis
+tunable.step_smotenc <- function(x, ...) {
+ tibble::tibble(
+ name = c("over_ratio", "neighbors"),
+ call_info = list(
+ list(pkg = "dials", fun = "over_ratio"),
+ list(pkg = "dials", fun = "neighbors", range = c(1, 10))
+ ),
+ source = "recipe",
+ component = "step_smotenc",
+ component_id = x$id
+ )
+}
#' @rdname required_pkgs.step
#' @export
diff --git a/R/step_tomek.R b/R/step_tomek.R
index 335b44e..154bc03 100644
--- a/R/step_tomek.R
+++ b/R/step_tomek.R
@@ -142,8 +142,9 @@ prep.step_tomek <- function(x, training, info = NULL, ...) {
}
predictors <- setdiff(get_from_info(info, "predictor"), col_name)
- check_type(training[, predictors], TRUE)
- check_na(select(training, all_of(c(col_name, predictors))), "step_tomek")
+
+ check_type(training[, predictors], types = c("double", "integer"))
+ check_na(select(training, all_of(c(col_name, predictors))))
step_tomek_new(
terms = x$terms,
diff --git a/R/step_upsample.R b/R/step_upsample.R
index 21122bf..e656f2a 100644
--- a/R/step_upsample.R
+++ b/R/step_upsample.R
@@ -49,6 +49,12 @@
#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
#' (the selectors or variables selected) will be returned.
#'
+#' ```{r, echo = FALSE, results="asis"}
+#' step <- "step_upsample"
+#' result <- knitr::knit_child("man/rmd/tunable-args.Rmd")
+#' cat(result)
+#' ```
+#'
#' @template case-weights-unsupervised
#'
#' @family Steps for over-sampling
@@ -175,7 +181,7 @@ prep.step_upsample <- function(x, training, info = NULL, ...) {
majority <- max(obs_freq)
}
- check_na(select(training, all_of(col_name)), "step_upsample")
+ check_na(select(training, all_of(col_name)))
step_upsample_new(
terms = x$terms,
@@ -271,6 +277,20 @@ tidy.step_upsample <- function(x, ...) {
res
}
+#' @export
+#' @rdname tunable_themis
+tunable.step_upsample <- function(x, ...) {
+ tibble::tibble(
+ name = c("over_ratio"),
+ call_info = list(
+ list(pkg = "dials", fun = "over_ratio")
+ ),
+ source = "recipe",
+ component = "step_upsample",
+ component_id = x$id
+ )
+}
+
#' @rdname required_pkgs.step
#' @export
required_pkgs.step_upsample <- function(x, ...) {
diff --git a/R/themis-package.R b/R/themis-package.R
index 44b9f87..de9d6ae 100644
--- a/R/themis-package.R
+++ b/R/themis-package.R
@@ -24,6 +24,7 @@
#' @importFrom recipes sel2char
#' @importFrom recipes step
#' @importFrom rlang :=
+#' @importFrom rlang caller_env
#' @importFrom rlang enquos
#' @importFrom ROSE ROSE
#' @importFrom tibble as_tibble
diff --git a/R/tomek_impl.R b/R/tomek_impl.R
index 776c1e2..00511b1 100644
--- a/R/tomek_impl.R
+++ b/R/tomek_impl.R
@@ -36,7 +36,7 @@ tomek <- function(df, var) {
predictors <- setdiff(colnames(df), var)
check_numeric(df[, predictors])
- check_na(select(df, -all_of(var)), "tomek")
+ check_na(select(df, -all_of(var)))
df[-tomek_impl(df, var), ]
}
diff --git a/R/tunable.R b/R/tunable.R
new file mode 100644
index 0000000..bb720fa
--- /dev/null
+++ b/R/tunable.R
@@ -0,0 +1,11 @@
+#' tunable methods for themis
+#'
+#' These functions define what parameters _can_ be tuned for specific steps.
+#' They also define the recommended objects from the `dials` package that can
+#' be used to generate new parameter values and other characteristics.
+#' @param x A recipe step object
+#' @param ... Not used.
+#' @name tunable_themis
+#' @return A tibble object.
+#' @keywords internal
+NULL
diff --git a/R/tuneable.R b/R/tuneable.R
deleted file mode 100644
index 67dfe7a..0000000
--- a/R/tuneable.R
+++ /dev/null
@@ -1,125 +0,0 @@
-#' tunable methods for themis
-#'
-#' These functions define what parameters _can_ be tuned for specific steps.
-#' They also define the recommended objects from the `dials` package that can
-#' be used to generate new parameter values and other characteristics.
-#' @param x A recipe step object
-#' @param ... Not used.
-#' @return A tibble object.
-#' @keywords internal
-#' @export
-tunable.step_adasyn <- function(x, ...) {
- tibble::tibble(
- name = c("over_ratio", "neighbors"),
- call_info = list(
- list(pkg = "dials", fun = "over_ratio"),
- list(pkg = "dials", fun = "neighbors", range = c(1, 10))
- ),
- source = "recipe",
- component = "step_adasyn",
- component_id = x$id
- )
-}
-
-#' @export
-#' @rdname tunable.step_adasyn
-tunable.step_bsmote <- function(x, ...) {
- tibble::tibble(
- name = c("over_ratio", "neighbors", "all_neighbors"),
- call_info = list(
- list(pkg = "dials", fun = "over_ratio"),
- list(pkg = "dials", fun = "neighbors"),
- list(pkg = "dials", fun = "all_neighbors")
- ),
- source = "recipe",
- component = "step_bsmote",
- component_id = x$id
- )
-}
-
-#' @export
-#' @rdname tunable.step_adasyn
-tunable.step_downsample <- function(x, ...) {
- tibble::tibble(
- name = "under_ratio",
- call_info = list(
- list(pkg = "dials", fun = "under_ratio")
- ),
- source = "recipe",
- component = "step_downsample",
- component_id = x$id
- )
-}
-
-#' @export
-#' @rdname tunable.step_adasyn
-tunable.step_nearmiss <- function(x, ...) {
- tibble::tibble(
- name = c("under_ratio", "neighbors"),
- call_info = list(
- list(pkg = "dials", fun = "under_ratio"),
- list(pkg = "dials", fun = "neighbors", range = c(1, 10))
- ),
- source = "recipe",
- component = "step_nearmiss",
- component_id = x$id
- )
-}
-
-#' @export
-#' @rdname tunable.step_adasyn
-tunable.step_rose <- function(x, ...) {
- tibble::tibble(
- name = c("over_ratio"),
- call_info = list(
- list(pkg = "dials", fun = "over_ratio")
- ),
- source = "recipe",
- component = "step_rose",
- component_id = x$id
- )
-}
-
-#' @export
-#' @rdname tunable.step_adasyn
-tunable.step_smote <- function(x, ...) {
- tibble::tibble(
- name = c("over_ratio", "neighbors"),
- call_info = list(
- list(pkg = "dials", fun = "over_ratio"),
- list(pkg = "dials", fun = "neighbors", range = c(1, 10))
- ),
- source = "recipe",
- component = "step_smote",
- component_id = x$id
- )
-}
-
-#' @export
-#' @rdname tunable.step_adasyn
-tunable.step_upsample <- function(x, ...) {
- tibble::tibble(
- name = c("over_ratio"),
- call_info = list(
- list(pkg = "dials", fun = "over_ratio")
- ),
- source = "recipe",
- component = "step_upsample",
- component_id = x$id
- )
-}
-
-#' @export
-#' @rdname tunable.step_adasyn
-tunable.step_smotenc <- function(x, ...) {
- tibble::tibble(
- name = c("over_ratio", "neighbors"),
- call_info = list(
- list(pkg = "dials", fun = "over_ratio"),
- list(pkg = "dials", fun = "neighbors", range = c(1, 10))
- ),
- source = "recipe",
- component = "step_smotenc",
- component_id = x$id
- )
-}
diff --git a/README.md b/README.md
index e28d013..c3c1a81 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
<!-- README.md is generated from README.Rmd. Please edit that file -->
-# themis
+# themis <a href="https://themis.tidymodels.org"><img src="man/figures/logo.png" align="right" height="138" /></a>
<!-- badges: start -->
@@ -34,8 +34,8 @@ install.packages("themis")
Install the development version from GitHub with:
``` r
-# install.packages("remotes")
-remotes::install_github("tidymodels/themis")
+# install.packages("pak")
+pak::pak("tidymodels/themis")
```
## Example
@@ -93,7 +93,7 @@ example_data %>%
geom_bar()
```
-<img src="man/figures/README-unnamed-chunk-2-1.png" width="100%" />
+<img src="man/figures/README-unnamed-chunk-2-1.png" alt="Bar chart with 5 columns. class on the x-axis and count on the y-axis. Class a has height 10, b has 20, c has 30, d has 40, and e has 50." width="100%" />
### Upsample / Over-sampling
@@ -121,7 +121,7 @@ recipe(~., example_data) %>%
geom_bar()
```
-<img src="man/figures/README-unnamed-chunk-3-1.png" width="100%" />
+<img src="man/figures/README-unnamed-chunk-3-1.png" alt="Bar chart with 5 columns. class on the x-axis and count on the y-axis. class a, b, c, d, and e all have a height of 50." width="100%" />
and by setting `over_ratio = 0.5` we upsample any minority class with
fewer samples than 50% of the majority up to have 50% of the majority.
@@ -135,7 +135,7 @@ recipe(~., example_data) %>%
geom_bar()
```
-<img src="man/figures/README-unnamed-chunk-4-1.png" width="100%" />
+<img src="man/figures/README-unnamed-chunk-4-1.png" alt="Bar chart with 5 columns. class on the x-axis and count on the y-axis. Class a has height 25, b has 25, c has 30, d has 40, and e has 50." width="100%" />
### Downsample / Under-sampling
@@ -161,7 +161,7 @@ recipe(~., example_data) %>%
geom_bar()
```
-<img src="man/figures/README-unnamed-chunk-5-1.png" width="100%" />
+<img src="man/figures/README-unnamed-chunk-5-1.png" alt="Bar chart with 5 columns. class on the x-axis and count on the y-axis. Class a, b, c, d, and e all have a height of 10." width="100%" />
and by setting `under_ratio = 2` we downsample any majority class with
more than 200% samples of the minority class down to have to 200%
@@ -176,7 +176,7 @@ recipe(~., example_data) %>%
geom_bar()
```
-<img src="man/figures/README-unnamed-chunk-6-1.png" width="100%" />
+<img src="man/figures/README-unnamed-chunk-6-1.png" alt="Bar chart with 5 columns. class on the x-axis and count on the y-axis. Class a has height 10, b, c, d, and e have a height of 20." width="100%" />
## Contributing
@@ -184,18 +184,18 @@ This project is released with a [Contributor Code of
Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html).
By contributing to this project, you agree to abide by its terms.
-- For questions and discussions about tidymodels packages, modeling,
- and machine learning, [join us on RStudio
- Community](https://community.rstudio.com/new-topic?category_id=15&tags=tidymodels,question).
+- For questions and discussions about tidymodels packages, modeling, and
+ machine learning, [join us on RStudio
+ Community](https://community.rstudio.com/new-topic?category_id=15&tags=tidymodels,question).
-- If you think you have encountered a bug, please [submit an
- issue](https://github.com/tidymodels/themis/issues).
+- If you think you have encountered a bug, please [submit an
+ issue](https://github.com/tidymodels/themis/issues).
-- Either way, learn how to create and share a
- [reprex](https://reprex.tidyverse.org/articles/articles/learn-reprex.html)
- (a minimal, reproducible example), to clearly communicate about your
- code.
+- Either way, learn how to create and share a
+ [reprex](https://reprex.tidyverse.org/articles/articles/learn-reprex.html)
+ (a minimal, reproducible example), to clearly communicate about your
+ code.
-- Check out further details on [contributing guidelines for tidymodels
- packages](https://www.tidymodels.org/contribute/) and [how to get
- help](https://www.tidymodels.org/help/).
+- Check out further details on [contributing guidelines for tidymodels
+ packages](https://www.tidymodels.org/contribute/) and [how to get
+ help](https://www.tidymodels.org/help/).
diff --git a/debian/changelog b/debian/changelog
index 70d81af..d4d4e64 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+r-cran-themis (1.0.1-1) UNRELEASED; urgency=low
+
+ * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk> Thu, 08 Jun 2023 04:22:49 -0000
+
r-cran-themis (1.0.0-1) unstable; urgency=medium
* New upstream version
diff --git a/man/figures/logo.png b/man/figures/logo.png
new file mode 100644
index 0000000..1bd4187
Binary files /dev/null and b/man/figures/logo.png differ
diff --git a/man/rmd/tunable-args.Rmd b/man/rmd/tunable-args.Rmd
new file mode 100644
index 0000000..7074e58
--- /dev/null
+++ b/man/rmd/tunable-args.Rmd
@@ -0,0 +1,34 @@
+```{r, include = FALSE}
+get_dials <- function(x) {
+ if (any(names(x) == "range")) {
+ cl <- rlang::call2(x$fun, .ns = x$pkg, range = x$range)
+ } else {
+ cl <- rlang::call2(x$fun, .ns = x$pkg)
+ }
+ rlang::eval_tidy(cl)
+}
+get_param_list <- function(x) {
+ args <- formals(x)
+ params <- getS3method("tunable", x)(list()) %>%
+ dplyr::mutate(
+ default = args[name],
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type),
+ item = glue::glue("- `{name}`: {label} (type: {type}, default: {default})\n\n")
+ )
+ params$item
+}
+```
+
+# Tuning Parameters
+
+```{r echo = FALSE}
+param <- get_param_list(step)
+```
+
+This step has `r length(param)` tuning parameters:
+
+```{r echo = FALSE, results = "asis"}
+param
+```
diff --git a/man/step_adasyn.Rd b/man/step_adasyn.Rd
index d7cb2ac..453ad95 100644
--- a/man/step_adasyn.Rd
+++ b/man/step_adasyn.Rd
@@ -83,6 +83,14 @@ When you \code{\link[=tidy.recipe]{tidy()}} this step, a tibble with columns \co
(the selectors or variables selected) will be returned.
}
+\section{Tuning Parameters}{
+This step has 2 tuning parameters:
+\itemize{
+\item \code{over_ratio}: Over-Sampling Ratio (type: double, default: 1)
+\item \code{neighbors}: # Nearest Neighbors (type: integer, default: 5)
+}
+}
+
\section{Case weights}{
diff --git a/man/step_bsmote.Rd b/man/step_bsmote.Rd
index f582965..b39b4a1 100644
--- a/man/step_bsmote.Rd
+++ b/man/step_bsmote.Rd
@@ -106,6 +106,15 @@ When you \code{\link[=tidy.recipe]{tidy()}} this step, a tibble with columns \co
(the selectors or variables selected) will be returned.
}
+\section{Tuning Parameters}{
+This step has 3 tuning parameters:
+\itemize{
+\item \code{over_ratio}: Over-Sampling Ratio (type: double, default: 1)
+\item \code{neighbors}: # Nearest Neighbors (type: integer, default: 5)
+\item \code{all_neighbors}: All Neighbors (type: logical, default: FALSE)
+}
+}
+
\section{Case weights}{
diff --git a/man/step_downsample.Rd b/man/step_downsample.Rd
index 1bb11a7..a280ab3 100644
--- a/man/step_downsample.Rd
+++ b/man/step_downsample.Rd
@@ -97,6 +97,13 @@ When you \code{\link[=tidy.recipe]{tidy()}} this step, a tibble with columns \co
(the selectors or variables selected) will be returned.
}
+\section{Tuning Parameters}{
+This step has 1 tuning parameters:
+\itemize{
+\item \code{under_ratio}: Under-Sampling Ratio (type: double, default: 1)
+}
+}
+
\section{Case weights}{
diff --git a/man/step_nearmiss.Rd b/man/step_nearmiss.Rd
index c00890d..32a1484 100644
--- a/man/step_nearmiss.Rd
+++ b/man/step_nearmiss.Rd
@@ -71,8 +71,8 @@ in the majority class based on their distance to other points in the
same class.
}
\details{
-This methods retained the points form the majority classes which has the
-smallest mean distance to the k nearest points in the other classes.
+This method retains the points from the majority class which have the
+smallest mean distance to the k nearest points in the minority class.
All columns in the data are sampled and returned by \code{\link[=juice]{juice()}}
and \code{\link[=bake]{bake()}}.
@@ -88,6 +88,14 @@ When you \code{\link[=tidy.recipe]{tidy()}} this step, a tibble with columns \co
(the selectors or variables selected) will be returned.
}
+\section{Tuning Parameters}{
+This step has 2 tuning parameters:
+\itemize{
+\item \code{under_ratio}: Under-Sampling Ratio (type: double, default: 1)
+\item \code{neighbors}: # Nearest Neighbors (type: integer, default: 5)
+}
+}
+
\section{Case weights}{
diff --git a/man/step_rose.Rd b/man/step_rose.Rd
index ec27eb0..dccd23d 100644
--- a/man/step_rose.Rd
+++ b/man/step_rose.Rd
@@ -102,6 +102,13 @@ When you \code{\link[=tidy.recipe]{tidy()}} this step, a tibble with columns \co
(the selectors or variables selected) will be returned.
}
+\section{Tuning Parameters}{
+This step has 1 tuning parameters:
+\itemize{
+\item \code{over_ratio}: Over-Sampling Ratio (type: double, default: 1)
+}
+}
+
\section{Case weights}{
diff --git a/man/step_smote.Rd b/man/step_smote.Rd
index bfd84d7..2b0cf97 100644
--- a/man/step_smote.Rd
+++ b/man/step_smote.Rd
@@ -91,6 +91,14 @@ When you \code{\link[=tidy.recipe]{tidy()}} this step, a tibble with columns \co
(the selectors or variables selected) will be returned.
}
+\section{Tuning Parameters}{
+This step has 2 tuning parameters:
+\itemize{
+\item \code{over_ratio}: Over-Sampling Ratio (type: double, default: 1)
+\item \code{neighbors}: # Nearest Neighbors (type: integer, default: 5)
+}
+}
+
\section{Case weights}{
diff --git a/man/step_smotenc.Rd b/man/step_smotenc.Rd
index 443f637..8b67fb1 100644
--- a/man/step_smotenc.Rd
+++ b/man/step_smotenc.Rd
@@ -92,6 +92,14 @@ When you \code{\link[=tidy.recipe]{tidy()}} this step, a tibble with columns \co
(the selectors or variables selected) will be returned.
}
+\section{Tuning Parameters}{
+This step has 2 tuning parameters:
+\itemize{
+\item \code{over_ratio}: Over-Sampling Ratio (type: double, default: 1)
+\item \code{neighbors}: # Nearest Neighbors (type: integer, default: 5)
+}
+}
+
\section{Case weights}{
diff --git a/man/step_upsample.Rd b/man/step_upsample.Rd
index dc69336..74e4c2a 100644
--- a/man/step_upsample.Rd
+++ b/man/step_upsample.Rd
@@ -92,6 +92,13 @@ When you \code{\link[=tidy.recipe]{tidy()}} this step, a tibble with columns \co
(the selectors or variables selected) will be returned.
}
+\section{Tuning Parameters}{
+This step has 1 tuning parameters:
+\itemize{
+\item \code{over_ratio}: Over-Sampling Ratio (type: double, default: 1)
+}
+}
+
\section{Case weights}{
diff --git a/man/themis-package.Rd b/man/themis-package.Rd
index 26d86f8..80c60da 100644
--- a/man/themis-package.Rd
+++ b/man/themis-package.Rd
@@ -6,6 +6,8 @@
\alias{themis-package}
\title{themis: Extra Recipes Steps for Dealing with Unbalanced Data}
\description{
+\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
+
A dataset with an uneven number of cases in each class is said to be unbalanced. Many models produce a subpar performance on unbalanced datasets. A dataset can be balanced by increasing the number of minority cases using SMOTE 2011 \href{https://arxiv.org/abs/1106.1813}{arXiv:1106.1813}, BorderlineSMOTE 2005 \doi{10.1007/11538059_91} and ADASYN 2008 \url{https://ieeexplore.ieee.org/document/4633969}. Or by decreasing the number of majority cases using NearMiss 2003 \url{https://www.site.uottawa.ca/~nat/Workshop2003/jzhang.pdf} or Tomek link removal 1976 \url{https://ieeexplore.ieee.org/document/4309452}.
}
\seealso{
@@ -13,13 +15,17 @@ Useful links:
\itemize{
\item \url{https://github.com/tidymodels/themis}
\item \url{https://themis.tidymodels.org}
- \item \url{https://themis.tidymodels.org/}
\item Report bugs at \url{https://github.com/tidymodels/themis/issues}
}
}
\author{
-\strong{Maintainer}: Emil Hvitfeldt \email{emilhhvitfeldt@gmail.com} (\href{https://orcid.org/0000-0002-0679-1945}{ORCID})
+\strong{Maintainer}: Emil Hvitfeldt \email{emil.hvitfeldt@posit.co} (\href{https://orcid.org/0000-0002-0679-1945}{ORCID})
+
+Other contributors:
+\itemize{
+ \item Posit Software, PBC [copyright holder, funder]
+}
}
\keyword{internal}
diff --git a/man/tunable.step_adasyn.Rd b/man/tunable_themis.Rd
similarity index 82%
rename from man/tunable.step_adasyn.Rd
rename to man/tunable_themis.Rd
index 25c920c..28b08b7 100644
--- a/man/tunable.step_adasyn.Rd
+++ b/man/tunable_themis.Rd
@@ -1,5 +1,7 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/tuneable.R
+% Please edit documentation in R/step_adasyn.R, R/step_bsmote.R,
+% R/step_downsample.R, R/step_nearmiss.R, R/step_rose.R, R/step_smote.R,
+% R/step_smotenc.R, R/step_upsample.R, R/tunable.R
\name{tunable.step_adasyn}
\alias{tunable.step_adasyn}
\alias{tunable.step_bsmote}
@@ -7,8 +9,9 @@
\alias{tunable.step_nearmiss}
\alias{tunable.step_rose}
\alias{tunable.step_smote}
-\alias{tunable.step_upsample}
\alias{tunable.step_smotenc}
+\alias{tunable.step_upsample}
+\alias{tunable_themis}
\title{tunable methods for themis}
\usage{
\method{tunable}{step_adasyn}(x, ...)
@@ -23,9 +26,9 @@
\method{tunable}{step_smote}(x, ...)
-\method{tunable}{step_upsample}(x, ...)
-
\method{tunable}{step_smotenc}(x, ...)
+
+\method{tunable}{step_upsample}(x, ...)
}
\arguments{
\item{x}{A recipe step object}
diff --git a/tests/testthat/_snaps/adasyn_impl.md b/tests/testthat/_snaps/adasyn_impl.md
index 798d96a..1503f01 100644
--- a/tests/testthat/_snaps/adasyn_impl.md
+++ b/tests/testthat/_snaps/adasyn_impl.md
@@ -25,7 +25,7 @@
Code
adasyn(circle_example0, var = "class")
Error <rlang_error>
- `adasyn` cannot have any missing values. NAs found ind: x.
+ Cannot have any missing values. NAs found ind: x.
---
diff --git a/tests/testthat/_snaps/bsmote_impl.md b/tests/testthat/_snaps/bsmote_impl.md
index a739353..eced36d 100644
--- a/tests/testthat/_snaps/bsmote_impl.md
+++ b/tests/testthat/_snaps/bsmote_impl.md
@@ -25,7 +25,7 @@
Code
bsmote(circle_example0, var = "class")
Error <rlang_error>
- `bsmote` cannot have any missing values. NAs found ind: x.
+ Cannot have any missing values. NAs found ind: x.
---
diff --git a/tests/testthat/_snaps/extension_check.md b/tests/testthat/_snaps/extension_check.md
new file mode 100644
index 0000000..bd10fa0
--- /dev/null
+++ b/tests/testthat/_snaps/extension_check.md
@@ -0,0 +1,7 @@
+# recipes_extension_check
+
+ Code
+ recipes::recipes_extension_check(pkg = "themis")
+ Message <cliMessage>
+ v All steps have all method!
+
diff --git a/tests/testthat/_snaps/smote_impl.md b/tests/testthat/_snaps/smote_impl.md
index afb57c3..7095d21 100644
--- a/tests/testthat/_snaps/smote_impl.md
+++ b/tests/testthat/_snaps/smote_impl.md
@@ -25,7 +25,7 @@
Code
smote(circle_example0, var = "class")
Error <rlang_error>
- `smote` cannot have any missing values. NAs found ind: x.
+ Cannot have any missing values. NAs found ind: x.
---
diff --git a/tests/testthat/_snaps/smotenc.md b/tests/testthat/_snaps/smotenc.md
index 4e3ef3e..2622866 100644
--- a/tests/testthat/_snaps/smotenc.md
+++ b/tests/testthat/_snaps/smotenc.md
@@ -2,71 +2,69 @@
Code
print(rec)
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 2
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
-
- SMOTENC based on class
+ -- Operations
+ * SMOTENC based on: class
---
Code
prep(rec)
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 2
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- SMOTENC based on class [trained]
+ -- Operations
+ * SMOTENC based on: class | Trained
# empty printing
Code
rec
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 10
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
-
- SMOTENC based on <none>
+ -- Operations
+ * SMOTENC based on: <none>
---
Code
rec
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 10
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 32 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
+ -- Training information
+ Training data contained 32 data points and no incomplete rows.
- SMOTENC based on <none> [trained]
+ -- Operations
+ * SMOTENC based on: <none> | Trained
diff --git a/tests/testthat/_snaps/step_adasyn.md b/tests/testthat/_snaps/step_adasyn.md
index 7ffa118..521c319 100644
--- a/tests/testthat/_snaps/step_adasyn.md
+++ b/tests/testthat/_snaps/step_adasyn.md
@@ -2,106 +2,114 @@
Code
recipe(Status ~ Age, data = credit_data0) %>% step_adasyn(Status) %>% prep()
- Error <rlang_error>
- Not enough observations of 'dummy' to perform ADASYN.
+ Error <recipes_error_step>
+ Error in `step_adasyn()`:
+ Caused by error in `bake()`:
+ ! Not enough observations of 'dummy' to perform ADASYN.
# printing
Code
print(rec)
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 2
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
-
- adasyn based on class
+ -- Operations
+ * adasyn based on: class
---
Code
prep(rec)
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 2
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- adasyn based on class [trained]
+ -- Operations
+ * adasyn based on: class | Trained
# bad data
Code
rec %>% step_adasyn(x) %>% prep()
- Error <rlang_error>
- `x` should be a factor variable.
+ Error <recipes_error_step>
+ Error in `step_adasyn()`:
+ Caused by error in `prep()`:
+ ! `x` should be a factor variable.
---
Code
rec %>% step_adasyn(class, id) %>% prep()
- Error <rlang_error>
- The selector should select at most a single variable
+ Error <recipes_error_step>
+ Error in `step_adasyn()`:
+ Caused by error in `prep()`:
+ ! The selector should select at most a single variable
# errors if character are present
Code
recipe(~., data = df_char) %>% step_adasyn(x) %>% prep()
- Error <rlang_error>
- All columns selected for the step should be numeric
+ Error <recipes_error_step>
+ Error in `step_adasyn()`:
+ Caused by error in `prep()`:
+ ! All columns selected for the step should be double, or integer.
# NA in response
Code
recipe(Job ~ Age, data = credit_data) %>% step_adasyn(Job) %>% prep()
- Error <rlang_error>
- `step_adasyn` cannot have any missing values. NAs found ind: Job.
+ Error <recipes_error_step>
+ Error in `step_adasyn()`:
+ Caused by error in `prep()`:
+ ! Cannot have any missing values. NAs found ind: Job.
# empty printing
Code
rec
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 10
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
-
- adasyn based on <none>
+ -- Operations
+ * adasyn based on: <none>
---
Code
rec
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 10
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 32 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
+ -- Training information
+ Training data contained 32 data points and no incomplete rows.
- adasyn based on <none> [trained]
+ -- Operations
+ * adasyn based on: <none> | Trained
diff --git a/tests/testthat/_snaps/step_bsmote.md b/tests/testthat/_snaps/step_bsmote.md
index 8a197df..e506a2d 100644
--- a/tests/testthat/_snaps/step_bsmote.md
+++ b/tests/testthat/_snaps/step_bsmote.md
@@ -2,99 +2,105 @@
Code
print(rec)
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 2
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
-
- BorderlineSMOTE based on class
+ -- Operations
+ * BorderlineSMOTE based on: class
---
Code
prep(rec)
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 2
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- BorderlineSMOTE based on class [trained]
+ -- Operations
+ * BorderlineSMOTE based on: class | Trained
# bad data
Code
rec %>% step_bsmote(x) %>% prep()
- Error <rlang_error>
- `x` should be a factor variable.
+ Error <recipes_error_step>
+ Error in `step_bsmote()`:
+ Caused by error in `prep()`:
+ ! `x` should be a factor variable.
---
Code
rec %>% step_bsmote(class, id) %>% prep()
- Error <rlang_error>
- The selector should select at most a single variable
+ Error <recipes_error_step>
+ Error in `step_bsmote()`:
+ Caused by error in `prep()`:
+ ! The selector should select at most a single variable
# errors if character are present
Code
recipe(~., data = df_char) %>% step_bsmote(x) %>% prep()
- Error <rlang_error>
- All columns selected for the step should be numeric
+ Error <recipes_error_step>
+ Error in `step_bsmote()`:
+ Caused by error in `prep()`:
+ ! All columns selected for the step should be double, or integer.
# NA in response
Code
recipe(Job ~ Age, data = credit_data) %>% step_bsmote(Job) %>% prep()
- Error <rlang_error>
- `step_bsmote` cannot have any missing values. NAs found ind: Job.
+ Error <recipes_error_step>
+ Error in `step_bsmote()`:
+ Caused by error in `prep()`:
+ ! Cannot have any missing values. NAs found ind: Job.
# empty printing
Code
rec
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 10
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
-
- BorderlineSMOTE based on <none>
+ -- Operations
+ * BorderlineSMOTE based on: <none>
---
Code
rec
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 10
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 32 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
+ -- Training information
+ Training data contained 32 data points and no incomplete rows.
- BorderlineSMOTE based on <none> [trained]
+ -- Operations
+ * BorderlineSMOTE based on: <none> | Trained
diff --git a/tests/testthat/_snaps/step_downsample.md b/tests/testthat/_snaps/step_downsample.md
index 22f5d63..6958650 100644
--- a/tests/testthat/_snaps/step_downsample.md
+++ b/tests/testthat/_snaps/step_downsample.md
@@ -4,127 +4,129 @@
new_rec <- recipe(~., data = circle_example) %>% step_downsample(class, ratio = 2)
Error <lifecycle_error_deprecated>
The `ratio` argument of `step_downsample()` was deprecated in themis 0.2.0 and is now defunct.
- Please use the `under_ratio` argument instead.
+ i Please use the `under_ratio` argument instead.
# printing
Code
print(rec)
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- predictor 4
+ -- Inputs
+ Number of variables by role
+ predictor: 4
- Operations:
-
- Down-sampling based on class
+ -- Operations
+ * Down-sampling based on: class
---
Code
prep(rec)
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- predictor 4
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ predictor: 4
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- Down-sampling based on class [trained]
+ -- Operations
+ * Down-sampling based on: class | Trained
# bad data
Code
rec %>% step_downsample(x) %>% prep()
- Error <rlang_error>
- `x` should be a factor variable.
+ Error <recipes_error_step>
+ Error in `step_downsample()`:
+ Caused by error in `prep()`:
+ ! `x` should be a factor variable.
---
Code
rec %>% step_downsample(class, id) %>% prep()
- Error <rlang_error>
- The selector should select at most a single variable
+ Error <recipes_error_step>
+ Error in `step_downsample()`:
+ Caused by error in `prep()`:
+ ! The selector should select at most a single variable
# empty printing
Code
rec
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 10
+ -- Recipe ----------------------------------------------------------------------
- Operations:
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Down-sampling based on <none>
+ -- Operations
+ * Down-sampling based on: <none>
---
Code
rec
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 10
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Training data contained 32 data points and no missing data.
+ -- Training information
+ Training data contained 32 data points and no incomplete rows.
- Operations:
-
- Down-sampling based on <none> [trained]
+ -- Operations
+ * Down-sampling based on: <none> | Trained
# case_weights
Code
rec1_p
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- case_weights 1
- predictor 4
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ predictor: 4
+ case_weights: 1
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- Down-sampling based on class [weighted, trained]
+ -- Operations
+ * Down-sampling based on: class | Trained, weighted
---
Code
rec1_p
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- case_weights 1
- predictor 4
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ predictor: 4
+ case_weights: 1
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- Down-sampling based on class [ignored weights, trained]
+ -- Operations
+ * Down-sampling based on: class | Trained, ignored weights
diff --git a/tests/testthat/_snaps/step_nearmiss.md b/tests/testthat/_snaps/step_nearmiss.md
index 88cd8f7..9294d52 100644
--- a/tests/testthat/_snaps/step_nearmiss.md
+++ b/tests/testthat/_snaps/step_nearmiss.md
@@ -2,99 +2,105 @@
Code
print(rec)
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 2
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
-
- NEARMISS-1 based on class
+ -- Operations
+ * NEARMISS-1 based on: class
---
Code
prep(rec)
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 2
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- NEARMISS-1 based on class [trained]
+ -- Operations
+ * NEARMISS-1 based on: class | Trained
# bad data
Code
rec %>% step_nearmiss(x) %>% prep()
- Error <rlang_error>
- `x` should be a factor variable.
+ Error <recipes_error_step>
+ Error in `step_nearmiss()`:
+ Caused by error in `prep()`:
+ ! `x` should be a factor variable.
---
Code
rec %>% step_nearmiss(class, id) %>% prep()
- Error <rlang_error>
- The selector should select at most a single variable
+ Error <recipes_error_step>
+ Error in `step_nearmiss()`:
+ Caused by error in `prep()`:
+ ! The selector should select at most a single variable
# errors if character are present
Code
recipe(~., data = df_char) %>% step_nearmiss(x) %>% prep()
- Error <rlang_error>
- All columns selected for the step should be numeric
+ Error <recipes_error_step>
+ Error in `step_nearmiss()`:
+ Caused by error in `prep()`:
+ ! All columns selected for the step should be double, or integer.
# NA in response
Code
recipe(Job ~ Age, data = credit_data) %>% step_nearmiss(Job) %>% prep()
- Error <rlang_error>
- `step_nearmiss` cannot have any missing values. NAs found ind: Job.
+ Error <recipes_error_step>
+ Error in `step_nearmiss()`:
+ Caused by error in `prep()`:
+ ! Cannot have any missing values. NAs found ind: Job.
# empty printing
Code
rec
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 10
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
-
- NEARMISS-1 based on <none>
+ -- Operations
+ * NEARMISS-1 based on: <none>
---
Code
rec
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 10
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 32 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
+ -- Training information
+ Training data contained 32 data points and no incomplete rows.
- NEARMISS-1 based on <none> [trained]
+ -- Operations
+ * NEARMISS-1 based on: <none> | Trained
diff --git a/tests/testthat/_snaps/step_rose.md b/tests/testthat/_snaps/step_rose.md
index 2a746d5..9228807 100644
--- a/tests/testthat/_snaps/step_rose.md
+++ b/tests/testthat/_snaps/step_rose.md
@@ -2,99 +2,105 @@
Code
print(rec)
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 2
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
-
- ROSE based on class
+ -- Operations
+ * ROSE based on: class
---
Code
prep(rec)
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 2
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- ROSE based on class [trained]
+ -- Operations
+ * ROSE based on: class | Trained
# bad data
Code
rec %>% step_rose(x) %>% prep()
- Error <rlang_error>
- `x` should be a factor variable.
+ Error <recipes_error_step>
+ Error in `step_rose()`:
+ Caused by error in `prep()`:
+ ! `x` should be a factor variable.
---
Code
rec %>% step_rose(class, id) %>% prep()
- Error <rlang_error>
- The selector should select at most a single variable
+ Error <recipes_error_step>
+ Error in `step_rose()`:
+ Caused by error in `prep()`:
+ ! The selector should select at most a single variable
# NA in response
Code
recipe(Status ~ Age, data = credit_data0) %>% step_rose(Status) %>% prep()
- Error <rlang_error>
- `step_rose` cannot have any missing values. NAs found ind: Status.
+ Error <recipes_error_step>
+ Error in `step_rose()`:
+ Caused by error in `prep()`:
+ ! Cannot have any missing values. NAs found ind: Status.
# only except 2 classes
Code
recipe(~., data = df_char) %>% step_rose(x) %>% prep()
- Error <rlang_error>
- `x` must only have 2 levels.
+ Error <recipes_error_step>
+ Error in `step_rose()`:
+ Caused by error in `prep()`:
+ ! `x` must only have 2 levels.
# empty printing
Code
rec
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 10
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
-
- ROSE based on <none>
+ -- Operations
+ * ROSE based on: <none>
---
Code
rec
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 10
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 32 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
+ -- Training information
+ Training data contained 32 data points and no incomplete rows.
- ROSE based on <none> [trained]
+ -- Operations
+ * ROSE based on: <none> | Trained
diff --git a/tests/testthat/_snaps/step_smote.md b/tests/testthat/_snaps/step_smote.md
index b6a5257..ae346f9 100644
--- a/tests/testthat/_snaps/step_smote.md
+++ b/tests/testthat/_snaps/step_smote.md
@@ -2,106 +2,114 @@
Code
recipe(Status ~ Age, data = credit_data0) %>% step_smote(Status) %>% prep()
- Error <rlang_error>
- Not enough observations of 'dummy' to perform SMOTE.
+ Error <recipes_error_step>
+ Error in `step_smote()`:
+ Caused by error in `bake()`:
+ ! Not enough observations of 'dummy' to perform SMOTE.
# printing
Code
print(rec)
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 2
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
-
- SMOTE based on class
+ -- Operations
+ * SMOTE based on: class
---
Code
prep(rec)
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 2
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- SMOTE based on class [trained]
+ -- Operations
+ * SMOTE based on: class | Trained
# bad data
Code
rec %>% step_smote(x) %>% prep()
- Error <rlang_error>
- `x` should be a factor variable.
+ Error <recipes_error_step>
+ Error in `step_smote()`:
+ Caused by error in `prep()`:
+ ! `x` should be a factor variable.
---
Code
rec %>% step_smote(class, id) %>% prep()
- Error <rlang_error>
- The selector should select at most a single variable
+ Error <recipes_error_step>
+ Error in `step_smote()`:
+ Caused by error in `prep()`:
+ ! The selector should select at most a single variable
# errors if character are present
Code
recipe(~., data = df_char) %>% step_smote(x) %>% prep()
- Error <rlang_error>
- All columns selected for the step should be numeric
+ Error <recipes_error_step>
+ Error in `step_smote()`:
+ Caused by error in `prep()`:
+ ! All columns selected for the step should be double, or integer.
# NA in response
Code
recipe(Job ~ Age, data = credit_data) %>% step_smote(Job) %>% prep()
- Error <rlang_error>
- `step_smote` cannot have any missing values. NAs found ind: Job.
+ Error <recipes_error_step>
+ Error in `step_smote()`:
+ Caused by error in `prep()`:
+ ! Cannot have any missing values. NAs found ind: Job.
# empty printing
Code
rec
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 10
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
-
- SMOTE based on <none>
+ -- Operations
+ * SMOTE based on: <none>
---
Code
rec
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 10
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 32 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
+ -- Training information
+ Training data contained 32 data points and no incomplete rows.
- SMOTE based on <none> [trained]
+ -- Operations
+ * SMOTE based on: <none> | Trained
diff --git a/tests/testthat/_snaps/step_tomek.md b/tests/testthat/_snaps/step_tomek.md
index e4b5587..b64dcce 100644
--- a/tests/testthat/_snaps/step_tomek.md
+++ b/tests/testthat/_snaps/step_tomek.md
@@ -2,99 +2,105 @@
Code
print(rec)
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 2
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
-
- Tomek based on class
+ -- Operations
+ * Tomek based on: class
---
Code
prep(rec)
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 2
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 2
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- Tomek based on class [trained]
+ -- Operations
+ * Tomek based on: class | Trained
# bad data
Code
rec %>% step_smote(x) %>% prep()
- Error <rlang_error>
- `x` should be a factor variable.
+ Error <recipes_error_step>
+ Error in `step_smote()`:
+ Caused by error in `prep()`:
+ ! `x` should be a factor variable.
---
Code
rec %>% step_smote(class, id) %>% prep()
- Error <rlang_error>
- The selector should select at most a single variable
+ Error <recipes_error_step>
+ Error in `step_smote()`:
+ Caused by error in `prep()`:
+ ! The selector should select at most a single variable
# errors if character are present
Code
recipe(~., data = df_char) %>% step_tomek(x) %>% prep()
- Error <rlang_error>
- All columns selected for the step should be numeric
+ Error <recipes_error_step>
+ Error in `step_tomek()`:
+ Caused by error in `prep()`:
+ ! All columns selected for the step should be double, or integer.
# NA in response
Code
recipe(Status ~ Age, data = credit_data0) %>% step_tomek(Status) %>% prep()
- Error <rlang_error>
- `step_tomek` cannot have any missing values. NAs found ind: Status.
+ Error <recipes_error_step>
+ Error in `step_tomek()`:
+ Caused by error in `prep()`:
+ ! Cannot have any missing values. NAs found ind: Status.
# empty printing
Code
rec
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 10
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
-
- Tomek based on <none>
+ -- Operations
+ * Tomek based on: <none>
---
Code
rec
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 10
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 32 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Operations:
+ -- Training information
+ Training data contained 32 data points and no incomplete rows.
- Tomek based on <none> [trained]
+ -- Operations
+ * Tomek based on: <none> | Trained
diff --git a/tests/testthat/_snaps/step_upsample.md b/tests/testthat/_snaps/step_upsample.md
index 04a87aa..f3f14e1 100644
--- a/tests/testthat/_snaps/step_upsample.md
+++ b/tests/testthat/_snaps/step_upsample.md
@@ -4,127 +4,129 @@
new_rec <- recipe(~., data = circle_example) %>% step_upsample(class, ratio = 2)
Error <lifecycle_error_deprecated>
The `ratio` argument of `step_downsample()` was deprecated in themis 0.2.0 and is now defunct.
- Please use the `over_ratio` argument instead.
+ i Please use the `over_ratio` argument instead.
# printing
Code
print(rec)
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- predictor 4
+ -- Inputs
+ Number of variables by role
+ predictor: 4
- Operations:
-
- Up-sampling based on class
+ -- Operations
+ * Up-sampling based on: class
---
Code
prep(rec)
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- predictor 4
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ predictor: 4
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- Up-sampling based on class [trained]
+ -- Operations
+ * Up-sampling based on: class | Trained
# bad data
Code
rec %>% step_upsample(x) %>% prep()
- Error <rlang_error>
- `x` should be a factor variable.
+ Error <recipes_error_step>
+ Error in `step_upsample()`:
+ Caused by error in `prep()`:
+ ! `x` should be a factor variable.
---
Code
rec %>% step_upsample(class, id) %>% prep()
- Error <rlang_error>
- The selector should select at most a single variable
+ Error <recipes_error_step>
+ Error in `step_upsample()`:
+ Caused by error in `prep()`:
+ ! The selector should select at most a single variable
# empty printing
Code
rec
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- outcome 1
- predictor 10
+ -- Recipe ----------------------------------------------------------------------
- Operations:
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Up-sampling based on <none>
+ -- Operations
+ * Up-sampling based on: <none>
---
Code
rec
- Output
- Recipe
+ Message <cliMessage>
- Inputs:
+ -- Recipe ----------------------------------------------------------------------
- role #variables
- outcome 1
- predictor 10
+ -- Inputs
+ Number of variables by role
+ outcome: 1
+ predictor: 10
- Training data contained 32 data points and no missing data.
+ -- Training information
+ Training data contained 32 data points and no incomplete rows.
- Operations:
-
- Up-sampling based on <none> [trained]
+ -- Operations
+ * Up-sampling based on: <none> | Trained
# case_weights
Code
rec1_p
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- case_weights 1
- predictor 4
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ predictor: 4
+ case_weights: 1
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- Up-sampling based on class [weighted, trained]
+ -- Operations
+ * Up-sampling based on: class | Trained, weighted
---
Code
rec1_p
- Output
- Recipe
-
- Inputs:
+ Message <cliMessage>
- role #variables
- case_weights 1
- predictor 4
+ -- Recipe ----------------------------------------------------------------------
- Training data contained 400 data points and no missing data.
+ -- Inputs
+ Number of variables by role
+ predictor: 4
+ case_weights: 1
- Operations:
+ -- Training information
+ Training data contained 400 data points and no incomplete rows.
- Up-sampling based on class [ignored weights, trained]
+ -- Operations
+ * Up-sampling based on: class | Trained, ignored weights
diff --git a/tests/testthat/_snaps/tomek_impl.md b/tests/testthat/_snaps/tomek_impl.md
index 998dcf1..4e3d3f8 100644
--- a/tests/testthat/_snaps/tomek_impl.md
+++ b/tests/testthat/_snaps/tomek_impl.md
@@ -25,5 +25,5 @@
Code
tomek(circle_example0, var = "class")
Error <rlang_error>
- `tomek` cannot have any missing values. NAs found ind: x.
+ Cannot have any missing values. NAs found ind: x.
diff --git a/tests/testthat/test-extension_check.R b/tests/testthat/test-extension_check.R
new file mode 100644
index 0000000..c2e3627
--- /dev/null
+++ b/tests/testthat/test-extension_check.R
@@ -0,0 +1,7 @@
+test_that("recipes_extension_check", {
+ expect_snapshot(
+ recipes::recipes_extension_check(
+ pkg = "themis"
+ )
+ )
+})
diff --git a/tests/testthat/test-smote_impl.R b/tests/testthat/test-smote_impl.R
index 868842c..5c1c2cf 100644
--- a/tests/testthat/test-smote_impl.R
+++ b/tests/testthat/test-smote_impl.R
@@ -69,3 +69,13 @@ test_that("ordering of columns shouldn't matter", {
NA
)
})
+
+test_that("Doesn't error if no upsampling is done (#119)", {
+ dat <- data.frame(
+ outcome = c(rep("X", 101), rep("Z", 50)),
+ X1 = 1)
+
+ expect_no_error(
+ smote_impl(dat, "outcome", 5, over_ratio = 0.5)
+ )
+})
diff --git a/tests/testthat/test-smotenc.R b/tests/testthat/test-smotenc.R
index 1622796..0923794 100644
--- a/tests/testthat/test-smotenc.R
+++ b/tests/testthat/test-smotenc.R
@@ -3,21 +3,6 @@ library(recipes)
library(dplyr)
library(modeldata)
-test_that("tunable", {
- rec <-
- recipe(~., data = mtcars) %>%
- step_smotenc(all_predictors(), under_ratio = 1)
- rec_param <- tunable.step_smote(rec$steps[[1]])
- expect_equal(rec_param$name, c("over_ratio", "neighbors"))
- expect_true(all(rec_param$source == "recipe"))
- expect_true(is.list(rec_param$call_info))
- expect_equal(nrow(rec_param), 2)
- expect_equal(
- names(rec_param),
- c("name", "call_info", "source", "component", "component_id")
- )
-})
-
test_that("errors if there isn't enough data", {
data("credit_data")
credit_data0 <- credit_data
@@ -305,3 +290,42 @@ test_that("empty printing", {
expect_snapshot(rec)
})
+
+test_that("Doesn't error if no upsampling is done (#119)", {
+ dat <- data.frame(
+ outcome = c(rep("X", 101), rep("Z", 50)),
+ X1 = 1
+ )
+
+ expect_no_error(
+ smotenc_impl(dat, "outcome", 5, over_ratio = 0.5)
+ )
+})
+
+test_that("tunable", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_smotenc(all_predictors())
+ rec_param <- tunable.step_smotenc(rec$steps[[1]])
+ expect_equal(rec_param$name, c("over_ratio", "neighbors"))
+ expect_true(all(rec_param$source == "recipe"))
+ expect_true(is.list(rec_param$call_info))
+ expect_equal(nrow(rec_param), 2)
+ expect_equal(
+ names(rec_param),
+ c("name", "call_info", "source", "component", "component_id")
+ )
+})
+
+test_that("tunable is setup to works with extract_parameter_set_dials works", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_smotenc(
+ all_predictors(),
+ over_ratio = hardhat::tune(),
+ neighbors = hardhat::tune()
+ )
+
+ params <- extract_parameter_set_dials(rec)
+
+ expect_s3_class(params, "parameters")
+ expect_identical(nrow(params), 2L)
+})
diff --git a/tests/testthat/test-step_adasyn.R b/tests/testthat/test-step_adasyn.R
index 3ebbc60..1f97b60 100644
--- a/tests/testthat/test-step_adasyn.R
+++ b/tests/testthat/test-step_adasyn.R
@@ -3,20 +3,6 @@ library(recipes)
library(dplyr)
library(modeldata)
-test_that("tunable", {
- rec <- recipe(~., data = mtcars) %>%
- step_adasyn(all_predictors(), under_ratio = 1)
- rec_param <- tunable.step_adasyn(rec$steps[[1]])
- expect_equal(rec_param$name, c("over_ratio", "neighbors"))
- expect_true(all(rec_param$source == "recipe"))
- expect_true(is.list(rec_param$call_info))
- expect_equal(nrow(rec_param), 2)
- expect_equal(
- names(rec_param),
- c("name", "call_info", "source", "component", "component_id")
- )
-})
-
test_that("errors if there isn't enough data", {
data("credit_data")
credit_data0 <- credit_data
@@ -299,3 +285,31 @@ test_that("empty printing", {
expect_snapshot(rec)
})
+
+test_that("tunable", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_adasyn(all_predictors())
+ rec_param <- tunable.step_adasyn(rec$steps[[1]])
+ expect_equal(rec_param$name, c("over_ratio", "neighbors"))
+ expect_true(all(rec_param$source == "recipe"))
+ expect_true(is.list(rec_param$call_info))
+ expect_equal(nrow(rec_param), 2)
+ expect_equal(
+ names(rec_param),
+ c("name", "call_info", "source", "component", "component_id")
+ )
+})
+
+test_that("tunable is setup to works with extract_parameter_set_dials works", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_adasyn(
+ all_predictors(),
+ over_ratio = hardhat::tune(),
+ neighbors = hardhat::tune()
+ )
+
+ params <- extract_parameter_set_dials(rec)
+
+ expect_s3_class(params, "parameters")
+ expect_identical(nrow(params), 2L)
+})
diff --git a/tests/testthat/test-step_bsmote.R b/tests/testthat/test-step_bsmote.R
index 42db460..154d638 100644
--- a/tests/testthat/test-step_bsmote.R
+++ b/tests/testthat/test-step_bsmote.R
@@ -18,21 +18,6 @@ test_that("all minority classes are upsampled", {
expect_true(all(max(table(rec1_p2$species)) == 152))
})
-test_that("tunable", {
- rec <-
- recipe(~., data = mtcars) %>%
- step_bsmote(all_predictors(), under_ratio = 1)
- rec_param <- tunable.step_bsmote(rec$steps[[1]])
- expect_equal(rec_param$name, c("over_ratio", "neighbors", "all_neighbors"))
- expect_true(all(rec_param$source == "recipe"))
- expect_true(is.list(rec_param$call_info))
- expect_equal(nrow(rec_param), 3)
- expect_equal(
- names(rec_param),
- c("name", "call_info", "source", "component", "component_id")
- )
-})
-
test_that("basic usage", {
rec1 <- recipe(class ~ x + y, data = circle_example) %>%
step_bsmote(class, all_neighbors = FALSE)
@@ -385,3 +370,32 @@ test_that("empty printing", {
expect_snapshot(rec)
})
+
+test_that("tunable", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_bsmote(all_predictors())
+ rec_param <- tunable.step_bsmote(rec$steps[[1]])
+ expect_equal(rec_param$name, c("over_ratio", "neighbors", "all_neighbors"))
+ expect_true(all(rec_param$source == "recipe"))
+ expect_true(is.list(rec_param$call_info))
+ expect_equal(nrow(rec_param), 3)
+ expect_equal(
+ names(rec_param),
+ c("name", "call_info", "source", "component", "component_id")
+ )
+})
+
+test_that("tunable is setup to works with extract_parameter_set_dials works", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_bsmote(
+ all_predictors(),
+ over_ratio = hardhat::tune(),
+ neighbors = hardhat::tune(),
+ all_neighbors = hardhat::tune()
+ )
+
+ params <- extract_parameter_set_dials(rec)
+
+ expect_s3_class(params, "parameters")
+ expect_identical(nrow(params), 3L)
+})
diff --git a/tests/testthat/test-step_downsample.R b/tests/testthat/test-step_downsample.R
index 45cedc8..5380e93 100644
--- a/tests/testthat/test-step_downsample.R
+++ b/tests/testthat/test-step_downsample.R
@@ -12,20 +12,6 @@ test_that("ratio deprecation", {
)
})
-test_that("tunable", {
- rec <- recipe(~., data = mtcars) %>%
- step_downsample(all_predictors(), under_ratio = 1)
- rec_param <- tunable.step_downsample(rec$steps[[1]])
- expect_equal(rec_param$name, c("under_ratio"))
- expect_true(all(rec_param$source == "recipe"))
- expect_true(is.list(rec_param$call_info))
- expect_equal(nrow(rec_param), 1)
- expect_equal(
- names(rec_param),
- c("name", "call_info", "source", "component", "component_id")
- )
-})
-
test_that("basic usage", {
rec1 <- recipe(~., data = circle_example) %>%
step_downsample(class)
@@ -286,3 +272,30 @@ test_that("case_weights", {
expect_snapshot(rec1_p)
})
+
+test_that("tunable", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_downsample(all_predictors())
+ rec_param <- tunable.step_downsample(rec$steps[[1]])
+ expect_equal(rec_param$name, c("under_ratio"))
+ expect_true(all(rec_param$source == "recipe"))
+ expect_true(is.list(rec_param$call_info))
+ expect_equal(nrow(rec_param), 1)
+ expect_equal(
+ names(rec_param),
+ c("name", "call_info", "source", "component", "component_id")
+ )
+})
+
+test_that("tunable is setup to works with extract_parameter_set_dials works", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_downsample(
+ all_predictors(),
+ under_ratio = hardhat::tune()
+ )
+
+ params <- extract_parameter_set_dials(rec)
+
+ expect_s3_class(params, "parameters")
+ expect_identical(nrow(params), 1L)
+})
diff --git a/tests/testthat/test-step_nearmiss.R b/tests/testthat/test-step_nearmiss.R
index 1257950..2f3c441 100644
--- a/tests/testthat/test-step_nearmiss.R
+++ b/tests/testthat/test-step_nearmiss.R
@@ -5,21 +5,6 @@ library(modeldata)
set.seed(1234)
-test_that("tunable", {
- rec <-
- recipe(~., data = mtcars) %>%
- step_nearmiss(all_predictors(), under_ratio = 1)
- rec_param <- tunable.step_nearmiss(rec$steps[[1]])
- expect_equal(rec_param$name, c("under_ratio", "neighbors"))
- expect_true(all(rec_param$source == "recipe"))
- expect_true(is.list(rec_param$call_info))
- expect_equal(nrow(rec_param), 2)
- expect_equal(
- names(rec_param),
- c("name", "call_info", "source", "component", "component_id")
- )
-})
-
test_that("basic usage", {
rec1 <- recipe(class ~ x + y, data = circle_example) %>%
step_nearmiss(class)
@@ -250,3 +235,31 @@ test_that("empty printing", {
expect_snapshot(rec)
})
+
+test_that("tunable", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_nearmiss(all_predictors())
+ rec_param <- tunable.step_nearmiss(rec$steps[[1]])
+ expect_equal(rec_param$name, c("under_ratio", "neighbors"))
+ expect_true(all(rec_param$source == "recipe"))
+ expect_true(is.list(rec_param$call_info))
+ expect_equal(nrow(rec_param), 2)
+ expect_equal(
+ names(rec_param),
+ c("name", "call_info", "source", "component", "component_id")
+ )
+})
+
+test_that("tunable is setup to works with extract_parameter_set_dials works", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_nearmiss(
+ all_predictors(),
+ under_ratio = hardhat::tune(),
+ neighbors = hardhat::tune()
+ )
+
+ params <- extract_parameter_set_dials(rec)
+
+ expect_s3_class(params, "parameters")
+ expect_identical(nrow(params), 2L)
+})
diff --git a/tests/testthat/test-step_rose.R b/tests/testthat/test-step_rose.R
index e7686b4..7edb32a 100644
--- a/tests/testthat/test-step_rose.R
+++ b/tests/testthat/test-step_rose.R
@@ -24,21 +24,6 @@ test_that("minority_prop value", {
expect_lt(tr_xtab1[["Circle"]], tr_xtab2[["Circle"]])
})
-test_that("tunable", {
- rec <-
- recipe(~., data = mtcars) %>%
- step_rose(all_predictors(), under_ratio = 1)
- rec_param <- tunable.step_rose(rec$steps[[1]])
- expect_equal(rec_param$name, c("over_ratio"))
- expect_true(all(rec_param$source == "recipe"))
- expect_true(is.list(rec_param$call_info))
- expect_equal(nrow(rec_param), 1)
- expect_equal(
- names(rec_param),
- c("name", "call_info", "source", "component", "component_id")
- )
-})
-
test_that("row matching works correctly #36", {
expect_error(
recipe(class ~ ., data = circle_example) %>%
@@ -279,3 +264,30 @@ test_that("empty printing", {
expect_snapshot(rec)
})
+
+test_that("tunable", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_rose(all_predictors())
+ rec_param <- tunable.step_rose(rec$steps[[1]])
+ expect_equal(rec_param$name, c("over_ratio"))
+ expect_true(all(rec_param$source == "recipe"))
+ expect_true(is.list(rec_param$call_info))
+ expect_equal(nrow(rec_param), 1)
+ expect_equal(
+ names(rec_param),
+ c("name", "call_info", "source", "component", "component_id")
+ )
+})
+
+test_that("tunable is setup to works with extract_parameter_set_dials works", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_rose(
+ all_predictors(),
+ over_ratio = hardhat::tune()
+ )
+
+ params <- extract_parameter_set_dials(rec)
+
+ expect_s3_class(params, "parameters")
+ expect_identical(nrow(params), 1L)
+})
diff --git a/tests/testthat/test-step_smote.R b/tests/testthat/test-step_smote.R
index e126e44..772e6a7 100644
--- a/tests/testthat/test-step_smote.R
+++ b/tests/testthat/test-step_smote.R
@@ -3,21 +3,6 @@ library(recipes)
library(dplyr)
library(modeldata)
-test_that("tunable", {
- rec <-
- recipe(~., data = mtcars) %>%
- step_smote(all_predictors(), under_ratio = 1)
- rec_param <- tunable.step_smote(rec$steps[[1]])
- expect_equal(rec_param$name, c("over_ratio", "neighbors"))
- expect_true(all(rec_param$source == "recipe"))
- expect_true(is.list(rec_param$call_info))
- expect_equal(nrow(rec_param), 2)
- expect_equal(
- names(rec_param),
- c("name", "call_info", "source", "component", "component_id")
- )
-})
-
test_that("errors if there isn't enough data", {
data("credit_data")
credit_data0 <- credit_data
@@ -297,3 +282,31 @@ test_that("empty printing", {
expect_snapshot(rec)
})
+
+test_that("tunable", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_smote(all_predictors())
+ rec_param <- tunable.step_smote(rec$steps[[1]])
+ expect_equal(rec_param$name, c("over_ratio", "neighbors"))
+ expect_true(all(rec_param$source == "recipe"))
+ expect_true(is.list(rec_param$call_info))
+ expect_equal(nrow(rec_param), 2)
+ expect_equal(
+ names(rec_param),
+ c("name", "call_info", "source", "component", "component_id")
+ )
+})
+
+test_that("tunable is setup to works with extract_parameter_set_dials works", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_smote(
+ all_predictors(),
+ over_ratio = hardhat::tune(),
+ neighbors = hardhat::tune()
+ )
+
+ params <- extract_parameter_set_dials(rec)
+
+ expect_s3_class(params, "parameters")
+ expect_identical(nrow(params), 2L)
+})
diff --git a/tests/testthat/test-step_upsample.R b/tests/testthat/test-step_upsample.R
index 772cc25..1fb4aca 100644
--- a/tests/testthat/test-step_upsample.R
+++ b/tests/testthat/test-step_upsample.R
@@ -12,21 +12,6 @@ test_that("ratio deprecation", {
)
})
-test_that("tunable", {
- rec <-
- recipe(~., data = mtcars) %>%
- step_upsample(all_predictors())
- rec_param <- tunable.step_upsample(rec$steps[[1]])
- expect_equal(rec_param$name, c("over_ratio"))
- expect_true(all(rec_param$source == "recipe"))
- expect_true(is.list(rec_param$call_info))
- expect_equal(nrow(rec_param), 1)
- expect_equal(
- names(rec_param),
- c("name", "call_info", "source", "component", "component_id")
- )
-})
-
test_that("basic usage", {
rec1 <- recipe(~., data = circle_example) %>%
step_upsample(class)
@@ -286,3 +271,30 @@ test_that("case_weights", {
expect_snapshot(rec1_p)
})
+
+test_that("tunable", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_upsample(all_predictors())
+ rec_param <- tunable.step_upsample(rec$steps[[1]])
+ expect_equal(rec_param$name, c("over_ratio"))
+ expect_true(all(rec_param$source == "recipe"))
+ expect_true(is.list(rec_param$call_info))
+ expect_equal(nrow(rec_param), 1)
+ expect_equal(
+ names(rec_param),
+ c("name", "call_info", "source", "component", "component_id")
+ )
+})
+
+test_that("tunable is setup to works with extract_parameter_set_dials works", {
+ rec <- recipe(~., data = mtcars) %>%
+ step_upsample(
+ all_predictors(),
+ over_ratio = hardhat::tune()
+ )
+
+ params <- extract_parameter_set_dials(rec)
+
+ expect_s3_class(params, "parameters")
+ expect_identical(nrow(params), 1L)
+})