#' @title Deep Learning Tuning Spaces from Yandex's RTDL
#'
#' @name mlr_tuning_spaces_rtdl
#'
#' @description
#' Tuning spaces for deep neural network architectures from the `r cite_bib("gorishniy2021revisiting")` article.
#'
#' These tuning spaces require optimizers that have a `weight_decay` parameter, such as AdamW or any of the other optimizers built into `mlr3torch`.
#'
#' When the article suggests multiple ranges for a given hyperparameter, these tuning spaces choose the widest range.
#'
#' The FT-Transformer tuning space disables weight decay for all bias parameters, matching the implementation provided by the authors in the rtdl-revisiting-models package.
#' However, this may differ from the exact experimental setup described in the article.
#'
#' For the FT-Transformer, if training is unstable, consider a combination of standardizing features, using an adaptive optimizer (e.g. Adam), reducing the learning rate,
#' and using a learning rate scheduler.
#'
#' @source
#' `r format_bib("gorishniy2021revisiting")`
#'
#' @aliases
#' mlr_tuning_spaces_classif.mlp.rtdl
#' mlr_tuning_spaces_classif.tab_resnet.rtdl
#' mlr_tuning_spaces_classif.ft_transformer.rtdl
#' mlr_tuning_spaces_regr.mlp.rtdl
#' mlr_tuning_spaces_regr.tab_resnet.rtdl
#' mlr_tuning_spaces_regr.ft_transformer.rtdl
#'
#' @section MLP tuning space:
#' `r rd_info(lts("classif.mlp.rtdl"))`
#'
#' @section Tabular ResNet tuning space:
#' `r rd_info(lts("classif.tab_resnet.rtdl"))`
#'
#' @section FT-Transformer tuning space:
#' `r rd_info(lts("classif.ft_transformer.rtdl"))`
#'
#' In the FT-Transformer, the validation-related parameters must still be set manually, via e.g. `lts("regr.ft_transformer.rtdl")$get_learner(validate = 0.2, measures_valid = msr("regr.rmse"))`.
#'
#' @include mlr_tuning_spaces.R
NULL
42+
# mlp
# MLP search space from Gorishniy et al. (2021); where the article lists
# several ranges for a hyperparameter, the widest one is used.
# `patience` is fixed (early stopping), `epochs` is tuned internally.
vals = list(
  n_layers = to_tune(1, 16),
  neurons = to_tune(levels = 1:1024),
  p = to_tune(0, 0.5),
  opt.lr = to_tune(1e-5, 1e-2, logscale = TRUE),
  opt.weight_decay = to_tune(1e-6, 1e-3, logscale = TRUE),
  epochs = to_tune(lower = 1L, upper = 100L, internal = TRUE),
  patience = 17L
)
53+
# Register the classification MLP tuning space (string ids cleaned of
# stray whitespace so lts("classif.mlp.rtdl") resolves correctly).
add_tuning_space(
  id = "classif.mlp.rtdl",
  values = vals,
  tags = c("gorishniy2021", "classification"),
  learner = "classif.mlp",
  package = "mlr3torch",
  label = "Classification MLP with RTDL"
)
62+
# Register the regression MLP tuning space (same search space as the
# classification variant, applied to the regr.mlp learner).
add_tuning_space(
  id = "regr.mlp.rtdl",
  values = vals,
  tags = c("gorishniy2021", "regression"),
  learner = "regr.mlp",
  package = "mlr3torch",
  label = "Regression MLP with RTDL"
)
71+
# resnet
# Tabular ResNet search space from Gorishniy et al. (2021): block count and
# width, hidden-layer multiplier, the two dropout rates, and AdamW-style
# optimizer hyperparameters (requires an optimizer with `weight_decay`).
vals = list(
  n_blocks = to_tune(1, 16),
  d_block = to_tune(64, 1024),
  d_hidden_multiplier = to_tune(1, 4),
  dropout1 = to_tune(0, 0.5),
  dropout2 = to_tune(0, 0.5),
  opt.lr = to_tune(1e-5, 1e-2, logscale = TRUE),
  opt.weight_decay = to_tune(1e-6, 1e-3, logscale = TRUE),
  epochs = to_tune(lower = 1L, upper = 100L, internal = TRUE),
  patience = 17L
)
84+
# Register the classification Tabular ResNet tuning space.
add_tuning_space(
  id = "classif.tab_resnet.rtdl",
  values = vals,
  tags = c("gorishniy2021", "classification"),
  learner = "classif.tab_resnet",
  package = "mlr3torch",
  label = "Classification Tabular ResNet with RTDL"
)
93+
# Register the regression Tabular ResNet tuning space.
add_tuning_space(
  id = "regr.tab_resnet.rtdl",
  values = vals,
  tags = c("gorishniy2021", "regression"),
  learner = "regr.tab_resnet",
  package = "mlr3torch",
  label = "Regression Tabular ResNet with RTDL"
)
102+
# Decide whether a single parameter (identified by its name) should be
# excluded from weight decay.
#
# @param name (`character(1)`) a parameter name from the network's state.
# @return (`logical(1)`) TRUE if the name matches any no-weight-decay pattern.
no_wd = function(name) {
  # Normalization-layer parameters and all biases are exempt from weight
  # decay; "bias" also matches the input projection bias of the attention
  # heads, mirroring the reference rtdl implementation.
  no_wd_params = c("_normalization", "bias")

  # fixed = TRUE: patterns are literal substrings, not regular expressions.
  any(vapply(no_wd_params, function(pattern) grepl(pattern, name, fixed = TRUE), logical(1L)))
}
109+
# Split an FT-Transformer's parameters into two optimizer parameter groups:
# one that receives weight decay and one (normalization layers, biases,
# tokenizer, final normalization layer) with weight decay disabled.
#
# @param parameters (named `list()`) the network's parameters, with names of
#   the form "<module number in the module list>.<...>".
# @return (`list()`) two optimizer param groups, the second with weight_decay = 0.
rtdl_param_groups = function(parameters) {
  # NOTE: the split pattern must be a literal "." so that the module number
  # can be extracted as the second name component.
  split_param_names = strsplit(names(parameters), ".", fixed = TRUE)
  nums_in_module_list = vapply(split_param_names, function(x) as.integer(x[2L]), integer(1L))

  # The tokenizer consists of every module preceding the CLS token, which
  # sits directly before the first transformer block; locate that block via
  # its ffn normalization layer.
  ffn_norm_idx = grepl("ffn_normalization", names(parameters), fixed = TRUE)
  first_ffn_norm_num_in_module_list = as.integer(split_param_names[ffn_norm_idx][[1L]][2L])
  cls_num_in_module_list = first_ffn_norm_num_in_module_list - 1L
  tokenizer_idx = nums_in_module_list < cls_num_in_module_list

  # The last normalization layer is unnamed, so we need to find it based on
  # its position in the module list (two modules before the final one).
  last_module_num_in_module_list = as.integer(split_param_names[[length(split_param_names)]][2L])
  last_norm_num_in_module_list = last_module_num_in_module_list - 2L
  last_norm_idx = nums_in_module_list == last_norm_num_in_module_list

  no_wd_idx = map_lgl(names(parameters), no_wd) | tokenizer_idx | last_norm_idx
  no_wd_group = parameters[no_wd_idx]
  main_group = parameters[!no_wd_idx]

  list(
    list(params = main_group),
    list(params = no_wd_group, weight_decay = 0)
  )
}
134+
# ft_transformer
# FT-Transformer search space from Gorishniy et al. (2021).
# d_token is tuned in multiples of 8 (8..512) so the token dimension stays
# divisible by the fixed 8 attention heads. The learning-rate range is
# narrower than for the MLP/ResNet, per the article. Weight decay is
# disabled for normalization layers, biases and the tokenizer via
# rtdl_param_groups (defined above).
vals = list(
  n_blocks = to_tune(1, 6),
  d_token = to_tune(p_int(8L, 64L, trafo = function(x) 8L * x)),
  attention_n_heads = 8L,
  residual_dropout = to_tune(0, 0.2),
  attention_dropout = to_tune(0, 0.5),
  ffn_dropout = to_tune(0, 0.5),
  ffn_d_hidden_multiplier = to_tune(2 / 3, 8 / 3),
  opt.lr = to_tune(1e-5, 1e-4, logscale = TRUE),
  opt.weight_decay = to_tune(1e-6, 1e-3, logscale = TRUE),
  opt.param_groups = rtdl_param_groups,
  epochs = to_tune(lower = 1L, upper = 100L, internal = TRUE),
  patience = 17L
)
150+
# Register the classification FT-Transformer tuning space.
add_tuning_space(
  id = "classif.ft_transformer.rtdl",
  values = vals,
  tags = c("gorishniy2021", "classification"),
  learner = "classif.ft_transformer",
  package = "mlr3torch",
  label = "Classification FT-Transformer with RTDL"
)
159+
# Register the regression FT-Transformer tuning space.
add_tuning_space(
  id = "regr.ft_transformer.rtdl",
  values = vals,
  tags = c("gorishniy2021", "regression"),
  learner = "regr.ft_transformer",
  package = "mlr3torch",
  label = "Regression FT-Transformer with RTDL"
)