% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/breslow.R
\name{breslow}
\alias{breslow}
\title{Survival probabilities using Breslow's estimator}
\usage{
breslow(times, status, lp_train, lp_test, eval_times = NULL, type = "surv")
}
\arguments{
\item{times}{(\code{numeric()})\cr Vector of times (train set).}

\item{status}{(\code{numeric()})\cr Vector of status indicators (train set).
For each observation in the train set, this should be 0 (alive/censored) or
1 (dead).}

\item{lp_train}{(\code{numeric()})\cr Vector of linear predictors (train set).
These are the relative score predictions (\eqn{lp = \hat{\beta}X_{train}})
from a proportional hazards model on the train set.}

\item{lp_test}{(\code{numeric()})\cr Vector of linear predictors (test set).
These are the relative score predictions (\eqn{lp = \hat{\beta}X_{test}})
from a proportional hazards model on the test set.}

\item{eval_times}{(\code{numeric()})\cr Vector of times to compute survival
probabilities. If \code{NULL} (default), the unique and sorted \code{times} from the
train set will be used, otherwise the unique and sorted \code{eval_times}.}

\item{type}{(\code{character()})\cr Type of prediction estimates.
Default is \code{surv} which returns the survival probabilities \eqn{S_i(t)} for
each test observation \eqn{i}. If \code{cumhaz}, the function returns the estimated
cumulative hazards \eqn{H_i(t)}.}
}
\value{
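A \code{matrix} (obs x times). The number of columns is equal to the number of
unique \code{eval_times} (or of unique train set \code{times} if \code{eval_times} is \code{NULL})
and the number of rows is equal to the number of test observations (i.e. the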
length of the \code{lp_test} vector). Depending on the \code{type} argument, the matrix
can have either survival probabilities (0-1) or cumulative hazard estimates
(0-\code{Inf}).
}
\description{
Helper function to compose a survival distribution (or cumulative hazard)
from the relative risk predictions (linear predictors, \code{lp}) of a
\strong{proportional hazards} model (e.g. a Cox-type model).
}
\details{
We estimate the survival probability of individual \eqn{i} (from the test set),
at time point \eqn{t} as follows:
\deqn{S_i(t) = e^{-H_i(t)} = e^{-\hat{H}_0(t) \times e^{lp_i}}}

where:
\itemize{
\item \eqn{H_i(t)} is the cumulative hazard function for individual \eqn{i}
\item \eqn{\hat{H}_0(t)} is Breslow's estimator for the \strong{cumulative baseline
hazard}. Estimation requires the training set's \code{times} and \code{status} as well
as the risk predictions (\code{lp_train}).
\item \eqn{lp_i} is the risk prediction (linear predictor) of individual \eqn{i}
on the test set.
}

Breslow's approach uses a non-parametric maximum likelihood estimation of the
cumulative baseline hazard function:

\deqn{\hat{H}_0(t) = \sum_{i=1}^n{\frac{I(T_i \le t)\delta_i}
{\sum\nolimits_{j \in R_i}e^{lp_j}}}}

where:
\itemize{
\item \eqn{t} is the vector of time points (unique and sorted, from the train set)
\item \eqn{n} is the number of observations (train set)
\item \eqn{T} is the vector of observed (event or censoring) times (train set)
\item \eqn{\delta} is the status indicator (1 = event or 0 = censored)
\item \eqn{R_i} is the risk set (the individuals still at risk just before
time \eqn{T_i})
\item \eqn{lp_j} is the risk prediction (linear predictor) of individual \eqn{j}
(who is part of the risk set \eqn{R_i}) on the train set.
}

We employ \strong{constant interpolation} to estimate the cumulative baseline hazards,
extending from the observed unique event times to the specified evaluation
times (\code{eval_times}).
Any values falling outside the range of the estimated times are assigned as
follows:
\deqn{\hat{H}_0(eval\_times < min(t)) = 0} and
\deqn{\hat{H}_0(eval\_times > max(t)) = \hat{H}_0(max(t))}

Note that in the rare event of \code{lp} predictions being \code{Inf} or \code{-Inf}, the
resulting cumulative hazard values become \code{NaN}, which we substitute with
\code{Inf} (and corresponding survival probabilities take the value of \eqn{0}).

For similar implementations, see \code{gbm::basehaz.gbm()}, \code{C060::basesurv()} and
\code{xgboost.surv::sgb_bhaz()}.
}
\examples{
task = tsk("rats")
part = partition(task, ratio = 0.8)

learner = lrn("surv.coxph")
learner$train(task, part$train)
p_train = learner$predict(task, part$train)
p_test = learner$predict(task, part$test)

surv = breslow(times = task$times(part$train), status = task$status(part$train),
               lp_train = p_train$lp, lp_test = p_test$lp)
head(surv)
}
\references{
Cox DR (1972).
\dQuote{Regression Models and Life-Tables.}
\emph{Journal of the Royal Statistical Society: Series B (Methodological)}, \bold{34}(2), 187--202.
\doi{10.1111/j.2517-6161.1972.tb00899.x}.

Lin DY (2007).
\dQuote{On the Breslow estimator.}
\emph{Lifetime Data Analysis}, \bold{13}(4), 471--480.
\doi{10.1007/s10985-007-9048-y}.
}
