This vignette illustrates, through four examples, the potential uses of the R-package \(\texttt{AalenJohansen}\), which is an implementation of the conditional Nelson–Aalen and Aalen–Johansen estimators introduced in Bladt & Furrer (2023).
We start out with a simple time-inhomogeneous Markov model: \[\begin{align*} \frac{\mathrm{d}\Lambda(t)}{\mathrm{d}t}=\lambda(t)=\frac{1}{1+\frac{1}{2}t} \begin{pmatrix} -2 & 1& 1 \\ 2 & -3 & 1 \\ 0 & 0 & 0 \end{pmatrix}\!. \end{align*}\]
library(AalenJohansen)
set.seed(2)
<- function(i, t, u){
jump_rate if(i == 1){
2 / (1+1/2*t)
else if(i == 2){
} 3 / (1+1/2*t)
else{
} 0
}
}
<- function(i, s, v){
mark_dist if(i == 1){
c(0, 1/2, 1/2)
else if(i == 2){
} c(2/3, 0, 1/3)
else{
} 0
}
}
<- function(t){
lambda <- matrix(c(2/(1+1/2*t)*mark_dist(1, t, 0), 3/(1+1/2*t)*mark_dist(2, t, 0), rep(0, 3)),
A nrow = 3, ncol = 3, byrow = TRUE)
diag(A) <- -rowSums(A)
A }
We simulate \(1,000\) independent and identically distributed realizations subject to independent right-censoring. Right-censoring follows the distribution \(\text{Unif}(0,5)\).
<- 1000
n
<- runif(n, 0, 5)
c
<- list()
sim for(i in 1:n){
<- sim_path(sample(1:2, 1), rates = jump_rate, dists = mark_dist,
sim[[i]] tn = c[i], bs = c(2*c[i], 3*c[i], 0))
}
The degree of censoring is
sum(c == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))) / n
#> [1] 0.309
We fit the classic Nelson–Aalen and Aalen-Johansen estimators.
<- aalen_johansen(sim) fit
For illustrative purposes, we plot \(\Lambda_{21}\) and the state occupation probability \(p_2\) for both the true model (full) and using the classic estimators (dashed).
<- unlist(lapply(fit$Lambda, FUN = function(L) L[2,1]))
v1 <- fit$t
v0 <- unlist(lapply(fit$p, FUN = function(L) L[2]))
p <- unlist(lapply(prodint(0, 5, 0.01, lambda), FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2]))
P
par(mfrow = c(1, 2))
par(mar = c(2.5, 2.5, 1.5, 1.5))
plot(v0, v1, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard")
lines(v0, 4*log(1+1/2*v0))
plot(v0, p, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability")
lines(seq(0, 5, 0.01), P)
We now consider a simple extension with covariates: \[\begin{align*} \frac{\mathrm{d}\Lambda(t|x)}{\mathrm{d}t}=\lambda(t|x)=\frac{1}{1+x\cdot t} \begin{pmatrix} -2 & 1& 1 \\ 2 & -3 & 1 \\ 0 & 0 & 0 \end{pmatrix}\!. \end{align*}\]
We simulate \(10,000\) independent realizations subject to independent right-censoring. Right-censoring follows the distribution \(\text{Unif}(0,5)\), while \(X\sim\text{Unif}(0,1)\).
<- function(i, t, u){
jump_rate if(i == 1){
2
else if(i == 2){
} 3
else{
} 0
}
}
<- function(i, s, v){
mark_dist if(i == 1){
c(0, 1/2, 1/2)
else if(i == 2){
} c(2/3, 0, 1/3)
else{
} 0
}
}
<- function(t, x){
lambda <- matrix(c(2/(1+x*t)*mark_dist(1, t, 0), 3/(1+x*t)*mark_dist(2, t, 0), rep(0, 3)),
A nrow = 3, ncol = 3, byrow = TRUE)
diag(A) <- -rowSums(A)
A
}
<- 10000
n
<- runif(n)
X <- runif(n, 0, 5)
c
<- list()
sim for(i in 1:n){
<- function(j, y, z){jump_rate(j, y, z)/(1+X[i]*y)}
rates <- sim_path(sample(1:2, 1), rates = rates, dists = mark_dist,
sim[[i]] tn = c[i], bs = c(2*c[i], 3*c[i], 0))
$X <- X[i]
sim[[i]] }
The degree of censoring is
sum(c == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))) / n
#> [1] 0.2954
We fit the conditional Nelson–Aalen and Aalen–Johansen estimators for \(x=0.2, 0.8\).
<- 0.2
x1 <- 0.8
x2
<- aalen_johansen(sim, x = x1)
fit1 <- aalen_johansen(sim, x = x2) fit2
For illustrative purposes, we plot \(\Lambda_{21}\) and the conditional state occupation probability \(p_2\) for both the true model (full) and using the conditional estimators (dashed). This is done for both \(x=0.2\) (in red) and \(x=0.8\) (in blue).
<- unlist(lapply(fit1$Lambda, FUN = function(L) L[2,1]))
v11 <- fit1$t
v10 <- unlist(lapply(fit2$Lambda, FUN = function(L) L[2,1]))
v21 <- fit2$t
v20 <- unlist(lapply(fit1$p, FUN = function(L) L[2]))
p1 <- unlist(lapply(prodint(0, 5, 0.01, function(t){lambda(t, x = x1)}),
P1 FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2]))
<- unlist(lapply(fit2$p, FUN = function(L) L[2]))
p2 <- unlist(lapply(prodint(0, 5, 0.01, function(t){lambda(t, x = x2)}),
P2 FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2]))
par(mfrow = c(1, 2))
par(mar = c(2.5, 2.5, 1.5, 1.5))
plot(v10, v11, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard", col = "red")
lines(v10, 2/x1*log(1+x1*v10), col = "red")
lines(v20, v21, lty = 2, col = "blue")
lines(v20, 2/x2*log(1+x2*v20), col = "blue")
plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability", col = "red")
lines(seq(0, 5, 0.01), P1, col = "red")
lines(v20, p2, lty = 2, col = "blue")
lines(seq(0, 5, 0.01), P2, col = "blue")
We consider the same model as before, but now introduce dependent right-censoring. To be precise, we assume that right-censoring occurs at rate \(\frac{1}{2}\frac{1}{1+\frac{1}{2}t}\) in the first state, while it occurs at twice this rate in the second state. Finally, all remaining individuals are right-censored at time \(t\).
We again simulate \(10,000\) independent realizations.
<- function(i, t, u){
jump_rate_enlarged if(i == 1){
2.5
else if(i == 2){
} 4
else{
} 0
}
}
<- function(i, s, v){
mark_dist_enlarged if(i == 1){
c(0, 2/5, 2/5, 1/5)
else if(i == 2){
} c(2/4, 0, 1/4, 1/4)
else{
} 0
}
}
<- 10000
n
<- runif(n)
X
<- 5
tn <- list()
sim for(i in 1:n){
<- function(j, y, z){jump_rate_enlarged(j, y, z)/(1+X[i]*y)}
rates <- sim_path(sample(1:2, 1), rates = rates, dists = mark_dist_enlarged,
sim[[i]] tn = tn, abs = c(FALSE, FALSE, TRUE, TRUE),
bs = c(2.5*tn, 4*tn, 0, 0))
$X <- X[i]
sim[[i]] }
The degree of censoring is
sum(tn == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))
| 4 == unlist(lapply(sim, FUN = function(z){tail(z$states, 1)}))) / n
#> [1] 0.4373
We fit the conditional Nelson–Aalen and Aalen–Johansen estimators for \(x=0.2, 0.8\).
<- aalen_johansen(sim, x = x1, collapse = TRUE)
fit1 <- aalen_johansen(sim, x = x2, collapse = TRUE) fit2
For illustrative purposes, we plot \(\Lambda_{21}\) and the conditional state occupation probability \(p_2\) for both the true model (full) and using the conditional estimators (dashed). This is done for both \(x=0.2\) (in red) and \(x=0.8\) (in blue).
<- unlist(lapply(fit1$Lambda, FUN = function(L) L[2,1]))
v11 <- fit1$t
v10 <- unlist(lapply(fit2$Lambda, FUN = function(L) L[2,1]))
v21 <- fit2$t
v20 <- unlist(lapply(fit1$p, FUN = function(L) L[2]))
p1 <- unlist(lapply(fit2$p, FUN = function(L) L[2]))
p2
par(mfrow = c(1, 2))
par(mar = c(2.5, 2.5, 1.5, 1.5))
plot(v10, v11, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard", col = "red")
lines(v10, 2/x1*log(1+x1*v10), col = "red")
lines(v20, v21, lty = 2, col = "blue")
lines(v20, 2/x2*log(1+x2*v20), col = "blue")
plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability", col = "red")
lines(seq(0, 5, 0.01), P1, col = "red")
lines(v20, p2, lty = 2, col = "blue")
lines(seq(0, 5, 0.01), P2, col = "blue")
Last but not least, we consider a time-inhomogeneous semi-Markov model with non-zero transition rates given by \[\begin{align*} \lambda_{12}(t, u) &= 0.09 + 0.0018t, \\ \lambda_{13}(t, u) &= 0.01 + 0.0002t, \\ \lambda_{23}(t, u) &= 0.09 + 1(u < 4)0.20 + 0.001t. \end{align*}\]
<- function(i, t, u){
jump_rate if(i == 1){
0.1 + 0.002*t
else if(i == 2){
} ifelse(u < 4, 0.29, 0.09) + 0.001*t
else{
} 0
}
}
<- function(i, s, v){
mark_dist if(i == 1){
c(0, 0.9, 0.1)
else if(i == 2){
} c(0, 0, 1)
else{
} 0
} }
We simulate \(5,000\) independent and identically distributed realizations subject to independent right-censoring. Right-censoring follows the distribution \(\text{Unif}(10,40)\).
<- 5000
n
<- runif(n, 10, 40)
c
<- list()
sim for(i in 1:n){
<- sim_path(1, rates = jump_rate, dists = mark_dist, tn = c[i],
sim[[i]] bs = c(0.1+0.002*c[i], 0.29+0.001*c[i], 0))
}
The degree of censoring is
sum(c == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))) / n
#> [1] 0.1758
We fit the Aalen–Johansen estimator.
<- aalen_johansen(sim) fit
For illustrative purposes, we plot the estimate of the state occupation probability \(p_2\) (dashed). The true values (full) are obtained via numerical integration, utilizing that this specific model has a hierarchical structure.
<- fit$t
v0 <- unlist(lapply(fit$p, FUN = function(L) L[2]))
p <- function(t, s){
integrand exp(-0.1*s-0.001*s^2)*(0.09 + 0.0018*s)*exp(-0.20*pmin(t-s, 4)-0.09*(t-s)-0.0005*(t^2-s^2))
}<- Vectorize(function(t){
P integrate(f = integrand, lower = 0, upper = t, t = t)$value
vectorize.args = "t")
}, plot(v0, p, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability")
lines(seq(0, 40, 0.1), P(seq(0, 40, 0.1)))
We now want to estimate the conditional state occupation probability \(p_2\), given sojourn in the second state at time \(10\) with duration \(u=1,5\). For this, we first need to sub-sample the data (landmarking).
<- sim[unlist(lapply(sim, FUN = function(z){any(z$times <= 10
landmark & c(z$times[-1], Inf) > 10
& z$states == 2)}))]
<- lapply(landmark, FUN = function(z){list(times = z$times, states = z$states,
landmark X = 10 - z$times[z$times <= 10
& c(z$times[-1], Inf) > 10
& z$states == 2])})
The degree of sub-sampling is
length(landmark) / n
#> [1] 0.1852
Next, we fit the conditional Aalen–Johansen estimator for \(u=1,5\). We also fit the usual landmark Aalen–Johansen estimator.
<- 1
u1 <- 5
u2
<- aalen_johansen(landmark, x = u1)
fit1 <- aalen_johansen(landmark, x = u2)
fit2 <- aalen_johansen(landmark) fit3
For illustrative purposes, we plot the conditional state occupation probability \(p_2\) using the conditional estimator (dashed), the usual landmark estimator (dotted), and the true model (full). This is done for both \(u=1\) (in red) and \(u=5\) (in blue).
<- fit1$t
v10 <- fit2$t
v20 <- fit3$t
v30 <- unlist(lapply(fit1$p, FUN = function(L) L[2]))
p1 <- unlist(lapply(fit2$p, FUN = function(L) L[2]))
p2 <- unlist(lapply(fit3$p, FUN = function(L) L[2]))
p3 <- function(t, u){
P exp(-(t-10)*0.09-(t^2-100)*0.0005-pmax(0, pmin(t, 4-(u-10))-10)*0.20)
}
plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability",
col = "red", xlim = c(10, 40))
lines(seq(10, 40, 0.1), P(seq(10, 40, 0.1), u1), col = "red")
lines(v20, p2, lty = 2, col = "blue")
lines(seq(10, 40, 0.1), P(seq(10, 40, 0.1), u2), col = "blue")
lines(v30, p3, lty = 3)