import numpy as np

from statsmodels.duration.hazard_regression import PHReg


def _kernel_cumincidence(time, status, exog, kfunc, freq_weights,
                         dimred=True):
    """
    Calculates cumulative incidence functions using kernels.

    Parameters
    ----------
    time : array_like
        The observed time values
    status : array_like
        The status values.  status == 0 indicates censoring,
        status == 1, 2, ... are the events.
    exog : array_like
        Covariates such that censoring becomes independent of
        outcome times conditioned on the covariate values.
    kfunc : function
        A kernel function
    freq_weights : array_like
        Optional frequency weights
    dimred : bool
        If True, proportional hazards regression models are used to
        reduce exog to two columns by predicting overall events and
        censoring in two separate models.  If False, exog is used
        directly for calculating kernel weights without dimension
        reduction.
    """

    # Reorder so time is ascending
    ii = np.argsort(time)
    time = time[ii]
    status = status[ii]
    exog = exog[ii, :]
    nobs = len(time)

    # Convert the unique times to ranks (0, 1, 2, ...)
    utime, rtime = np.unique(time, return_inverse=True)

    # Last index where each unique time occurs.
    ie = np.searchsorted(time, utime, side='right') - 1

    ngrp = int(status.max())

    # All-cause status
    statusa = (status >= 1).astype(np.float64)

    if freq_weights is not None:
        freq_weights = freq_weights / freq_weights.sum()

    ip = []
    sp = [None] * nobs
    n_risk = [None] * nobs
    kd = [None] * nobs
    for k in range(ngrp):
        status0 = (status == k + 1).astype(np.float64)

        # Dimension reduction step
        if dimred:
            sfe = PHReg(time, exog, status0).fit()
            fitval_e = sfe.predict().predicted_values
            sfc = PHReg(time, exog, 1 - status0).fit()
            fitval_c = sfc.predict().predicted_values
            exog2d = np.hstack((fitval_e[:, None], fitval_c[:, None]))
            exog2d -= exog2d.mean(0)
            exog2d /= exog2d.std(0)
        else:
            exog2d = exog

        ip0 = 0
        for i in range(nobs):

            if k == 0:
                kd1 = exog2d - exog2d[i, :]
                kd1 = kfunc(kd1)
                kd[i] = kd1

            # Get the local all-causes survival function
            if k == 0:
                denom = np.cumsum(kd[i][::-1])[::-1]
                num = kd[i] * statusa
                rat = num / denom
                tr = 1e-15
                ii = np.flatnonzero((denom < tr) & (num < tr))
                rat[ii] = 0
                ratc = 1 - rat
                ratc = np.clip(ratc, 1e-10, np.inf)
                lrat = np.log(ratc)
                prat = np.cumsum(lrat)[ie]
                sf = np.exp(prat)
                sp[i] = np.r_[1, sf[:-1]]
                n_risk[i] = denom[ie]

            # Number of cause-specific deaths at each unique time.
            d0 = np.bincount(rtime, weights=status0*kd[i],
                             minlength=len(utime))

            # The cumulative incidence function probabilities.  Carry
            # forward once the effective sample size drops below 1.
            ip1 = np.cumsum(sp[i] * d0 / n_risk[i])
            jj = len(ip1) - np.searchsorted(n_risk[i][::-1], 1)
            if jj < len(ip1):
                ip1[jj:] = ip1[jj - 1]
            if freq_weights is None:
                ip0 += ip1
            else:
                ip0 += freq_weights[i] * ip1

        if freq_weights is None:
            ip0 /= nobs

        ip.append(ip0)

    return utime, ip
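
# ---------------------------------------------------------------------------
# Illustrative usage sketch (hypothetical, not part of the module's API).
# The simulated data, the `gauss_kernel` helper, and `_example_cumincidence`
# below are assumptions made for illustration only; they show one plausible
# way to call _kernel_cumincidence with a product Gaussian kernel.
# ---------------------------------------------------------------------------
def _example_cumincidence(seed=1234, n=200):
    """Sketch: cumulative incidence with two competing event types."""
    rng = np.random.default_rng(seed)

    # Simulated covariates, event times, and a status code in {0, 1, 2},
    # where 0 is censoring and 1, 2 are competing event types.
    exog = rng.normal(size=(n, 3))
    time = rng.exponential(scale=np.exp(0.2 * exog[:, 0]))
    status = rng.integers(0, 3, size=n).astype(np.float64)

    def gauss_kernel(d):
        # Product Gaussian kernel over the columns of the (reduced)
        # covariate differences; returns one weight per observation.
        return np.exp(-0.5 * d ** 2).prod(1)

    utime, cinc = _kernel_cumincidence(time, status, exog, gauss_kernel,
                                       freq_weights=None, dimred=True)

    # cinc[k] holds the estimated cumulative incidence of event type k + 1
    # evaluated at each unique time in utime.
    return utime, cinc
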
def _kernel_survfunc(time, status, exog, kfunc, freq_weights):
    """
    Estimate the marginal survival function under dependent censoring.

    Parameters
    ----------
    time : array_like
        The observed times for each subject
    status : array_like
        The status for each subject (1 indicates event, 0 indicates
        censoring)
    exog : array_like
        Covariates such that censoring is independent conditional on
        exog
    kfunc : function
        Kernel function
    freq_weights : array_like
        Optional frequency weights

    Returns
    -------
    probs : array_like
        The estimated survival probabilities
    times : array_like
        The times at which the survival probabilities are estimated

    References
    ----------
    Zeng, Donglin 2004. Estimating Marginal Survival Function by
    Adjusting for Dependent Censoring Using Many Covariates. The
    Annals of Statistics 32 (4): 1533-55.
    doi:10.1214/009053604000000508.
    https://arxiv.org/pdf/math/0409180.pdf
    """

    # Dimension reduction step
    sfe = PHReg(time, exog, status).fit()
    fitval_e = sfe.predict().predicted_values
    sfc = PHReg(time, exog, 1 - status).fit()
    fitval_c = sfc.predict().predicted_values
    exog2d = np.hstack((fitval_e[:, None], fitval_c[:, None]))

    n = len(time)
    ixd = np.flatnonzero(status == 1)

    # For consistency with standard KM, only compute the survival
    # function at the times of observed events.
    utime = np.unique(time[ixd])

    # Reorder everything so time is ascending
    ii = np.argsort(time)
    time = time[ii]
    status = status[ii]
    exog2d = exog2d[ii, :]

    # Last index where each evaluation time occurs.
    ie = np.searchsorted(time, utime, side='right') - 1

    if freq_weights is not None:
        freq_weights = freq_weights / freq_weights.sum()

    sprob = 0.
    for i in range(n):

        kd = exog2d - exog2d[i, :]
        kd = kfunc(kd)

        denom = np.cumsum(kd[::-1])[::-1]
        num = kd * status
        rat = num / denom
        tr = 1e-15
        ii = np.flatnonzero((denom < tr) & (num < tr))
        rat[ii] = 0
        ratc = 1 - rat
        ratc = np.clip(ratc, 1e-12, np.inf)
        lrat = np.log(ratc)
        prat = np.cumsum(lrat)[ie]
        prat = np.exp(prat)

        if freq_weights is None:
            sprob += prat
        else:
            sprob += prat * freq_weights[i]

    if freq_weights is None:
        sprob /= n

    return sprob, utime
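
# ---------------------------------------------------------------------------
# Illustrative usage sketch (hypothetical, not part of the module's API).
# The estimator above averages, over reference subjects i, local
# product-limit (Kaplan-Meier) estimates in which subject j receives the
# kernel weight kfunc(exog2d[j] - exog2d[i]), following Zeng (2004).  The
# `_example_survfunc` helper and its simulated data are assumptions made
# only to show one plausible call.
# ---------------------------------------------------------------------------
def _example_survfunc(seed=2345, n=200):
    """Sketch: marginal survival function under dependent censoring."""
    rng = np.random.default_rng(seed)

    # Simulated covariates, event times, and an event indicator
    # (1 = event observed, 0 = censored).
    exog = rng.normal(size=(n, 3))
    time = rng.exponential(scale=np.exp(0.25 * exog[:, 0]))
    status = (rng.uniform(size=n) < 0.7).astype(np.float64)

    def gauss_kernel(d):
        # Product Gaussian kernel over the two reduced covariate columns.
        return np.exp(-0.5 * d ** 2).prod(1)

    probs, times = _kernel_survfunc(time, status, exog, gauss_kernel,
                                    freq_weights=None)

    # probs[j] estimates the marginal probability P(T > times[j]).
    return probs, times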