##' ## Custom distribution
##' ## make "dEV" and "pEV" from eha package (if installed)
##' ## available to the working environment
##' if (require("eha")) {
##' custom.ev <- list(name="EV",
##'                   pars=c("shape","scale"),
##'                   location="scale",
##'                   transforms=c(log, log),
##'                   inv.transforms=c(exp, exp),
##'                   inits=function(t){ c(1, median(t)) })
##' fitev <- flexsurvreg(formula = Surv(futime, fustat) ~ 1, data = ovarian,
##'                      dist=custom.ev)
##' fitev
##' lines(fitev, col="purple", col.ci="purple")
##' }
##'
##' ## Custom distribution: supply the hazard function only
##' hexp2 <- function(x, rate=1){ rate } # exponential distribution
##' hexp2 <- Vectorize(hexp2)
##' custom.exp2 <- list(name="exp2", pars=c("rate"), location="rate",
##'                     transforms=c(log), inv.transforms=c(exp),
##'                     inits=function(t) 1/mean(t))
##' flexsurvreg(Surv(futime, fustat) ~ 1, data = ovarian, dist=custom.exp2)
##' flexsurvreg(Surv(futime, fustat) ~ 1, data = ovarian, dist="exp")
##' ## should give same answer
Monday, August 18, 2014
Likelihood ratio test (LRT) on the Gompertz-Makeham model
# I used the single-sample fits as initial values for the joint two-sample fit. It worked.
# The Hessian does seem to be available when lower bounds are specified in optim()
# 20140818 Nested model test on LS1 RLS using the Gompertz-Makeham model
# Conclusions:
# H0: I, G, M are all the same between control and LS1
# H1: M differs between control and LS1
# p = 1.285958e-05, indicating that H1 fits significantly better than the null hypothesis
# H0: I = 1.000000e-05, G = 0.35, M = 0.021
# H1: I = 5.597089e-07, G = 0.45, M1 = 0.0063, M2 = 0.052
rm(list=ls());
source("~/lib/R/lifespan.r")
require(nlme)
require(survival)
require(xlsx)
require(flexsurv)
tb = read.xlsx( "LS1 MD data.xlsx", 1 );
ctl = tb[,2]
ls1 = tb[,3]
fitCtlGom = flexsurvreg(formula = Surv(ctl) ~ 1, dist="gompertz")
fitCtlGom$res
S = calculate.s(ctl)
s= S$s; t=S$t;
g1 = gnls( s ~ exp( (I/G) *(1 - exp(G* t)) - M*t ), start=list( I=0.003, G=0.16, M=0.01) )
retGM = optim ( g1$coef, llh.GM.single.run, lifespan=ctl,
lower = c(1E-10, 1E-5, 0), upper = c(0.2, 2, 0.1), method="L-BFGS-B");
retG = optim ( c(3.6E-5, 0.3), llh.G.single.run, lifespan=ctl,
lower = c(1E-10, 1E-5), upper = c(0.2, 2 ), method="L-BFGS-B");
fitLS1Gom = flexsurvreg(formula = Surv(ls1) ~ 1, dist="gompertz")
fitLS1Gom$res
S2 = calculate.s(ls1)
s = S2$s; t = S2$t
g2 = gnls( s ~ exp( (I/G) *(1 - exp(G* t)) - M*t ), start=list( I=0.003, G=0.16, M=0.01) )
retGM2 = optim ( g2$coef, llh.GM.single.run, lifespan=ls1, lower = c(1E-10, 1E-5, 0), upper = c(0.2, 2, 0.1), method="L-BFGS-B");
retG2 = optim ( c(0.0166, 0.1), llh.G.single.run, lifespan=ls1,
lower = c(1E-10, 1E-5), upper = c(0.2, 2 ), method="L-BFGS-B");
retGM$par
retGM2$par
initIGM = c(retGM$par, retGM2$par)
#################################
##### LRT to examine whether the two samples differ in I, G, M
# rawIGM = c( I1, G1, M1, I2, G2, M2 )
H0  <- function( rawIGM ) { c(rawIGM[1], rawIGM[2], rawIGM[3], rawIGM[1], rawIGM[2], rawIGM[3]) } # all parameters shared
H1m <- function( rawIGM ) { c(rawIGM[1], rawIGM[2], rawIGM[3], rawIGM[1], rawIGM[2], rawIGM[6]) } # M allowed to differ
Hx.llh.GM2sample.single.run <- function( rawIGM, model, lifespan1, lifespan2 ) {
  IGM = model(rawIGM);   # expand to (I1, G1, M1, I2, G2, M2) under the chosen hypothesis
  my.lh1 = llh.GM.single.run(IGM[1:3], lifespan1)
  my.lh2 = llh.GM.single.run(IGM[4:6], lifespan2)
  print( IGM );          # trace the convergence
  return( my.lh1 + my.lh2 )
}
rawIGM = c(retGM$par, retGM2$par)
llh.H0 = optim( initIGM, Hx.llh.GM2sample.single.run, model=H0, lifespan1=ctl, lifespan2=ls1,
method="L-BFGS-B", lower=c(1E-5,1E-5,1E-5, 1E-5,1E-5,1E-5) );
llh.H1m = optim( initIGM, Hx.llh.GM2sample.single.run, model=H1m, lifespan1=ctl, lifespan2=ls1,
method="L-BFGS-B", lower=c(1E-7,1E-7,1E-7, 1E-7,1E-7,1E-7) );
rbind( llh.H0$par, llh.H1m$par)
deltaLH = llh.H0$value - rbind( llh.H0$value, llh.H1m$value )  # $value is the minimized negative log-likelihood
1 - pchisq( 2*deltaLH, df = 1 );  # LRT p-values: 2*(negLL_H0 - negLL_H1) ~ chi-square with 1 df
#################################
###### End of LRT ############
#################################
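As a cross-check on the LRT above (my own sketch, not part of the original script): compare the two hypotheses by AIC. optim()'s $value here is the minimized negative log-likelihood; H0 has 3 effective free parameters and H1m has 4.
AIC.H0  = 2*llh.H0$value  + 2*3   # H0: shared I, G, M
AIC.H1m = 2*llh.H1m$value + 2*4   # H1m: shared I, G; separate M1, M2
c(AIC.H0 = AIC.H0, AIC.H1m = AIC.H1m)   # the smaller AIC is preferred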
Custom distribution for flexsurv(), example
data(ovarian)
## Compare generalized gamma fit with Weibull
fitg <- flexsurvreg(formula = Surv(futime, fustat) ~ 1, data = ovarian, dist="gengamma")
fitg
fitw <- flexsurvreg(formula = Surv(futime, fustat) ~ 1, data = ovarian, dist="weibull")
fitw
plot(fitg)
lines(fitw, col="blue", lwd.ci=1, lty.ci=1)
## Identical AIC, probably not enough data in this simple example for a
## very flexible model to be worthwhile.
## Custom distribution
library(eha) ## make "dllogis" and "pllogis" available to the working environment
custom.llogis <- list(name="llogis",
                      pars=c("shape","scale"),
                      location="scale",
                      transforms=c(log, log),
                      inv.transforms=c(exp, exp),
                      inits=function(t){ c(1, median(t)) })
fitl <- flexsurvreg(formula = Surv(futime, fustat) ~ 1, data = ovarian, dist=custom.llogis)
fitl
lines(fitl, col.fit="purple", col.ci="purple")
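A compact way to compare the three fits above (a small sketch; flexsurvreg objects store the AIC in $AIC):
c( gengamma = fitg$AIC, weibull = fitw$AIC, llogis = fitl$AIC )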
Saturday, July 12, 2014
flexsurv usage and results
fitGomp = flexsurvreg( formula=Surv(tb$rls) ~ 1, dist='gompertz')
> fitGomp$res
              est        L95%       U95%
shape 0.074024323 0.065724069 0.08232458
fitGomp$coeff['rate'] is on the log scale and has to be transformed back with the exp() function.
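A quick way to check this (a sketch; it assumes a flexsurv version whose fitted object stores the log-scale coefficients, partially matched here as $coeff):
exp( fitGomp$coeff['rate'] )   # rate is estimated on the log scale internally
fitGomp$res['rate', 'est']     # should be (nearly) the same back-transformed value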

Friday, July 11, 2014
Comparing binomial, Gompertz, and Weibull models by fitting simulated binomial-aging-model lifespans
# ~/0.network.aging.prj/0a.network.fitting/_binomial_aging20140711.r
#test binomial aging model
rm(list=ls());
source("lifespan.20131221.r")
require(flexsurv)
##### log likelihood function, 3-parameter binomial mortality rate model
# http://hongqinlab.blogspot.com/2013/12/binomial-mortailty-model.html
# m = R ( 1 + t/t0)^(n-1)
# s = exp( (R t0/n)*(1 - (1+t/t0)^n ) )
llh.binomialMortality.single.run <- function( Rt0n, lifespan ) {
  I = Rt0n[1]; t0 = Rt0n[2]; n = Rt0n[3];   # I plays the role of R in the mortality formula above
  my.data = lifespan[!is.na(lifespan)];
  log_s = (I * t0 / n) * (1 - (1 + my.data/t0)^n);
  log_m = log(I) + (n-1) * log(1 + my.data/t0);
  my.lh = sum(log_s) + sum(log_m);
  print( Rt0n );   # trace the convergence
  return( - my.lh )   # negate because optim() minimizes by default
}
# random numbers from the binomial-aging model, drawn by inverting its survival function
rbinomial.aging <- function ( Rt0n, n ){   # this n is the number of draws, not the model parameter
  x.uniform = runif(n)
  inverse.binomialaging.CDF = function(R, t0, n, y) { t0*( (1 - n*log(y)/(R*t0))^(1/n) - 1 ) }
  x.binomialaging = inverse.binomialaging.CDF(Rt0n[1], Rt0n[2], Rt0n[3], x.uniform)
  return(x.binomialaging)
}
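A quick sanity check of this sampler (my own sketch, not part of the original post): compare the empirical survival of a large simulated sample against the model survivorship s(t) = exp( (R*t0/n)*(1 - (1+t/t0)^n) ).
set.seed(1)
xx = rbinomial.aging( c(0.001, 10, 5), 1e5 )
S.theory = function(t, R, t0, n) { exp( (R*t0/n)*(1 - (1 + t/t0)^n) ) }
t.check = c(10, 20, 30)
cbind( empirical = sapply(t.check, function(tt) mean(xx > tt)),
       theory    = S.theory(t.check, 0.001, 10, 5) )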
set.seed(20140711)
x = rbinomial.aging( c(0.001,10, 5), 1000)
fitBinom = optim ( c(0.001,10, 3), llh.binomialMortality.single.run , lifespan=x,
method="L-BFGS-B", lower=c(1E-10,0.05,1E-1), upper=c(10,100,50) );
fitGomp = optim ( c(0.005, 0.08), llh.G.single.run , lifespan=x,
method="L-BFGS-B", lower=c(1E-10,1E-2), upper=c(10,10) );
fitWeib2 = flexsurvreg(formula = Surv(x) ~ 1, dist="weibull")
fitGomp2 = flexsurvreg(formula = Surv(x) ~ 1, dist="gompertz")
fitGomp2$coeff
c(-fitGomp$value, fitGomp2$loglik, fitWeib2$loglik, -fitBinom$value)
flexsurv can over-fit the Gompertz with negative parameter values, and here the Weibull gave the best log-likelihood!
Even though the random numbers were generated from the binomial-aging model, fitting that model gave the lowest likelihood?!
TODO: I should provide a gradient function for optim(method='L-BFGS-B'),
or try optimx().
For the binomial-aging model, $n$ is under-estimated by optim(), but $m0$ is over-estimated. When the entire data set is fitted with the Gompertz model, $G$ comes out too low. How about fitting only the initial stage? (See the sketch below.)
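One possible way to look at the initial stage (a sketch only; the 30% cutoff is an arbitrary choice for illustration): right-censor the lifespans at an early cutoff and refit the Gompertz, so that only the early deaths inform the estimates.
cutoff = quantile(x, 0.3)            # keep roughly the first 30% of deaths
time.c = pmin(x, cutoff)
status = as.numeric(x <= cutoff)     # 1 = observed death, 0 = right-censored at the cutoff
fitGompEarly = flexsurvreg(Surv(time.c, status) ~ 1, dist = "gompertz")
fitGompEarly$res                     # compare the shape (G) estimate with fitGomp2$res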
set.seed(20140711)
x = round( rbinomial.aging( c(0.001,10, 4), 5000), digits=1 )#t0=10, n=4
tb = calculate.mortality.rate(x)
plot( tb$s ~ tb$t)
plot( tb$mortality.rate ~ tb$t )
plot( tb$mortality.rate ~ tb$t, log='y', main='linear for Gompertz' )
plot( tb$mortality.rate ~ tb$t, log='xy', main='linear for Weibull' )
set.seed(20140711)
x = rbinomial.aging( c(0.01,25, 4), 100) #large m0, t0=25, n=4
x = round( x + 0.5, digits=0)
fitBinom = optim ( c(0.01,25, 3.5), llh.binomialMortality.single.run , lifespan=x,
method="L-BFGS-B", lower=c(1E-10,0.05,1E-1), upper=c(10,100,50) );
fitGomp = optim ( c(0.005, 0.08), llh.G.single.run , lifespan=x,
method="L-BFGS-B", lower=c(1E-10,1E-2), upper=c(10,10) );
fitWeib2 = flexsurvreg(formula = Surv(x) ~ 1, dist="weibull")
fitGomp2 = flexsurvreg(formula = Surv(x) ~ 1, dist="gompertz")
fitGomp2$coeff
c(-fitGomp$value, fitGomp2$loglik, fitWeib2$loglik, -fitBinom$value)
Good: the binomial-aging model gives the best likelihood.
Plots confirmed that the Gompertz fit looks better than the Weibull fit (see the plotting sketch below).
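A minimal plotting sketch for that visual comparison (my own illustration; plot.flexsurvreg overlays the fitted survival curve on the Kaplan-Meier estimate):
plot(fitGomp2, col = "red", main = "Gompertz (red) vs Weibull (blue)")
lines(fitWeib2, col = "blue")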
Saturday, December 21, 2013
Use simulated Gompertz random number to test flexsurv Gompertz and Weibull fitting results
Summary: I experimented with the sample size. With 50 cells, the Gompertz model is the better fit roughly 4 out of 5 times. With 100 cells, Gompertz beats Weibull maybe 9 out of 10 times. With 500 cells, Gompertz is always better than the Weibull model.
Conclusion: For a typical yeast lifespan assay, it is not always easy to tell whether the Gompertz or the Weibull model is the better fit.
Comments: The actual results probably also depend on R and G, which determine the variance.
code: 20131221.gompertz.simulation.R
# generate gompertz random numbers
# fit with flexsurv Gompertz and weibull models
#inverse of gompertz CDF
# see http://hongqinlab.blogspot.com/2013/06/median-lifespan-of-2-parameter-gompertz.html
inverse.gomp.CDF = function(R,G,y) { (1/G)*log(1 - (G/R)*log(1-y) ) }
#see generate random number with a given distribution
# http://hongqinlab.blogspot.com/2013/12/generate-gompertz-random-numbers.html
x.uniform = runif(60)
hist(x.uniform)
x.gompertz = inverse.gomp.CDF(0.001,0.2, x.uniform)
hist(x.gompertz)
summary(x.gompertz)
source("lifespan.r")
tb = calculate.s(x.gompertz)
plot(tb$s ~ tb$t)
require(flexsurv)
lifespan = x.gompertz
lifespanGomp = flexsurvreg(formula = Surv(lifespan) ~ 1, dist = 'gompertz')
lifespanWeib = flexsurvreg(formula = Surv(lifespan) ~ 1, dist = 'weibull')
c(lifespanWeib$AIC, lifespanGomp$AIC, lifespanWeib$AIC - lifespanGomp$AIC )
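A sketch of the sample-size experiment described in the summary (the helper function below is my own and hypothetical, not from the original script): repeat the simulation and count how often the Gompertz AIC beats the Weibull AIC.
# hypothetical helper: simulate one cohort of size N and report whether Gompertz wins on AIC
gomp.beats.weib <- function(N, R = 0.001, G = 0.2) {
  x = inverse.gomp.CDF(R, G, runif(N))
  fg = flexsurvreg(Surv(x) ~ 1, dist = "gompertz")
  fw = flexsurvreg(Surv(x) ~ 1, dist = "weibull")
  return( fg$AIC < fw$AIC )
}
set.seed(20131221)
sapply( c(50, 100, 500), function(N) mean(replicate(20, gomp.beats.weib(N))) )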
Wednesday, October 16, 2013
flexsurvreg() in package flexsurv
Notes on 20140818: I did not see a gradient function specified in the call to optim() inside flexsurvreg?! (See the gradient sketch after the listing below.)
# package(flexsurv)
> flexsurvreg
function (formula, data, dist, inits, fixedpars = NULL, cl = 0.95,
...)
{
call <- match.call() #a useful base function
indx <- match(c("formula", "data"), names(call), nomatch = 0)
if (indx[1] == 0)
stop("A \"formula\" argument is required")
temp <- call[c(1, indx)]
temp[[1]] <- as.name("model.frame")
m <- eval(temp, parent.frame())
Y <- model.extract(m, "response")
if (!inherits(Y, "Surv"))
stop("Response must be a survival object")
Terms <- attr(m, "terms")
X <- model.matrix(Terms, m)
dat <- list(Y = Y, X = X[, -1, drop = FALSE], Xraw = m[,
-1, drop = FALSE])
X <- dat$X
if (missing(dist))
stop("Distribution \"dist\" not specified")
if (is.character(dist)) {
match.arg(dist, names(flexsurv.dists))
dlist <- flexsurv.dists[[dist]]
dlist$name <- dist
}
else if (is.list(dist)) {
dlist <- check.dlist(dist)
}
else stop("\"dist\" should be a string for a built-in distribution, or a list for a custom distribution")
parnames <- dlist$pars
ncovs <- ncol(X)
nbpars <- length(parnames)
npars <- nbpars + ncovs
if (!missing(inits) && (!is.numeric(inits) || (length(inits) !=
npars)))
stop("inits must be a numeric vector of length ", npars)
if (missing(inits) || any(is.na(inits)))
default.inits <- c(dlist$inits(Y[, "time"]), rep(0, ncovs))
if (missing(inits))
inits <- default.inits
else if (any(is.na(inits)))
inits[is.na(inits)] <- default.inits[is.na(inits)]
for (i in 1:nbpars) inits[i] <- dlist$transforms[[i]](inits[i])
outofrange <- which(is.nan(inits) | is.infinite(inits))
if (any(outofrange)) {
plural <- if (length(outofrange) > 1)
"s"
else ""
stop("Initial value", plural, " for parameter", plural,
" ", paste(outofrange, collapse = ","), " out of range")
}
cnames <- if (ncovs == 0)
NULL
else colnames(X)
names(inits) <- c(parnames, cnames)
if (!is.null(fixedpars) && !is.logical(fixedpars) && (!is.numeric(fixedpars) ||
any(!(fixedpars %in% 1:npars)))) {
dots <- if (npars > 2)
"...,"
else ""
stop("fixedpars must be TRUE/FALSE or a vector of indices in 1,",
dots, npars)
}
if ((is.logical(fixedpars) && fixedpars == TRUE) || (is.numeric(fixedpars) &&
all(fixedpars == 1:npars))) {
minusloglik <- minusloglik.flexsurv(inits, t = Y[, "time"],
dead = Y[, "status"], X = X, dlist = dlist, inits = inits)
for (i in 1:nbpars) inits[i] <- dlist$inv.transforms[[i]](inits[i])
res <- matrix(inits, ncol = 1)
dimnames(res) <- list(names(inits), "est")
ret <- list(call = call, dlist = dlist, res = res, npars = 0,
loglik = -minusloglik, AIC = 2 * minusloglik, data = dat,
datameans = colMeans(dat$X), N = nrow(dat$Y), events = sum(dat$Y[,
"status"]), trisk = sum(dat$Y[, "time"]))
}
else {
optpars <- inits[setdiff(1:npars, fixedpars)] #nice trick to optimize a subset of pars
opt <- optim(optpars, minusloglik.flexsurv, t = Y[, "time"],
dead = Y[, "status"], X = X, dlist = dlist, inits = inits,
fixedpars = fixedpars, hessian = TRUE, ...) #fixedpars is a useful optim feature
est <- opt$par
if (all(!is.na(opt$hessian)) && all(!is.nan(opt$hessian)) &&
all(is.finite(opt$hessian)) && all(eigen(opt$hessian)$values >
0)) {
cov <- solve(opt$hessian)
se <- sqrt(diag(cov)) #qin, standard errors, see wiki entry
if (!is.numeric(cl) || length(cl) > 1 || !(cl > 0) ||
!(cl < 1))
stop("cl must be a number in (0,1)")
lcl <- est - qnorm(1 - (1 - cl)/2) * se
ucl <- est + qnorm(1 - (1 - cl)/2) * se
}
else {
warning("Could not calculate asymptotic standard errors - Hessian is not positive definite. Optimisation has probably not converged to the maximum likelihood")
lcl <- ucl <- NA
}
res <- cbind(est = inits, lcl = NA, ucl = NA)
res[setdiff(1:npars, fixedpars), ] <- cbind(est, lcl,
ucl)
colnames(res) <- c("est", paste(c("L", "U"), round(cl *
100), "%", sep = ""))
for (i in 1:nbpars) res[i, ] <- dlist$inv.transforms[[i]](res[i,
])
ret <- list(call = match.call(), dlist = dlist, res = res,
npars = length(est), loglik = -opt$value, AIC = 2 *
opt$value + 2 * length(est), cl = cl, opt = opt,
data = dat, datameans = colMeans(dat$X), N = nrow(dat$Y),
events = sum(dat$Y[, "status"]), trisk = sum(dat$Y[,
"time"]))
}
class(ret) <- "flexsurvreg"
ret
}
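Regarding the note above on the missing gradient: optim() accepts an analytic gradient through its gr argument. A minimal sketch for the two-parameter Gompertz case (my own illustration, not flexsurv code; it assumes llh.G.single.run(c(R, G), lifespan) returns -sum( log(R) + G*t + (R/G)*(1 - exp(G*t)) ) for uncensored lifespans t):
grad.G.single.run <- function(RG, lifespan) {
  R = RG[1]; G = RG[2]
  t = lifespan[!is.na(lifespan)]
  dR = length(t)/R + sum(1 - exp(G*t))/G
  dG = sum(t) - (R/G^2)*sum(1 - exp(G*t)) - (R/G)*sum(t*exp(G*t))
  return( -c(dR, dG) )    # negate to match the negative log-likelihood
}
# example call:
# optim( c(0.005, 0.08), llh.G.single.run, gr = grad.G.single.run, lifespan = x,
#        method = "L-BFGS-B", lower = c(1E-10, 1E-2), upper = c(10, 10) )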
Tuesday, June 25, 2013
*** Two-parameter Gompertz model
Mortality rate: m(t) = R * exp(G*t)
Survivorship: s(t) = exp( (R/G) * (1 - exp(G*t)) )
Median lifespan of the 2-parameter Gompertz model: t_0.5 = (1/G) * log(1 + (G/R)*log(2))
The Gompertz model in 'flexsurv' is parameterized by a shape a and a rate b, with hazard h(t) = b * exp(a*t).
Notice that
pdf = mortality rate * s (based on the definition of the mortality rate)
m = -(1/s) ds/dt, so -ds/dt = pdf = m * s
So, flexsurv shape a == G, and flexsurv rate b == R.
Inverse of the Gompertz CDF: t(y) = (1/G) * log(1 - (G/R)*log(1-y))
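A quick numerical check of this parameter mapping (a sketch; hgompertz() and qgompertz() are the Gompertz hazard and quantile functions exported by flexsurv):
require(flexsurv)
G = 0.2; R = 0.001; t = c(5, 10, 20)
cbind( hgompertz(t, shape = G, rate = R), R*exp(G*t) )                           # hazards agree
cbind( qgompertz(0.5, shape = G, rate = R), (1/G)*log(1 - (G/R)*log(1 - 0.5)) )  # median matches the inverse CDF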