Wednesday, October 16, 2013

flexsurvreg() in package flexsurv

Notes on 20140818: I did not see the specification of gradient function in the call of optim()?!


# package(flexsurv)

> flexsurvreg
function (formula, data, dist, inits, fixedpars = NULL, cl = 0.95, 
    ...) 
{
    call <- match.call() #a useful base function
    indx <- match(c("formula", "data"), names(call), nomatch = 0)
    if (indx[1] == 0) 
        stop("A \"formula\" argument is required")
    temp <- call[c(1, indx)]
    temp[[1]] <- as.name("model.frame")
    m <- eval(temp, parent.frame())
    Y <- model.extract(m, "response")
    if (!inherits(Y, "Surv")) 
        stop("Response must be a survival object")
    Terms <- attr(m, "terms")
    X <- model.matrix(Terms, m)
    dat <- list(Y = Y, X = X[, -1, drop = FALSE], Xraw = m[, 
        -1, drop = FALSE])
    X <- dat$X
    if (missing(dist)) 
        stop("Distribution \"dist\" not specified")
    if (is.character(dist)) {
        match.arg(dist, names(flexsurv.dists))
        dlist <- flexsurv.dists[[dist]]
        dlist$name <- dist
    }
    else if (is.list(dist)) {
        dlist <- check.dlist(dist)
    }
    else stop("\"dist\" should be a string for a built-in distribution, or a list for a custom distribution")
    parnames <- dlist$pars
    ncovs <- ncol(X)
    nbpars <- length(parnames)
    npars <- nbpars + ncovs
    if (!missing(inits) && (!is.numeric(inits) || (length(inits) != 
        npars))) 
        stop("inits must be a numeric vector of length ", npars)
    if (missing(inits) || any(is.na(inits))) 
        default.inits <- c(dlist$inits(Y[, "time"]), rep(0, ncovs))
    if (missing(inits)) 
        inits <- default.inits
    else if (any(is.na(inits))) 
        inits[is.na(inits)] <- default.inits[is.na(inits)]
    for (i in 1:nbpars) inits[i] <- dlist$transforms[[i]](inits[i])
    outofrange <- which(is.nan(inits) | is.infinite(inits))
    if (any(outofrange)) {
        plural <- if (length(outofrange) > 1) 
            "s"
        else ""
        stop("Initial value", plural, " for parameter", plural, 
            " ", paste(outofrange, collapse = ","), " out of range")
    }
    cnames <- if (ncovs == 0) 
        NULL
    else colnames(X)
    names(inits) <- c(parnames, cnames)
    if (!is.null(fixedpars) && !is.logical(fixedpars) && (!is.numeric(fixedpars) || 
        any(!(fixedpars %in% 1:npars)))) {
        dots <- if (npars > 2) 
            "...,"
        else ""
        stop("fixedpars must be TRUE/FALSE or a vector of indices in 1,", 
            dots, npars)
    }
    if ((is.logical(fixedpars) && fixedpars == TRUE) || (is.numeric(fixedpars) && 
        all(fixedpars == 1:npars))) {
        minusloglik <- minusloglik.flexsurv(inits, t = Y[, "time"], 
            dead = Y[, "status"], X = X, dlist = dlist, inits = inits)
        for (i in 1:nbpars) inits[i] <- dlist$inv.transforms[[i]](inits[i])
        res <- matrix(inits, ncol = 1)
        dimnames(res) <- list(names(inits), "est")
        ret <- list(call = call, dlist = dlist, res = res, npars = 0, 
            loglik = -minusloglik, AIC = 2 * minusloglik, data = dat, 
            datameans = colMeans(dat$X), N = nrow(dat$Y), events = sum(dat$Y[, 
                "status"]), trisk = sum(dat$Y[, "time"]))
    }
    else {
        optpars <- inits[setdiff(1:npars, fixedpars)]  #nice trick to optimize a subset of pars
        opt <- optim(optpars, minusloglik.flexsurv, t = Y[, "time"], 
            dead = Y[, "status"], X = X, dlist = dlist, inits = inits, 
            fixedpars = fixedpars, hessian = TRUE, ...) #fixedpars is a useful optim feature
        est <- opt$par
        if (all(!is.na(opt$hessian)) && all(!is.nan(opt$hessian)) && 
            all(is.finite(opt$hessian)) && all(eigen(opt$hessian)$values > 
            0)) {
            cov <- solve(opt$hessian)
            se <- sqrt(diag(cov)) #qin, standard errors, see wiki entry 
            if (!is.numeric(cl) || length(cl) > 1 || !(cl > 0) || 
                !(cl < 1)) 
                stop("cl must be a number in (0,1)")
            lcl <- est - qnorm(1 - (1 - cl)/2) * se
            ucl <- est + qnorm(1 - (1 - cl)/2) * se
        }
        else {
            warning("Could not calculate asymptotic standard errors - Hessian is not positive definite. Optimisation has probably not converged to the maximum likelihood")
            lcl <- ucl <- NA
        }
        res <- cbind(est = inits, lcl = NA, ucl = NA)
        res[setdiff(1:npars, fixedpars), ] <- cbind(est, lcl, 
            ucl)
        colnames(res) <- c("est", paste(c("L", "U"), round(cl * 
            100), "%", sep = ""))
        for (i in 1:nbpars) res[i, ] <- dlist$inv.transforms[[i]](res[i, 
            ])
        ret <- list(call = match.call(), dlist = dlist, res = res, 
            npars = length(est), loglik = -opt$value, AIC = 2 * 
                opt$value + 2 * length(est), cl = cl, opt = opt, 
            data = dat, datameans = colMeans(dat$X), N = nrow(dat$Y), 
            events = sum(dat$Y[, "status"]), trisk = sum(dat$Y[, 
                "time"]))
    }
    class(ret) <- "flexsurvreg"
    ret
}

No comments:

Post a Comment