This site is to serve as my note-book and to effectively communicate with my students and collaborators. Every now and then, a blog may be of interest to other researchers or teachers. Views in this blog are my own. All rights of research results and findings on this blog are reserved. See also http://youtube.com/c/hongqin @hongqin
Sunday, May 29, 2016
enet
> enet
function (x, y, lambda = 0, max.steps, normalize = TRUE, intercept = TRUE,
trace = FALSE, eps = .Machine$double.eps)
{
call <- match.call()
nm <- dim(x)
n <- nm[1]
m <- nm[2]
im <- seq(m)
one <- rep(1, n)
vn <- dimnames(x)[[2]]
meanx <- drop(one %*% x)/n
if (intercept == FALSE) {
meanx <- rep(0, m)
}
x <- scale(x, meanx, FALSE)
normx <- sqrt(drop(one %*% (x^2)))
if (normalize == FALSE) {
normx <- rep(1, m)
}
if (any(normx < eps * sqrt(n)))
stop("Some of the columns of x have zero variance")
names(normx) <- NULL
x <- scale(x, FALSE, normx)
mu <- mean(y)
if (intercept == FALSE) {
mu <- 0
}
y <- drop(y - mu)
d1 <- sqrt(lambda)
d2 <- 1/sqrt(1 + lambda)
Cvec <- drop(t(y) %*% x) * d2
ssy <- sum(y^2)
residuals <- c(y, rep(0, m))
if (lambda > 0) {
maxvars <- m
}
if (lambda == 0) {
maxvars <- min(m, n - 1)
}
if (missing(max.steps)) {
max.steps <- 50 * maxvars
}
L1norm <- 0
penalty <- max(abs(Cvec))
beta <- rep(0, m)
betactive <- list(NULL)
first.in <- integer(m)
active <- NULL
Actset <- list(NULL)
df <- 0
if (lambda != 0) {
Cp <- ssy
}
ignores <- NULL
actions <- as.list(seq(max.steps))
drops <- FALSE
Sign <- NULL
R <- NULL
k <- 0
while ((k < max.steps) & (length(active) < maxvars)) {
action <- NULL
k <- k + 1
inactive <- if (k == 1)
im
else im[-c(active, ignores)]
C <- Cvec[inactive]
Cmax <- max(abs(C))
if (!any(drops)) {
new <- abs(C) == Cmax
C <- C[!new]
new <- inactive[new]
for (inew in new) {
R <- updateRR(x[, inew], R, x[, active], lambda)
if (attr(R, "rank") == length(active)) {
nR <- seq(length(active))
R <- R[nR, nR, drop = FALSE]
attr(R, "rank") <- length(active)
ignores <- c(ignores, inew)
action <- c(action, -inew)
if (trace)
cat("LARS-EN Step", k, ":\t Variable", inew,
"\tcollinear; dropped for good\n")
}
else {
if (first.in[inew] == 0)
first.in[inew] <- k
active <- c(active, inew)
Sign <- c(Sign, sign(Cvec[inew]))
action <- c(action, inew)
if (trace)
cat("LARS-EN Step", k, ":\t Variable", inew,
"\tadded\n")
}
}
}
else action <- -dropid
Gi1 <- backsolve(R, backsolvet(R, Sign))
A <- 1/sqrt(sum(Gi1 * Sign))
w <- A * Gi1
u1 <- drop(x[, active, drop = FALSE] %*% w * d2)
u2 <- rep(0, m)
u2[active] <- d1 * d2 * w
u <- c(u1, u2)
if (lambda > 0) {
maxvars <- m - length(ignores)
}
if (lambda == 0) {
maxvars <- min(m - length(ignores), n - 1)
}
if (length(active) >= maxvars) {
gamhat <- Cmax/A
}
else {
a <- (drop(u1 %*% x[, -c(active, ignores)]) + d1 *
u2[-c(active, ignores)]) * d2
gam <- c((Cmax - C)/(A - a), (Cmax + C)/(A + a))
gamhat <- min(gam[gam > eps], Cmax/A)
Cdrop <- c(C - gamhat * a, -C + gamhat * a) - (Cmax -
gamhat * A)
}
dropid <- NULL
b1 <- beta[active]
z1 <- -b1/w
zmin <- min(z1[z1 > eps], gamhat)
if (zmin < gamhat) {
gamhat <- zmin
drops <- z1 == zmin
}
else drops <- FALSE
beta[active] <- beta[active] + gamhat * w
betactive[[k]] <- beta[active]
Actset[[k]] <- active
residuals <- residuals - (gamhat * u)
Cvec <- (drop(t(residuals[1:n]) %*% x) + d1 * residuals[-(1:n)]) *
d2
L1norm <- c(L1norm, sum(abs(beta[active]))/d2)
penalty <- c(penalty, penalty[k] - abs(gamhat * A))
if (any(drops)) {
dropid <- seq(drops)[drops]
for (id in rev(dropid)) {
if (trace)
cat("LARS-EN Step", k, ":\t Variable", active[id],
"\tdropped\n")
R <- downdateR(R, id)
}
dropid <- active[drops]
beta[dropid] <- 0
active <- active[!drops]
Sign <- Sign[!drops]
}
if (!is.null(vn))
names(action) <- vn[abs(action)]
actions[[k]] <- action
}
allset <- Actset[[1]]
for (i in 2:k) {
allset <- union(allset, Actset[[i]])
}
allset <- sort(allset)
max.p <- length(allset)
beta.pure <- matrix(0, k + 1, max.p)
for (i in 2:(k + 1)) {
for (j in 1:length(Actset[[i - 1]])) {
l <- c(1:max.p)[allset == Actset[[i - 1]][j]]
beta.pure[i, l] <- betactive[[i - 1]][j]
}
}
beta.pure <- beta.pure/d2
dimnames(beta.pure) <- list(paste(0:k), vn[allset])
k <- dim(beta.pure)[1]
df <- 1:k
for (i in 1:k) {
a <- drop(beta.pure[i, ])
df[i] <- 1 + length(a[a != 0])
}
residuals <- y - x[, allset, drop = FALSE] %*% t(beta.pure)
beta.pure <- scale(beta.pure, FALSE, normx[allset])
RSS <- apply(residuals^2, 2, sum)
R2 <- 1 - RSS/RSS[1]
Cp <- ((n - m - 1) * RSS)/rev(RSS)[1] - n + 2 * df
object <- list(call = call, actions = actions[seq(k)], allset = allset,
beta.pure = beta.pure, vn = vn, mu = mu, normx = normx[allset],
meanx = meanx[allset], lambda = lambda, L1norm = L1norm,
penalty = penalty * 2/d2, df = df, Cp = Cp, sigma2 = rev(RSS)[1]/(n -
m - 1))
class(object) <- "enet"
object
}
<environment: namespace:elasticnet>
Labels:
R
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment