Showing posts with label gnls. Show all posts
Showing posts with label gnls. Show all posts

Tuesday, December 10, 2013

gnls error, approximate covariance matrix for parameter estimates not of full rank

fm.b0.5 <- gnls( b0.5 ~ modelBlack0.5(v,w) , start = list( v = 0.0075, w=0.5)  );
Error in gnls(b0.5 ~ modelBlack0.5(v, w), start = list(v = 0.0075, w = 0.5)) :
  approximate covariance matrix for parameter estimates not of full rank

Possible problem:
There's a symmetry in the model, probably due to dilution bias. 
I manually corrected symmetry in the data, but still see the same error. 
Further scrutinizing led the discovery of a typo. 
 
Bug:
A typo in assignment that gave 'NA' bot b.05.max.

Ref:
 http://en.wikipedia.org/wiki/Rank_%28linear_algebra%29
https://svn.r-project.org/R-packages/trunk/nlme/R/gnls.R
https://stat.ethz.ch/pipermail/r-help/2003-April/032550.html

Thursday, May 30, 2013

Compare manually estimated Cv, Cb ~ gnls estimations

Mid-point of H2O2-dependent viability can be consistently estimated manually and by gnls. 
These are the fitting results of H2O2-LOH data.


Comparison of manually fitted Cb ~ gnls results. Red line is y=x, blue line is the model of gnls_Cb = a*manualCb + b. 
Gnls seem to give larger Cb than manual estimations. So, I should use manual estimations as initial values for 'gnls' and see whether it helps. Function 'gnls' treats all points equally correct, but I know data points in higher H2O2 concentration are more noisy due to fewer colonies. So, manual fit has some advantage, though 'gnls' can be viewed as kind of 'objective' method (well, if initial values are not considered as human biases).



# I am concerned how initial values can influence Cb estimation.
# The black colonies data are very noisy, so initial values can trapped fitting to different local optima.

rm(list=ls())
setwd("~/github/LOH_H2O2_2012-master/analysis")
files = list.files(path="../", pattern="_*csv")

tb29 = read.csv("../_ctl.tb_out.20130529.csv")
tb30 = read.csv("../_ctl.tb_out.20130530a.csv")
tb30$files = as.character(tb30$files)

head(tb29)
head(tb30)
tb30$change = tb30$Cb / tb29$Cb
summary(tb30)

tb30$color = ifelse(tb30$change>1.5 | tb30$change<0.5, "red", "black"   )
tb30$pch = ifelse(tb30$change>1.5 | tb30$change<0.5, 19, 1   )
plot( tb29$Cb ~ tb30$Cb, xlim=c(0,0.4),pch=tb30$pch, col=tb30$color)

tb30$label = ifelse(tb30$change>2 | tb30$change<0.5, as.character(tb30$files), NA   )
text( tb30$Cb, tb29$Cb, tb30$label)

red= tb30[tb30$color=='red', ]
head(tb30)
tb30$Cv.vs.Cb = tb30$Cv / tb30$Cb

######load the manual fitting results
tbManual = read.csv("H2O2_Log_Plot_Summarized_data,2013May30.csv")
head(tbManual)
names(tbManual)=c("files", "Date", "strains", "Cv", "Cb", "OD600", "note")
tbManual$files = as.character(tbManual$files)

tbManual$Cv.vs.Cb = tbManual$Cv / tbManual$Cb

summary(tbManual$Cv.vs.Cb)
summary(tb30$Cv.vs.Cb)
tbManual[tbManual$Cv.vs.Cb>5, ]
str(tbManual)

### merge
intersect( tbManual$files, tb30$files )
head(tbManual)
head(tb30)

tmp = tbManual[match(tb30$files, tbManual$files), ]
cbind( tmp[,1], tb30[,1]) #visual check, passed. 

tb30M = data.frame( cbind( tb30, tmp[, c(2:9)]) )
head(tb30M)
plot( Cv.vs.Cb ~ Cv.vs.Cb.1, data=tb30M)

plot( Cv ~ Cv.1, data=tb30M, xlab="manual fit Cv", ylab="gnls Cv")

plot( Cb ~ Cb.1, data=tb30M, xlab= "manual Cb", ylab="gnls Cb")
x = seq(0,1, by=0.1); y = x; 
lines( y ~ x, col='red')
m = lm( Cb ~ Cb.1, data=tb30M)
summary(m)
abline(m, col='blue')

Thursday, September 23, 2004

gompertz bootstrap fitting, using gnls in R



#092304Thu
 library(nlme)
 library(survival)

## functions
 gompfit <- function( data ) { #data is a vector
   tmp.fit <- survfit(Surv( data ) )
   s <- c( 1, tmp.fit$surv );
   t <- c( 0, tmp.fit$time );
   fm <- gnls( s ~ exp( (I/G)* ( 1 - exp(G * t) )  ) ,
                  start = list( I = 0.001, G = 0.2 ),
                  );
   I.fit <- fm$coefficients[1];
   G.fit <- fm$coefficients[2];
   c( I.fit, G.fit);
 }

 bootstrap.gompfit <- function ( in.vector ) {
   data <- in.vector[ ! is.na(in.vector) ];
   boot.num <- length(data);
   boot.out <- data.frame(matrix(ncol=3,nrow=boot.num));
   names(boot.out) <- c("I","G","avg.rls");
   for ( i in 1:boot.num ) {
     # data.tmp <- BY4741;
     data.tmp <- sample( data, size=length(data), replace=T);
     gf.out <- gompfit(data.tmp);
     boot.out[i, ] <- c( gf.out, mean(data.tmp) );
   }
   ret = boot.out;
 }

## the main section
 rls <- read.table( "rls.tab", sep="\t", header=T);
 strains <- names(rls);

 #buffers for results
 return.labels <- c("I", "se.I", "G", "se.G", "rls", "se.rls" );
 gomp.out <- data.frame( matrix(nrow=length(names(rls)), ncol=length(return.labels) ) );
 names(gomp.out) <- return.labels;
 row.names(gomp.out) <- names(rls);

 #go over each column anb call bootstrap.gompfit
 group.1 = c(1:20 );
 group.2 = c(21:30 );
 group.3 = c(31:length(strains) );
 
 for ( j in group.1 ) {
   boot.array = bootstrap.gompfit( rls[,j] );
   gomp.out[j,c(1,3,5)] = mean(boot.array);
   gomp.out[j,c(2,4,6)] = sd(boot.array);
 }

 #gomp.out;

 for ( j in group.2 ) {
   boot.array = bootstrap.gompfit( rls[,j] );
   gomp.out[j,c(1,3,5)] = mean(boot.array);
   gomp.out[j,c(2,4,6)] = sd(boot.array);
 }

 
 for ( j in group.3 ) {
   boot.array = bootstrap.gompfit( rls[,j] );
   gomp.out[j,c(1,3,5)] = mean(boot.array);
   gomp.out[j,c(2,4,6)] = sd(boot.array);
 }

 

 write.table(gomp.out, "092204.gomp.bootstrap.merged.lab-strains.csv", row.names=T, quote=T, sep="\t");


#q("no")