Sunday, May 19, 2013

R code for nested model test, likelihood ratio test, using AgingData 0517.xls

2013 Nov 6 Note: Permutation based test are probably more 'accurate' at estimating p-values than Chi-square. 


rm( list=ls())
list.files()

file = "AgingData_0517.csv"
tb = read.csv( file );


##### log likelihood function, simple gompertz mortality model
   #s = exp( (I/G) *(1 - exp(G* my.data)) )  ;
   #m = I * exp( G * my.data );  
 llh.gompertz.single.run <- function( IG, lifespan ) {
   #print(IG)
   I = IG[1]; G = IG[2];
   my.data = lifespan[!is.na(lifespan)];
   log_s = (I/G) *(1 - exp(G* my.data))
   if( I< 0 ) { I = 1E-10 }
   log_m = log(I) +  G * my.data ;
   my.lh = sum(log_s)  + sum(log_m);
   print (IG ); #trace the convergence
   ret = - my.lh # because optim seems to maximize
 }

#####

ret1a = optim ( c(0.0034, 0.1), fn=llh.gompertz.single.run, lifespan=tb[,1], lower=c(1E-10, 1E-5), method="L-BFGS-B" );
ret1b = optim ( c(0.05, 0.2), fn=llh.gompertz.single.run, lifespan=tb[,1], lower=c(1E-10, 1E-5), method="L-BFGS-B" );
ret1c = optim ( c(0.039, 0.1), fn=llh.gompertz.single.run, lifespan=tb[,1]  );
ret1d = optim ( c(0.05, 0.15), fn=llh.gompertz.single.run, lifespan=tb[,1]  );
ret1c$par / ret1d$par
ret1e = optim ( c(0.1, 0.1), fn=llh.gompertz.single.run, lifespan=tb[,1]  );
ret1e$par / ret1d$par

#set up a template for fitting parameters
label = c("R","G", "mean", "median", "stddev", "CV", 'p_wilcox', 'p_ks')
allFitPar = data.frame( matrix(NA, nrow=length(label), ncol=length(tb[1,])) )
rownames(allFitPar) = label
names(allFitPar) = names(tb)

 
#loop over
for (col in names(tb)) {
  cur_lifespan = tb[,col]
  retTmp = optim ( c(0.039, 0.1), fn=llh.gompertz.single.run, lifespan=cur_lifespan  );
  allFitPar[1:2, col] = retTmp$par
  allFitPar["mean", col] = mean(cur_lifespan, na.rm=T)
  allFitPar["median", col] = median(cur_lifespan, na.rm=T)
  allFitPar["stddev", col] = sqrt( var(cur_lifespan, na.rm=T))
  allFitPar["CV", col] = allFitPar["stddev", col] / allFitPar["mean", col]
  tmp = wilcox.test( tb[,'wt'], tb[,col] ) 
  allFitPar['p_wilcox', col] = tmp$p.value
  tmp2 = ks.test( tb[,'wt'], tb[,col] ) 
  allFitPar['p_ks', col] = tmp2$p.value
 
}
allFitPar


#################################
##### LRT
### 1) model H0, same G and same I
### 2) model H1i, same G, different I
### 3) model H1g, different G, same I
### 4) model H2, different G, different I
                                  #    I1       G1       I2         G2
 H0  <- function( rawIG ) { IG <- c(rawIG[1], rawIG[2], rawIG[1], rawIG[2] ) }  #all the same
 H1i <- function( rawIG ) { IG <- c(rawIG[1], rawIG[2], rawIG[3], rawIG[2] ) }  #different I
 H1g <- function( rawIG ) { IG <- c(rawIG[1], rawIG[2], rawIG[1], rawIG[4] ) }  # different G
 H2  <- function( rawIG ) { IG <- c(rawIG[1], rawIG[2], rawIG[3], rawIG[4] ) }  # all different

 Hx.llh.gompertz.single.run <- function( rawIG, model, lifespan1, lifespan2 ) {
   IG = model(rawIG);
   I1 = IG[1]; G1 = IG[2]; I2 = IG[3]; G2 = IG[4];
   my.data1 = lifespan1[!is.na(lifespan1)];
   my.data2 = lifespan2[!is.na(lifespan2)];
   log_s1 = (I1/G1) *(1 - exp(G1* my.data1))
   log_s2 = (I2/G2) *(1 - exp(G2* my.data2))
   log_m1 = log(I1) +  G1 * my.data1 ;
   log_m2 = log(I2) +  G2 * my.data2 ;
   my.lh = sum(log_s1) + sum(log_m1) + sum(log_s2) + sum(log_m2)
   print (IG ); #trace the convergence
   ret = - my.lh # because optim seems to maximize
 }

## LRT to exam whether wt, wtCR share the same G, I
llh.H0   = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H0,   lifespan1=tb$wt, lifespan2=tb$wtCR );
llh.H1i  = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H1i,  lifespan1=tb$wt, lifespan2=tb$wtCR );
llh.H1g  = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H1g,  lifespan1=tb$wt, lifespan2=tb$wtCR );
llh.H2  = optim( c(0.01,0.2,0.01,0.1),   Hx.llh.gompertz.single.run, model=H2,  lifespan1=tb$wt, lifespan2=tb$wtCR );

cbind(llh.H0$par, llh.H1i$par, llh.H1g$par, llh.H2$par);

LH = c(llh.H0$value, llh.H1i$value, llh.H1g$value, llh.H2$value);
LH
deltaLH =  - LH + llh.H0$value;
deltaLH
1 - pchisq( 2*deltaLH, df =c(1,1,1,2) );
#p=0.06 not significant for all models.

#################################
###### End of LRT    ############
#################################

## LRT to exam whether ybr053cD, and CR share the same G, I
llh.H0   = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H0,   lifespan1=tb[,3], lifespan2=tb[,4] );
llh.H1i  = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H1i,  lifespan1=tb[,3], lifespan2=tb[,4] );
llh.H1g  = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H1g,  lifespan1=tb[,3], lifespan2=tb[,4] );
llh.H2   = optim( c(0.01,0.2,0.01,0.1),   Hx.llh.gompertz.single.run, model=H2,  lifespan1=tb[,3], lifespan2=tb[,4] );

cbind(llh.H0$par, llh.H1i$par, llh.H1g$par, llh.H2$par);

LH = c(llh.H0$value, llh.H1i$value, llh.H1g$value, llh.H2$value);
LH
deltaLH =  - LH + llh.H0$value;
deltaLH
1 - pchisq( 2*deltaLH, df =c(1,1,1,2) );
#[1] 1.00000000 0.01498437 0.01368829 0.04515191

#################################
###### End of LRT    ############
#################################

## LRT to exam whether ybr053cD, and CR share the same G, I
llh.H0   = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H0,   lifespan1=tb[,5], lifespan2=tb[,6] );
llh.H1i  = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H1i,  lifespan1=tb[,5], lifespan2=tb[,6] );
llh.H1g  = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H1g,  lifespan1=tb[,5], lifespan2=tb[,6] );
llh.H2   = optim( c(0.01,0.2,0.01,0.1),   Hx.llh.gompertz.single.run, model=H2,  lifespan1=tb[,5], lifespan2=tb[,6] );
cbind(llh.H0$par, llh.H1i$par, llh.H1g$par, llh.H2$par);

LH = c(llh.H0$value, llh.H1i$value, llh.H1g$value, llh.H2$value);
LH
deltaLH =  - LH + llh.H0$value;
deltaLH
1 - pchisq( 2*deltaLH, df =c(1,1,1,2) );
#[1] 1.00000000 0.04595566 0.31817030 0.04188332

#################################
###### End of LRT    ############
#################################


## LRT to exam whether ybr053cD, and CR share the same G, I
llh.H0   = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H0,   lifespan1=tb[,7], lifespan2=tb[,8] );
llh.H1i  = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H1i,  lifespan1=tb[,7], lifespan2=tb[,8] );
llh.H1g  = optim( c(0.01,0.2,0.01,0.1),  Hx.llh.gompertz.single.run, model=H1g,  lifespan1=tb[,7], lifespan2=tb[,8] );
llh.H2   = optim( c(0.01,0.2,0.01,0.1),   Hx.llh.gompertz.single.run, model=H2,  lifespan1=tb[,7], lifespan2=tb[,8] );

cbind(llh.H0$par, llh.H1i$par, llh.H1g$par, llh.H2$par);

LH = c(llh.H0$value, llh.H1i$value, llh.H1g$value, llh.H2$value);
LH
deltaLH =  - LH + llh.H0$value;
deltaLH
1 - pchisq( 2*deltaLH, df =c(1,1,1,2) );
#[1] 1.00000000 0.04595566 0.31817030 0.04188332

#################################
###### End of LRT    ############
#################################

No comments:

Post a Comment