Friday, October 10, 2008

Aligned chromosome plots in R

### 101008 use my annotation, there are errors in Combimatrix's chr annotation files, such as orf19.5320

### 042308 remove hist part. remove x-axis. tick inside

### based 021908-chr-hist.R
### after background correction and non-chr5 nomalization
### histogram of expression ratio along chromosome, then histogram
### modified from 020508 version

rm(list=ls())


 #left ajusted

 MyBox <- function( x0,y0,h,w,col) {
   xin = c( x0, x0,   x0+w, x0+w );
   yin = c( y0, y0+h, y0+h, y0   );
   polygon(xin,yin, col=col, border=NA);
 }

 #centered

 MyBoxC <- function( x0,y0,h,w,col) {
   xin = c( x0-w/2, x0-w/2,   x0+w/2, x0+w/2 );
   yin = c( y0-h/2, y0+h/2,   y0+h/2, y0-h/2   );
   polygon(xin,yin, col=col, border=NA);
 }

#  Exactly form of mulinomial sampling 

# vector of x and p
loglh.multinomial.sampling <- function( x, p ) {
  total = sum( x );
  if ( length(x) == length(p) ) { 
        y = p ^ x;
        ret <- lfactorial(total) - sum( lfactorial(x) ) + sum(log(y) );
  } else {
        ret <- NA;
  }
}

################################

#source ("source/021908-sor55-norm-merge.b.R");
# or 
# load ("021908.bkgrdCorrected.nonChr5Normalized.RData");
 load ("old/022008.bkgrdCorrected.nonChr5Normalized.RData");

 out$chr.combimatrix = out$chr; #store the original chr annotation


################# the expression results

cen.orfs = c('CEN1','CENR','CEN2','CEN3','CEN4','CEN5','CEN6','CEN7');
chrs     = c('Chr1','ChrR','Chr2','Chr3','Chr4','Chr5','Chr6','Chr7');
locs = c( '1', 'R','2','3','4','5','6','7');

dic= data.frame( cbind( chrs, locs) )

dic$chrs = as.character( dic$chrs );
dic$locs = as.character( dic$locs );

#tb = read.delim("cal21-annotation.csv");

tb = read.delim("cal21-annotation.csv",
 colClasses = c("character","character","character","character","integer","integer","integer",
"character") );
#tb$chr = as.character( tb$chr );
cens = tb[ match(cen.orfs, tb$orf), ]; 

## now combine tables   101008 change

out$start = tb$start[ match( out$orf, tb$orf ) ]; # 100208
out$end   = tb$end[ match( out$orf, tb$orf ) ]; #  100208
out$strand= tb$strand[ match( out$orf, tb$orf ) ]; #  100208
out$chr2  = tb$chr[ match( out$orf, tb$orf ) ]; #  100208
out$chr   = dic$chrs[ match( out$chr2, dic$locs) ] 
out$loc   = out$start /2  + out$end/2 

 table(out$chr); table( out$chr2); #check, OK


 save.image ("101008.bkgrdCorrected.nonChr5Normalized.myChr.RData");


xscale = 3.2E6;  

xlim=c(0, xscale); 
yscale = 4;
ylim = c(0.1, 6);
yy = c(0.1, 0.2, 0.5, 1, 2, 4);
xx = pretty( range( tb$end, na.rm=T ) )

cutoffs = quantile( out$fold.mut.by.wt, probs=c(0.05,0.95) );


window.bp = 0.5E5;  window = 15;  pcutoff = 1E-4;  #

expecteds = c(0.05, 0.9, 0.05) * window;


 pdf( "_101008-chrs-chisq.pdf",height=9,width=6);

 #jpeg( "_42308-chrs-chisq.jpg",height=2000,width=1500, horizontal=T);
 #jpeg( "_42308-chrs-chisq.jpg",height=900,width=600, horizontal=T);

mat = matrix( seq(1,length(chrs)), nrow=length( chrs), ncol= 2 ); 


layout(mat, heights= c( 1.15, rep(1, nrow(mat)-2), 1.2) );


#i=4; i=5

for ( i in 1:length(chrs)) {
  mychr = chrs[ i ];
exp.chrtb = out[ grep( mychr, out$chr),] ;  ###change 100208, not work for >10 chrs

summary(exp.chrtb) #25 NAs ??, maybe, 

exp.chrtb[is.na(exp.chrtb$start),]

u = 1; #u =  mean( exp.chrtb$fold.mut.by.wt); # [1] 0.7891938


if ( i == length(chrs) ) {

 par(mar=c(2,2,0,1));
 plot( exp.chrtb$fold.mut.by.wt ~ exp.chrtb$loc, col="white",xlab='',ylab='sor55/wt',log='y',
xlim=xlim,ylim=ylim, axes=F); 
 axis( 1, at = xx, tcl=0.2, line= 0);
} else {
  if ( i == 1) { 
par(mar=c(0,2,1,1) ); 
  } else { 
par(mar=c(0,2,0,1) ); 
  }
  plot( exp.chrtb$fold.mut.by.wt ~ exp.chrtb$loc, col="white",xlab='bp',ylab='sor55/wt',log='y',
xlim=xlim,ylim=ylim, axes=F); 
 axis( 1, at = xx, labels=F, tcl=0.2);  #no labels, tick inside


box( ); #set the boxes

        axis(2, at= yy, tcl=0.4, line= 0); 
#axis(2, at=pretty(range(ylim)) ); 
# main= paste( "expression ratio sor55/wt along", mychr, sep=' ') );

for( j in 1:length(exp.chrtb$orf) ) {
yin = exp.chrtb$fold.mut.by.wt[j];
mycol = 'NA';
if (( yin > cutoffs[2] ) && ( exp.chrtb$WtMean>5)) { mycol = 'blue';   ####remove outliers??!!!!
} else if ( yin < cutoffs[1] ) { mycol = 'red';
} else { mycol= 'green'; }

exp.chrtb$type[j] = mycol;

y = ifelse ( yin > u, yin-u, yin-u );
MyBox( exp.chrtb$loc[j], u, y, xscale/1000, col= mycol);
}

MyBox( 0, u, 0.01, max(exp.chrtb$loc,na.rm=T), col="black");
points( cens$start[cens$orf==cen.orfs[i]], u+0.005 ,pch=19,cex=0.8, col="black");
#text( -5E4, 2, chrs[i] );
text( -5E3, 4, chrs[i] );

#sliding chi-sq test
sorted.locs = sort( exp.chrtb$loc );
labels = c('orf','loc', 'type', 'pchisq');
sliding.test = data.frame( matrix(nrow=length(sorted.locs), ncol=length(labels) ) );
names( sliding.test) = labels;

sliding.test$loc  = sorted.locs;

sliding.test$orf  = exp.chrtb$orf [match( sorted.locs, exp.chrtb$loc )];
sliding.test$type = exp.chrtb$type[match( sorted.locs, exp.chrtb$loc )];
sliding.test$clustertype = "green"; ### 101008 change
sliding.test[1:5,];
exp.chrtb[exp.chrtb$orf=='orf19.5692',] #check passed.

for( j in 1:(length(sliding.test$loc) - window + 1) ) {

xs = j:(j+window-1);
mylocs = sliding.test$loc[xs];  mylocs = mylocs[ ! is.na(mylocs) ];

if (( max(mylocs) - min(mylocs)) < window.bp ) {
types = sliding.test$type[xs];
downs = length( types[ types=="red"] );
means = length( types[ types=="green"] );
ups = length( types[ types=="blue"] );
obs = c( downs, means, ups);
obs[is.na(obs)] = 0;
#expecteds = c(0.5, 9, 0.5);
k2 = sum( (obs - expecteds)^2 / expecteds );
  center = floor( j + window/2 ); 
sliding.test$pchisq[center] = pchisq( k2, length(obs)-1, lower.tail = F);

yplot= NA;

if ( sliding.test$pchisq[ center ] < pcutoff ) {
 if ((obs[1]> expecteds[1]) && (obs[3]< expecteds[3]) ){
    sliding.test$clustertype[center] = "red"; yplot = 0.15;
    #rect( sliding.test$loc[j], yplot, sliding.test$loc[j+window-1], yplot+yscale/100,col="red",
    #     border=NA );
 } else if ((obs[1] < expecteds[1]) && (obs[3] > expecteds[3]) ) {
    sliding.test$clustertype[center] = "blue"; yplot = 0.15;
 } 
  MyBoxC( sliding.test$loc[center],yplot,yscale/100,xscale/200,
  col=sliding.test$clustertype[center] );
 #   rect( sliding.test$loc[j], yplot, sliding.test$loc[j+window-1], yplot+yscale/100,
 #        border=NA,sliding.test$clustertype[center] );
} else {
sliding.test$clustertype[center] = "green"; #greens are not shown
}

} ## window.bi if loop

}
#sliding.test[1:20,]

#par(new=T);

#plot ( sliding.test$pchisq ~ sliding.test$loc, col='purple', type='l',xlab='',ylab='',log='y', xlim=xlim, axes=F,lty=2); 

}#chr loop

dev.off();


q("yes");