Thursday, November 30, 2017

RLS pairwise difference, P-value < 1/10K using ms02 permutation

rm(list=ls())
#setwd("~/github/0.network.aging.ms02/1.Fraser02")
setwd("/home/hqin/github/network.aging.configuration/1.Fraser02")
source("../network.r")
set.seed(2017)
debug = 1; 
start_time = Sys.time();
list.files(path="../data/")
## [1] "ken-RLS-byORF.csv"                             
## [2] "SummaryRegressionHetHomFactorized2015Oct13.csv"
## [3] "unique_biogrid_ScePPI.csv"
rls = read.csv("../data/ken-RLS-byORF.csv");
biogrid = read.csv("../data/unique_biogrid_ScePPI.csv");
fit = read.csv("../data/SummaryRegressionHetHomFactorized2015Oct13.csv")
ppi = biogrid[, c("Systematic.Name.Interactor.A","Systematic.Name.Interactor.B")];
names(ppi) = c("ORF1", "ORF2" )
#First, define a function to calculate V difference in pairs of proteins
 diff.RLS = function( inpairs ) {
   inpairs$rls1 = rls$avgLS[match( inpairs$ORF1, rls$ORF ) ];
   inpairs$rls2 = rls$avgLS[match( inpairs$ORF2, rls$ORF ) ];
   
   inpairs$essen1 = fit$essenflag[match(inpairs$ORF1, fit$orf)];
   inpairs$essen2 = fit$essenflag[match(inpairs$ORF2, fit$orf)];
   
   inpairs$rls1 = ifelse( inpairs$essen1=='essential', 0, inpairs$rls1);
   inpairs$rls2 = ifelse( inpairs$essen2=='essential', 0, inpairs$rls2);
   
   ret = mean( abs( inpairs$rls1 - inpairs$rls2 ), na.rm=T );
 } 
 # calculate the observed difference in RLS
 diff.RLS.obs = diff.RLS ( ppi );
 paste( "Observed deltaRLS = ", diff.RLS.obs); 
## [1] "Observed deltaRLS =  12.759632586095"
#permutation of pairs, and their difference in Ka
 Nsims = 10000; #number of permutations
 permutated.diff.RLS = numeric( Nsims ); #empty vector to store calculations

library(foreach)
library(doMC)
## Loading required package: iterators
## Loading required package: parallel
registerDoMC(cores=8) #Intel i7 has 6 cores, Xeon E5-2603 @ridgeside has 8 cores

permutated.diff.RLS = foreach(i=1:Nsims) %dopar% {
   new.pairs = ms02_singlerun(ppi ) #generate a new MS02 random network
   new.pairs = new.pairs[,1:2] #reformating into two-columns
   names(new.pairs) = c("ORF1", "ORF2")
   diff.RLS( new.pairs ); 
  }
p-value
permutated.diff.RLS = unlist(permutated.diff.RLS)

summary(permutated.diff.RLS)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.03   14.20   14.22   14.22   14.25   14.37
sub = permutated.diff.RLS[ permutated.diff.RLS < diff.RLS.obs ]
paste("pvalue = ", length(sub)/Nsims)
## [1] "pvalue =  0"
hist(permutated.diff.RLS)
stop_time = Sys.time()
start_time;
## [1] "2017-11-29 10:01:30 EST"
stop_time;
## [1] "2017-11-29 13:04:53 EST"
paste( "running time = ", stop_time - start_time) 
## [1] "running time =  3.05638415979015"

No comments:

Post a Comment