RLS pairwise difference, P-value evaluation using ms02 permutation
H Qin
11/27/2017
rm(list=ls())
#setwd("~/github/0.network.aging.ms02/1.Fraser02")
setwd("/home/hqin/github/network.aging.configuration/1.Fraser02")
source("../network.r")
set.seed(2017)
debug = 1;
start_time = Sys.time();
list.files(path="../data/")
## [1] "ken-RLS-byORF.csv"
## [2] "SummaryRegressionHetHomFactorized2015Oct13.csv"
## [3] "unique_biogrid_ScePPI.csv"
rls = read.csv("../data/ken-RLS-byORF.csv");
biogrid = read.csv("../data/unique_biogrid_ScePPI.csv");
fit = read.csv("../data/SummaryRegressionHetHomFactorized2015Oct13.csv")
ppi = biogrid[, c("Systematic.Name.Interactor.A","Systematic.Name.Interactor.B")];
names(ppi) = c("ORF1", "ORF2" )
#First, define a function to calculate V difference in pairs of proteins
diff.RLS = function( inpairs ) {
inpairs$rls1 = rls$avgLS[match( inpairs$ORF1, rls$ORF ) ];
inpairs$rls2 = rls$avgLS[match( inpairs$ORF2, rls$ORF ) ];
inpairs$essen1 = fit$essenflag[match(inpairs$ORF1, fit$orf)];
inpairs$essen2 = fit$essenflag[match(inpairs$ORF2, fit$orf)];
inpairs$rls1 = ifelse( inpairs$essen1=='essential', 0, inpairs$rls1);
inpairs$rls2 = ifelse( inpairs$essen2=='essential', 0, inpairs$rls2);
ret = mean( abs( inpairs$rls1 - inpairs$rls2 ), na.rm=T );
}
# calculate the observed difference in RLS
diff.RLS.obs = diff.RLS ( ppi );
paste( "Observed deltaRLS = ", diff.RLS.obs);
## [1] "Observed deltaRLS = 12.759632586095"
#permutation of pairs, and their difference in Ka
Nsims = 10000; #number of permutations permutated.diff.RLS = numeric( Nsims ); #empty vector to store calculations library(foreach) library(doMC)
## Loading required package: iterators
## Loading required package: parallel
registerDoMC(cores=8) #Intel i7 has 6 cores, Xeon E5-2603 @ridgeside has 8 cores
permutated.diff.RLS = foreach(i=1:Nsims) %dopar% {
new.pairs = ms02_singlerun(ppi ) #generate a new MS02 random network
new.pairs = new.pairs[,1:2] #reformating into two-columns
names(new.pairs) = c("ORF1", "ORF2")
diff.RLS( new.pairs );
}
p-value
permutated.diff.RLS = unlist(permutated.diff.RLS)
summary(permutated.diff.RLS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.03 14.20 14.22 14.22 14.25 14.37
sub = permutated.diff.RLS[ permutated.diff.RLS < diff.RLS.obs ]
paste("pvalue = ", length(sub)/Nsims)
## [1] "pvalue = 0"
hist(permutated.diff.RLS)
stop_time = Sys.time()
start_time;
## [1] "2017-11-29 10:01:30 EST"
stop_time;
## [1] "2017-11-29 13:04:53 EST"
paste( "running time = ", stop_time - start_time)
## [1] "running time = 3.05638415979015"