https://www.nature.com/articles/s43588-020-00009-4
This site is to serve as my note-book and to effectively communicate with my students and collaborators. Every now and then, a blog may be of interest to other researchers or teachers. Views in this blog are my own. All rights of research results and findings on this blog are reserved. See also http://youtube.com/c/hongqin @hongqin
https://www.nature.com/articles/s43588-020-00009-4
use network with known theoretical random permutation
this direction might be too theoretic and has very little practical importance.
rm(list=ls())
#setwd("~/github/0.network.aging.ms02/1.Fraser02")
setwd("/home/hqin/github/network.aging.configuration/1.Fraser02")
source("../network.r")
set.seed(2017)
debug = 1;
start_time = Sys.time();
list.files(path="../data/")
## [1] "ken-RLS-byORF.csv"
## [2] "SummaryRegressionHetHomFactorized2015Oct13.csv"
## [3] "unique_biogrid_ScePPI.csv"
rls = read.csv("../data/ken-RLS-byORF.csv");
biogrid = read.csv("../data/unique_biogrid_ScePPI.csv");
fit = read.csv("../data/SummaryRegressionHetHomFactorized2015Oct13.csv")
ppi = biogrid[, c("Systematic.Name.Interactor.A","Systematic.Name.Interactor.B")];
names(ppi) = c("ORF1", "ORF2" )
#First, define a function to calculate V difference in pairs of proteins
diff.RLS = function( inpairs ) {
inpairs$rls1 = rls$avgLS[match( inpairs$ORF1, rls$ORF ) ];
inpairs$rls2 = rls$avgLS[match( inpairs$ORF2, rls$ORF ) ];
inpairs$essen1 = fit$essenflag[match(inpairs$ORF1, fit$orf)];
inpairs$essen2 = fit$essenflag[match(inpairs$ORF2, fit$orf)];
inpairs$rls1 = ifelse( inpairs$essen1=='essential', 0, inpairs$rls1);
inpairs$rls2 = ifelse( inpairs$essen2=='essential', 0, inpairs$rls2);
ret = mean( abs( inpairs$rls1 - inpairs$rls2 ), na.rm=T );
}
# calculate the observed difference in RLS
diff.RLS.obs = diff.RLS ( ppi );
paste( "Observed deltaRLS = ", diff.RLS.obs);
## [1] "Observed deltaRLS = 12.759632586095"
#permutation of pairs, and their difference in Ka
Nsims = 1000; #number of permutations
permutated.diff.RLS = numeric( Nsims ); #empty vector to store calculations
library(foreach)
library(doMC)
## Loading required package: iterators
## Loading required package: parallel
registerDoMC(cores=8) #Intel i7 has 6 cores, Xeon E5-2603 @ridgeside has 8 cores
permutated.diff.RLS = foreach(i=1:Nsims) %dopar% {
new.pairs = ms02_singlerun(ppi ) #generate a new MS02 random network
new.pairs = new.pairs[,1:2] #reformating into two-columns
names(new.pairs) = c("ORF1", "ORF2")
diff.RLS( new.pairs );
}
permutated.diff.RLS = unlist(permutated.diff.RLS)
summary(permutated.diff.RLS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.09 14.20 14.22 14.22 14.25 14.39
sub = permutated.diff.RLS[ permutated.diff.RLS < diff.RLS.obs ]
paste("pvalue = ", length(sub)/Nsims)
## [1] "pvalue = 0"
hist(permutated.diff.RLS)
stop_time = Sys.time()
paste( "running time = ", stop_time - start_time)
## [1] "running time = 18.9267802158992"
rm(list=ls())
setwd("~/github/0.network.aging.ms02/1.Fraser02")
source("../network.r")
set.seed(2017)
debug = 1;
list.files(path="../data/")
## [1] "ken-RLS-byORF.csv"
## [2] "SummaryRegressionHetHomFactorized2015Oct13.csv"
## [3] "unique_biogrid_ScePPI.csv"
rls = read.csv("../data/ken-RLS-byORF.csv");
biogrid = read.csv("../data/unique_biogrid_ScePPI.csv");
fit = read.csv("../data/SummaryRegressionHetHomFactorized2015Oct13.csv")
ppi = biogrid[, c("Systematic.Name.Interactor.A","Systematic.Name.Interactor.B")];
names(ppi) = c("ORF1", "ORF2" )
#First, define a function to calculate V difference in pairs of proteins
diff.RLS = function( inpairs ) {
inpairs$rls1 = rls$avgLS[match( inpairs$ORF1, rls$ORF ) ];
inpairs$rls2 = rls$avgLS[match( inpairs$ORF2, rls$ORF ) ];
inpairs$essen1 = fit$essenflag[match(inpairs$ORF1, fit$orf)];
inpairs$essen2 = fit$essenflag[match(inpairs$ORF2, fit$orf)];
inpairs$rls1 = ifelse( inpairs$essen1=='essential', 0, inpairs$rls1);
inpairs$rls2 = ifelse( inpairs$essen2=='essential', 0, inpairs$rls2);
ret = mean( abs( inpairs$rls1 - inpairs$rls2 ), na.rm=T );
}
# calculate the observed difference in RLS
diff.RLS.obs = diff.RLS ( ppi );
paste( "Observed deltaRLS = ", diff.RLS.obs);
## [1] "Observed deltaRLS = 12.759632586095"
#permutation of pairs, and their difference in Ka
Nsims = 100; #number of permutations
permutated.diff.RLS = numeric( Nsims ); #empty vector to store calculations
library(foreach)
library(doMC)
## Loading required package: iterators
## Loading required package: parallel
registerDoMC(cores=5) #Intel i7 has 6 cores
permutated.diff.RLS = foreach(i=1:Nsims) %dopar% {
new.pairs = ms02_singlerun(ppi ) #generate a new MS02 random network
new.pairs = new.pairs[,1:2] #reformating into two-columns
names(new.pairs) = c("ORF1", "ORF2")
diff.RLS( new.pairs );
}
permutated.diff.RLS = unlist(permutated.diff.RLS)
summary(permutated.diff.RLS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.14 14.20 14.23 14.23 14.25 14.31
sub = permutated.diff.RLS[ permutated.diff.RLS < diff.RLS.obs ]
paste("pvalue = ", length(sub)/Nsims)
## [1] "pvalue = 0"
hist(permutated.diff.RLS)