# Permute the merged yeast PPI+GIN network; fixes a bug that inserted "NA"
# into the new networks.
# require(igraph)

# Wipe the workspace before anything else is defined.
rm(list = ls())

# Verbosity flag for this script (the value 9 also truncates the network
# further down).
debug <- 0

# Fixed seed so that the random permutation is reproducible.
set.seed(2014)
# Randomly re-pair the endpoints of an edge list without creating self-pairs.
#
# All ids from the first two columns of `inpairs` are pooled, shuffled and cut
# into new (id1, id2) pairs, so every id keeps exactly as many occurrences
# (its degree) as it had in the input. Pairs that end up as id == id are pooled
# together with some of the freshly made valid pairs and shuffled again; this
# repeats for at most `ncycles` shuffles.
#
# Arguments:
#   inpairs  data frame or matrix; columns 1 and 2 hold the two endpoint ids
#            (coerced to character).
#   ncycles  maximum number of shuffles. A value below 1 returns c(NA, NA, NA),
#            as the function has always done for that input.
#   debug    if truthy, print the cycle counter and the self-pairs found in
#            each cycle.
#
# Returns a data frame with character columns `id1`, `id2` and a numeric
# `selfpairs` column (always 0 in the result), one row per input pair, with
# row names reset to 1..n. Row order is not related to the input order.
#
# Raises an error if self-pairs are still present after `ncycles` shuffles;
# previously this case silently dropped edges and appended an all-NA row.
#
# Known limitation (unchanged): duplicated pairs, including the reversed form
# id1-id2 / id2-id1, are not detected or removed.
permute.pairs.wo.selfpairs <- function(inpairs, ncycles = 10, debug = 1) {
  if (ncycles < 1) {
    return(c(NA, NA, NA))
  }

  pool_id1 <- as.character(inpairs[, 1])
  pool_id2 <- as.character(inpairs[, 2])

  # Nothing to permute: return an empty table of the usual shape instead of
  # letting 1:0 indexing fabricate NA pairs.
  if (length(pool_id1) == 0) {
    return(data.frame(
      id1 = character(0),
      id2 = character(0),
      selfpairs = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  settled <- list() # valid pairs set aside in earlier cycles
  cycles_left <- ncycles

  while (cycles_left >= 1) {
    if (debug) {
      print(paste("ncycles=", cycles_left))
    }

    n_pairs <- length(pool_id1)
    ids <- sample(c(pool_id1, pool_id2))
    newpairs <- data.frame(
      id1 = ids[seq_len(n_pairs)],
      id2 = ids[n_pairs + seq_len(n_pairs)],
      stringsAsFactors = FALSE
    )

    is_self <- newpairs$id1 == newpairs$id2
    # An NA comparison must not become an NA row when used as a row index.
    is_self[is.na(is_self)] <- FALSE
    newpairs$selfpairs <- as.numeric(is_self)

    self_tb <- newpairs[is_self, , drop = FALSE]
    nonself_tb <- newpairs[!is_self, , drop = FALSE]

    if (debug) {
      print("===selfpairs====")
      print(self_tb)
      print("=================")
    }

    if (nrow(self_tb) == 0) {
      settled[[length(settled) + 1]] <- newpairs
      result <- do.call(rbind, settled)
      rownames(result) <- NULL
      return(result)
    }

    cycles_left <- cycles_left - 1

    # Number of valid pairs to reshuffle together with the self-pairs
    # (heuristic from the 2014 Jan 31 change).
    split_pos <- round(nrow(self_tb) * sqrt(cycles_left)) + 5

    # A logical mask clamps the split to the rows that actually exist, so
    # neither part can pick up out-of-range (NA) or duplicated rows.
    in_pool <- seq_len(nrow(nonself_tb)) <= split_pos
    reshuffle <- rbind(self_tb, nonself_tb[in_pool, , drop = FALSE])
    settled[[length(settled) + 1]] <- nonself_tb[!in_pool, , drop = FALSE]

    pool_id1 <- reshuffle$id1
    pool_id2 <- reshuffle$id2
  }

  stop(
    "permute.pairs.wo.selfpairs: self-pairs remain after ", ncycles,
    " cycle(s); increase 'ncycles'",
    call. = FALSE
  )
}
# The input file was apparently produced earlier with:
# write.table(pairs, "merged_PPIGIN_2014Jan20.tab", quote=F, row.names=F, col.names=F, sep='\t')

# Read the merged network as two character columns of node ids.
net <- read.table(
  "merged_PPIGIN_2014Jan20.tab",
  header = FALSE,
  sep = "\t",
  colClasses = c("character", "character")
)
head(net)

# debug == 9 restricts the run to the first 90000 pairs.
if (debug == 9) {
  # net = read.table('pair.tab',header=F)
  net <- net[1:90000, ]
}

# Permute the network and dump both versions for inspection.
net2 <- permute.pairs.wo.selfpairs(net)
write.csv(net2, "/tmp/net2.csv")
write.csv(net, "/tmp/net.csv")

# Do the original and the permuted network give every node the same degree?
t1 <- table(c(net[, 1], net[, 2]))
t2 <- table(c(net2[, 1], net2[, 2]))
comp <- t1 == t2
table(comp)

# Nodes whose degree differs between the two networks, with their counts.
tf <- comp[comp == FALSE]
tf
t1[names(tf)[1]]
t1[names(tf)]
t2[names(tf)]

# Note: this R code runs even faster than ms02, presumably because ms02 spent
# time on network configuration.
# 2014 April 7: bug found. id1-id2 and id2-id1 should be treated as the same
# pair.
# (Blog page footer, not part of the script: "No comments:" / "Post a Comment")