Friday, January 31, 2014

R, permute network without selfpairing, bug fix

#Permute the merged yeast PPI+GIN network; fixes a bug that inserted "NA" rows into the new networks.

#require(igraph)
# Remove every object returned by ls() from the current environment so the
# script starts from a clean workspace. NOTE(review): this also deletes
# anything the user had loaded in an interactive session.
rm(list=ls())
# Script-level switch; the value 9 is tested further down to restrict the run
# to a subset of the network.
debug = 0
# Fix the RNG seed so the sample()-based shuffling is reproducible.
set.seed(2014)
# Randomly re-pair the endpoints of an edge list while keeping how often each
# id occurs, and without producing self-pairs (id1 == id2).
#
# All ids from the first two columns of `inpairs` are pooled, shuffled with
# sample() and split back into two columns. If the shuffle produced
# self-pairs, those rows plus a slice of the valid rows are reshuffled
# recursively; the remaining valid rows are kept as they are.
#
# Args:
#   inpairs: data frame (or matrix) whose first two columns hold the ids of
#            each pair. Any further columns are ignored.
#   ncycles: maximum number of shuffling rounds that may be spent on
#            eliminating self-pairs.
#   debug:   if non-zero/TRUE, print progress and the self-pairs found in
#            each round. It is forwarded to the recursive calls.
#
# Returns:
#   A data frame with the columns id1, id2 (character) and selfpairs (0 for
#   every non-self pair), with one row per input pair.
#   When called with ncycles < 1 the value c(NA, NA, NA) is returned.
#
# Errors:
#   stop() is raised when self-pairs are still present after `ncycles`
#   rounds, instead of silently dropping them and appending an NA row.
#
# Known limitation (2014 April 7): id1-id2 and id2-id1 are not recognised as
# the same pair, so the result may contain duplicated edges.
permute.pairs.wo.selfpairs = function( inpairs,  ncycles=10, debug=1 ) {
  if (ncycles < 1) {
    return( c(NA, NA, NA) )
  }
  if (debug) {
    print(paste('ncycles=', ncycles))
  }

  # Pool both endpoint columns, shuffle, and cut the pool back into pairs.
  len = nrow(inpairs)
  longids = sample( c(as.character(inpairs[,1]), as.character(inpairs[,2])) )
  newpairs = data.frame( id1 = longids[seq_len(len)],
                         id2 = longids[len + seq_len(len)],
                         stringsAsFactors = FALSE )
  newpairs$selfpairs = ifelse( newpairs$id1 == newpairs$id2, 1, 0 )

  # %in% never yields NA, so a row with a missing id is kept as a non-self
  # pair instead of turning into an all-NA row during subsetting.
  is.self = newpairs$selfpairs %in% 1
  self.tb = newpairs[ is.self, ]
  nonself.tb = newpairs[ !is.self, ]
  if (debug) {
    print(paste("===selfpairs===="))
    print(self.tb)
    print(paste("================="))
  }

  if (nrow(self.tb) == 0) {
    return(newpairs)
  }

  ncycles = ncycles - 1
  if (ncycles < 1) {
    stop("permute.pairs.wo.selfpairs: self-pairs remain after all cycles were used; increase ncycles",
         call. = FALSE)
  }

  # Reshuffle the self-pairs together with some valid pairs. The slice size
  # follows the 2014 Jan 31 heuristic but is clamped to the number of valid
  # pairs, so indexing past the end can no longer create NA rows.
  n.nonself = nrow(nonself.tb)
  splitPos = min( n.nonself, round( nrow(self.tb) * sqrt(ncycles) ) + 5 )
  selectedpairs = rbind( self.tb, nonself.tb[ seq_len(splitPos), ] )
  # seq_len() gives an empty index when nothing is left, unlike a:b which
  # would count backwards.
  restpairs = nonself.tb[ splitPos + seq_len(n.nonself - splitPos), ]
  rbind( restpairs, permute.pairs.wo.selfpairs(selectedpairs, ncycles, debug) )
}

#write.table(pairs, "merged_PPIGIN_2014Jan20.tab", quote=F, row.names=F, col.names=F, sep='\t')
# Load the merged edge list: tab-separated, no header, both columns read as
# character. The argument name colClasses is spelled out instead of relying
# on partial matching.
net = read.table( "merged_PPIGIN_2014Jan20.tab", header=FALSE, sep="\t", colClasses = c("character", "character") )
head(net)
if(debug==9) {
  #net = read.table('pair.tab',header=F)
  # Restrict the run to the first 90000 pairs; head() does not pad with NA
  # rows when the file is shorter than that.
  net = head(net, 90000)
}


# Permute the network and store both the permuted and the original version.
net2 = permute.pairs.wo.selfpairs( net  )
write.csv(net2, "/tmp/net2.csv")
write.csv(net,"/tmp/net.csv")

#do they have the same degree?
# Tabulate both networks over one shared set of ids so the two tables always
# have the same length and order and are compared id by id, not by position.
ids = sort(unique(c(net[,1], net[,2], net2[,1], net2[,2])))
t1 = table(factor(c(net[,1],net[,2]), levels=ids))
t2 = table(factor(c(net2[,1],net2[,2]), levels=ids))
comp <- t1 == t2
table(comp)
# Ids whose degree differs between the original and the permuted network.
tf = comp[!comp]; tf
# head() yields an empty selection rather than an NA name when nothing differs.
t1[head(names(tf), 1)]
t1[names(tf)]
t2[names(tf)]



#note, this R code runs even faster than ms02, presumably because ms02 spent time on network configuration. 

#2014 April 7. Bug found: id1-id2 and id2-id1 should be treated as the same pair, but the permutation does not check for this.


No comments:

Post a Comment