Wednesday, November 7, 2018

NetBAS running time problem, yeast PIN, GOBP ~ GOBP 4.2 hours


The following code runs for 4.2 hours on the applejack laptop using a single core. A parallel method is needed to speed this up.

```{r}
# Tabulate how often each tag occurs over all pairs.
#
# Reads (defined outside this chunk):
#   pairs - data frame with columns name1 and name2, one row per pair
#   cats  - data frame with columns id and GO
#   allCombinationsOfTwoVectors() - presumably returns one tag per combination
#                                   of its two vector arguments; TODO confirm
# Writes:
#   pairsBuffer - data frame (name1, name2, tag), one row per generated tag
#   F.obs       - data frame (tag, freq) with the observed tag frequencies
#
# The rows are collected in preallocated lists and assembled once at the end.
# Calling rbind() on a growing data frame inside the loop copies every row
# accumulated so far on each iteration, which makes the loop quadratic.

# Index the GO terms by id once instead of scanning all of cats twice per pair.
goById = split(cats$GO, as.character(cats$id))
emptyGO = cats$GO[0]   # zero-length vector of the same type as cats$GO

# Return the GO terms recorded for one id, or an empty vector when the id is
# NA or does not occur in cats.
lookupGO = function(id) {
  key = as.character(id)
  if (length(key) != 1 || is.na(key)) {
    return(emptyGO)
  }
  terms = goById[[key]]
  if (is.null(terms)) emptyGO else terms
}

nPairs = nrow(pairs)
name1Parts = vector("list", nPairs)
name2Parts = vector("list", nPairs)
tagParts = vector("list", nPairs)

# seq_len() keeps the loop from running when pairs has no rows
# (1:0 would iterate over c(1, 0)).
for (i in seq_len(nPairs)) {
  if (i %% 1000 == 0) {
    message("processed ", i, " of ", nPairs, " pairs")
  }
  #els1 = sort( unlist( strsplit(  pairs$cat1[i], split=",") ))
  #els2 = sort( unlist( strsplit(  pairs$cat2[i], split=",") ))

  # all combinations of the GO terms of the two members of the pair
  tagbuffer = allCombinationsOfTwoVectors(lookupGO(pairs$name1[i]),
                                          lookupGO(pairs$name2[i]))

  # repeat the pair ids once per tag so the three vectors stay aligned;
  # as.character() keeps factor ids as labels rather than integer codes
  name1Parts[[i]] = rep(as.character(pairs$name1[i]), length(tagbuffer))
  name2Parts[[i]] = rep(as.character(pairs$name2[i]), length(tagbuffer))
  tagParts[[i]] = as.character(tagbuffer)
}

# Assemble the buffer in one step. The leading all-NA row reproduces the
# placeholder row the buffer has always started with, so any later code that
# strips the first row keeps working; table() below ignores NA.
pairsBuffer = data.frame(name1 = c(NA, unlist(name1Parts)),
                         name2 = c(NA, unlist(name2Parts)),
                         tag = c(NA, unlist(tagParts)),
                         stringsAsFactors = FALSE)

F.obs = data.frame( table(pairsBuffer$tag))
names(F.obs) = c("tag", "freq")
F.obs

No comments:

Post a Comment