Wednesday, December 26, 2018

foreach ms02 run, yeast PIN, GO terms, lowRAM implementation

This version of the foreach loop writes the MS02 tag counts of each MS02 null network to its own CSV file with write.csv. This way each session uses only ~180M of RAM.

Yeast PIN foreach MS02 run for GO - dang CR,
each session takes ~180M RAM







# Names of the MS02 null-network files to process (file names only, as
# returned by list.files() for the 'yeastMS02' directory).
ms02files <- list.files(path = "yeastMS02")

# In debug mode, restrict the run to at most the first five files. head() is
# used rather than [1:5] so that a directory holding fewer than five files
# does not produce NA entries that would later be read as file names.
if (debug > 0) {
  ms02files <- head(ms02files, 5)
}

start <- Sys.time()  # timestamp taken before the parallel run begins
CPUs <- 3            # core count handed to registerDoMC()
registerDoMC(CPUs)

# For every MS02 null-network file: read its id pairs, build the tag
# combinations for each pair, tabulate them, and write the counts to
# tmp/_Fms02tag_<file>. The counts are written to disk inside the loop; the
# value returned by each iteration is just the result of write.csv().
#
# Relies on objects defined outside this block: `debug`, `cats` (used here via
# its `id` and `GO` columns), `g.dang.Q` (indexed by gene id) and
# `allCombinationsOfTwoVectors()`.
# NOTE(review): the pair files are assumed to have columns named `id1` and
# `id2` -- confirm against the files in yeastMS02/.
tmpbuffer <- foreach(fi = seq_along(ms02files)) %dopar% {
  file <- ms02files[fi]
  ms02_pairs <- read.csv(paste0("yeastMS02/", file),
                         colClasses = c("character", "character"))
  ms02_pairs <- ms02_pairs[, 1:2]
  if (debug > 5) {
    # head() keeps at most 1000 rows without padding short files with NA rows.
    ms02_pairs <- head(ms02_pairs, 1000)
    print(paste("foreach:fi=", fi))
  }

  # GO terms recorded in `cats` for one gene id ("A" data set).
  # Subsetting a data frame never yields NULL, so the "no annotation" case is
  # detected by an empty result. which() drops NA comparisons so that NA ids
  # cannot select spurious all-NA rows.
  go_terms_of <- function(id) {
    terms <- cats$GO[which(cats$id == id)]
    if (length(terms) == 0) {
      return("NA")
    }
    terms
  }

  # Entry of `g.dang.Q` for one gene id ("B" data set), as character, with a
  # missing value replaced by the string "NA".
  dang_tag_of <- function(id) {
    tag <- as.character(g.dang.Q[id])
    tag[is.na(tag)] <- "NA"
    tag
  }

  # One list element per pair, flattened once at the end, instead of growing
  # a vector with c() on every iteration.
  # NOTE(review): assumes allCombinationsOfTwoVectors() returns an atomic
  # (character) vector -- confirm.
  pair_tags <- lapply(seq_len(nrow(ms02_pairs)), function(i) {
    id1 <- ms02_pairs$id1[i]
    id2 <- ms02_pairs$id2[i]

    els1A <- go_terms_of(id1)
    els2A <- go_terms_of(id2)
    els1B <- dang_tag_of(id1)
    els2B <- dang_tag_of(id2)

    tagbuffer1 <- allCombinationsOfTwoVectors(els1A, els2B)
    tagbuffer2 <- allCombinationsOfTwoVectors(els2A, els1B)
    c(tagbuffer1, tagbuffer2)
  })

  # The variable name is kept as `tagbufferMS02` on purpose: table() uses it
  # as the dimension name, which becomes the first column header in the CSV.
  tagbufferMS02 <- unlist(pair_tags)

  F.ms02current <- data.frame(table(tagbufferMS02))
  write.csv(F.ms02current, file = paste0("tmp/_Fms02tag_", file),
            quote = TRUE, row.names = FALSE)
} # tmpbuffer

No comments:

Post a Comment