20171026: bug found. ORF1 and ORF2 order are not checked, so pairs may not be unique. 
Move 'merged_PPIGIN_2014Jan20.tab" to "~/projects/0.ginppi.reliability.simulation/data".
# permutation effect on aging?
# lambda ~ 1/connectivity of nodes
# 2013 Dec 20, merge DIP PPI and Genetic Inxt Net -> Multi-net approach
rm(list=ls())
#require(date)
require('flexsurv')
#source("/Users/hongqin/lib/R/lifespan.r")
source("lifespan.r")
#setwd("~/projects/0.network.aging.prj/0.ppi.reliability.simulation")
list.files(path='data', )
debug = 0;
#yeast PPI
#pairs = read.csv('data/pairs.csv', colClasses=c('character','character'))
#this yeast ppi dataset is consistent with Taiwan group's report.
dip = read.csv("data/yeastDIP.csv")
pairsPPI = dip[,c(1,2)]
pairsPPI$ORF1 = as.character(pairsPPI$ORF1)
pairsPPI$ORF2 = as.character(pairsPPI$ORF2)
pairsPPI = pairsPPI[ pairsPPI$ORF1 != pairsPPI$ORF2, ]#remove self-intxns
# yeast genetic network
#gPairs = read.csv("data/sgadata_costanzo2009_stringentCutoff_101120.csv", header=F)
gPairs = read.csv("data/sgadata_costanzo2009_lenientCutoff_101120.csv", header=F)
names(gPairs) = c("ORF1", "Name1", "ORF2", "Name2", NA, NA, NA)
gPairs$ORF1 = as.character( gPairs$ORF1 )
gPairs$ORF2 = as.character( gPairs$ORF2 )
#merge PPI and GIN
pairs = rbind(pairsPPI[,c("ORF1","ORF2")], gPairs[,c("ORF1","ORF2")])
# remove self-intxns
pairs = pairs[ pairs$ORF1 != pairs$ORF2, ]
# 96851 pairs for DIP+GIN.strigentCutoff
# 786118 for DIP+GIN.lenientCutoff
#essential gene info
essenTb = read.csv('data/SummaryRegressionHetHom2013Oct29.csv', colClasses=rep('character', 9))
#######################
# How do the two data set overlap? DIP seems to contain some questionable orfs
uniq.orf.from.pairs = unique(c(pairs$ORF1, pairs$ORF2)) #4207 ORF
matches = uniq.orf.from.pairs %in% unique(essenTb$orf)
table(matches)
#FALSE TRUE
# 720 5507
unmatchedORF = uniq.orf.from.pairs[! matches]
matches = uniq.orf.from.pairs %in% unique(essenTb$orf[essenTb$essenflag=='essential'])
table(matches)
#FALSE TRUE
#5171 1056
#This is a good coverage
matches = uniq.orf.from.pairs %in% unique(essenTb$orf[essenTb$essenflag=='nonessential'])
table(matches)
# FALSE TRUE
# 1839 4388 #this is amazingly consistent with Taiwan group's report.
#remove unmatched orfs from pairs
pairs$Removeflag = ifelse( pairs$ORF1 %in%unmatchedORF | pairs$ORF2 %in%unmatchedORF, T,F )
table(pairs$Removeflag)
# FALSE TRUE
# 572221 213897
#So, the updated DIP has 19770 intxn
#So, the DIP+GIN.lenior lead to 572221 intxns
pairs = pairs[! pairs$Removeflag, ]
table(pairs$Removeflag)
pairs = pairs[,1:2] ##This set of pairs is read for analysis
#write.table(pairs, "merged_PPIGIN_2014Jan20.tab", quote=F, row.names=F, col.names=F, sep='\t')

 
No comments:
Post a Comment