# Show which files are present in the current working directory.
list.files()
## [1] "_explore_dip.html" "_explore_dip.Rmd" "Scere20170205.csv"
## [4] "Scere20170205.txt" "Scere20170205.xlsx"
#library(xlsx)
# Load the tab-separated table into `tb`.
# header = TRUE is spelled out because the shorthand T is an ordinary variable
# that can be reassigned. row.names = NULL forces automatic row numbering
# instead of letting read.table() take the first column as row names.
tb <- read.table(
  "Scere20170205.txt",
  header = TRUE,
  sep = "\t",
  row.names = NULL
)
#tb = read.xlsx("Scere20170205.xlsx", 1)
A visual check shows that there are interactions between yeast proteins and non-yeast proteins (such as human and fly). Some of the column names are shifted by one column.
# Order two scalar strings so that the larger one (by `>`) comes first.
#   char1, char2: single character values to compare.
# Returns a length-2 character vector c(larger, smaller); when both are equal
# the result simply contains that value twice.
big2small <- function(char1, char2) {
  # Start from the "char2 first" layout and flip it only when char1 is larger.
  ordered_pair <- c(char2, char1)
  if (char1 > char2) {
    ordered_pair <- rev(ordered_pair)
  }
  ordered_pair
}
# Build an order-independent identifier for every row of `tb`, so that an
# interaction listed as A-B and one listed as B-A share the same pairID.
# The two identifier columns are compared as whole vectors: this avoids the
# row-by-row data-frame assignment and also works for a table with zero rows,
# where 1:length(...) would wrongly iterate over c(1, 0).
id_a <- as.character(tb[, 1])
id_b <- as.character(tb[, 2])
# A missing identifier cannot be ordered; fail with an explicit message.
if (anyNA(id_a) || anyNA(id_b)) {
  stop("Missing interactor identifier in column 1 or 2 of tb", call. = FALSE)
}
# Same ordering rule as big2small(): the larger string goes first.
a_is_larger <- id_a > id_b
tb$pairID <- paste(
  ifelse(a_is_larger, id_a, id_b),
  ifelse(a_is_larger, id_b, id_a),
  sep = "::"
)
How many ExE interactions?
# Keep each pair identifier once, so every undirected interaction counts once.
unique_EEpairs <- unique(tb$pairID)
# Split every "A::B" identifier back into its two names with one vectorised
# call instead of growing a vector inside a loop. fixed = TRUE treats "::" as
# a literal separator.
all_names <- unlist(
  strsplit(as.character(unique_EEpairs), split = "::", fixed = TRUE)
)
# Count how many unique pairs each name occurs in. A self-pair such as "A::A"
# contributes two to the count of A.
degree <- table(all_names)
str(degree)
## 'table' int [1:5176(1d)] 3 68 7 6 9 9 56 5 5 1 ...
## - attr(*, "dimnames")=List of 1
## ..$ all_names: chr [1:5176] "DIP-1000N|refseq:NP_014991|uniprotkb:P12689" "DIP-1001N|refseq:NP_010241|uniprotkb:Q07350" "DIP-1002N|refseq:NP_010206|uniprotkb:Q07468" "DIP-1003N|refseq:NP_010131|uniprotkb:P25441" ...
# Mean of the per-name counts stored in the `degree` table.
mean(degree)
## [1] 8.878284
# Median of the same per-name counts.
median(degree)
## [1] 4
No comments:
Post a Comment