applejack:0.yeast.growth.fitness hqin$ ll *Rmd
-rw-r--r-- 1 hqin staff 3.9K Nov 22 22:24 analyze_growth_fitness.Rmd
Analyze growth fitness
H Qin
11/16/2017
Based on the 20131028 code and its 20151012 correction for commas in gene names. The deletion fitness data can be downloaded from http://www-deletion.stanford.edu/YDPM/YDPM_index.html
“In the Deletion Project, strains from the deletion collection were monitored under 9 different media conditions selected for the study of mitochondrial function. 5791 heterozygous diploid and 4706 homozygous diploid deletion strains were monitored in parallel using molecular barcodes on fermentable (YPD, YPDGE) and non-fermentable substrates (YPG, YPE, YPL). The YDPM database contains both the raw data and growth rates calculated for each strain in each media condition. Strains can be searched by ORF or Gene name to access growth measurements and data plots for each strain.”
What are the genotypes in the 4 yeast fitness files?
# Load the four YDPM regression tables (Tc1/Tc2, heterozygous/homozygous)
# from tab-separated text files in the project's data directory.

# NOTE(review): rm(list = ls()) and setwd() are kept so the script behaves as
# before, but both tie it to one interactive session and one machine; consider
# running from the project root instead.
rm(list = ls())
setwd("~/github/yeast.growth.fitness")

# Directory (relative to the working directory) that holds the input files.
datapath <- "data"
my.files <- c(
  "Regression_Tc1_het.txt",
  "Regression_Tc1_hom.txt",
  "Regression_Tc2_het.txt",
  "Regression_Tc2_hom.txt"
)

# Build every path from `datapath` rather than a second hard-coded "data/".
# fill = TRUE pads rows that have fewer fields than the header instead of
# stopping with an error.
tb1het <- read.table(file.path(datapath, my.files[1]), header = TRUE, sep = "\t", fill = TRUE)
tb2hom <- read.table(file.path(datapath, my.files[2]), header = TRUE, sep = "\t", fill = TRUE)
tb3het <- read.table(file.path(datapath, my.files[3]), header = TRUE, sep = "\t", fill = TRUE)
tb4hom <- read.table(file.path(datapath, my.files[4]), header = TRUE, sep = "\t", fill = TRUE)
Essential genes are expected to have YPD measurements in tb1het and tb3het (heterozygous deletions), but not in tb2hom and tb4hom (homozygous deletions).
# Label each element of `ypd` by whether a YPD value is present.
# Returns a character vector the same length as `ypd`:
# "noYPDMeasurement" where the value is NA, "HasYPDMeasurement" otherwise.
# Shared by all four tables so the labels cannot drift between copies.
flag_ypd_measurement <- function(ypd) {
  ifelse(is.na(ypd), "noYPDMeasurement", "HasYPDMeasurement")
}

# Table 1: Tc1 heterozygous deletions.
tb1het$YPDFlag1 <- flag_ypd_measurement(tb1het$YPD)
table(tb1het$YPDFlag1)
##
## HasYPDMeasurement noYPDMeasurement
## 5744 174
# Table 3: Tc2 heterozygous deletions.
tb3het$YPDFlag3 <- flag_ypd_measurement(tb3het$YPD)
table(tb3het$YPDFlag3)
##
## HasYPDMeasurement noYPDMeasurement
## 5766 152
# Table 2: Tc1 homozygous deletions.
tb2hom$YPDFlag2 <- flag_ypd_measurement(tb2hom$YPD)
table(tb2hom$YPDFlag2)
##
## HasYPDMeasurement noYPDMeasurement
## 4659 1259
# Table 4: Tc2 homozygous deletions.
tb4hom$YPDFlag4 <- flag_ypd_measurement(tb4hom$YPD)
table(tb4hom$YPDFlag4)
##
## HasYPDMeasurement noYPDMeasurement
## 4718 1200
# Join each heterozygous table to its homozygous partner on the shared ORF key.
tb12 <- merge(tb1het, tb2hom, by = "orf")
tb34 <- merge(tb3het, tb4hom, by = "orf")

# Reduce each merged table to its first two columns (orf and gene.x, per the
# str() output below) plus every column whose name contains "Flag".
tb12 <- tb12[, c(1:2, grep("Flag", names(tb12)))]
tb34 <- tb34[, c(1:2, grep("Flag", names(tb34)))]
#names(tb12) = c("orf",'gene','Anno1','Growth1')
#names(tb34) = c('orf','gene','Anno2','Growth2')
str(tb12)
## 'data.frame': 5918 obs. of 4 variables:
## $ orf : Factor w/ 5918 levels "YAL001C","YAL002W",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ gene.x : Factor w/ 5915 levels "37165","AAC1",..: 3133 3395 750 3424 2971 803 906 2912 1470 3425 ...
## $ YPDFlag1: chr "HasYPDMeasurement" "HasYPDMeasurement" "HasYPDMeasurement" "HasYPDMeasurement" ...
## $ YPDFlag2: chr "HasYPDMeasurement" "HasYPDMeasurement" "noYPDMeasurement" "HasYPDMeasurement" ...
str(tb34)
## 'data.frame': 5918 obs. of 4 variables:
## $ orf : Factor w/ 5918 levels "YAL001C","YAL002W",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ gene.x : Factor w/ 5915 levels "37165","AAC1",..: 3133 3395 750 3424 2971 803 906 2912 1470 3425 ...
## $ YPDFlag3: chr "HasYPDMeasurement" "HasYPDMeasurement" "HasYPDMeasurement" "HasYPDMeasurement" ...
## $ YPDFlag4: chr "HasYPDMeasurement" "HasYPDMeasurement" "noYPDMeasurement" "HasYPDMeasurement" ...
# Combine the two flag tables into one row per ORF. Both inputs carry a
# gene.x column, so merge() suffixes them as gene.x.x and gene.x.y.
tb <- merge(tb12, tb34, by = "orf")
head(tb, n = 6L)
## orf gene.x.x YPDFlag1 YPDFlag2 gene.x.y
## 1 YAL001C TFC3 HasYPDMeasurement HasYPDMeasurement TFC3
## 2 YAL002W VPS8 HasYPDMeasurement HasYPDMeasurement VPS8
## 3 YAL003W EFB1 HasYPDMeasurement noYPDMeasurement EFB1
## 4 YAL004W YAL004W HasYPDMeasurement HasYPDMeasurement YAL004W
## 5 YAL005C ssa1 HasYPDMeasurement HasYPDMeasurement ssa1
## 6 YAL007C ERP2 HasYPDMeasurement HasYPDMeasurement ERP2
## YPDFlag3 YPDFlag4
## 1 HasYPDMeasurement HasYPDMeasurement
## 2 HasYPDMeasurement HasYPDMeasurement
## 3 HasYPDMeasurement noYPDMeasurement
## 4 HasYPDMeasurement HasYPDMeasurement
## 5 HasYPDMeasurement HasYPDMeasurement
## 6 HasYPDMeasurement HasYPDMeasurement
Consistency check for sanity.
# Sanity check: the gene name carried in from tb12 (gene.x.x) must equal the
# one carried in from tb34 (gene.x.y) for every ORF.
# Compare as character so the check also works when the two columns are
# factors with different level sets (`==` on such factors stops with an error).
# The comparison is already logical, so no ifelse(..., T, F) wrapper is needed.
tb$name.check <- as.character(tb$gene.x.x) == as.character(tb$gene.x.y)
# useNA = "ifany" reports rows with a missing gene name instead of silently
# dropping them from the tally; with no NA the printed table is unchanged.
table(tb$name.check, useNA = "ifany")
##
## TRUE
## 5918
# Start every ORF as 'abnormal' (an earlier draft initialised with NA), then
# relabel as 'nonessential' those with a YPD measurement in both homozygous
# tables (flags 2 and 4).
tb$essentiality <- "abnormal"
tb$essentiality <- with(
  tb,
  ifelse(
    YPDFlag2 == "HasYPDMeasurement" & YPDFlag4 == "HasYPDMeasurement",
    "nonessential",
    essentiality
  )
)
table(tb$essentiality)
##
## abnormal nonessential
## 1264 4654
# Relabel as 'essential' the ORFs with no YPD measurement in either
# homozygous table (flags 2 and 4) but with a measurement in both
# heterozygous tables (flags 1 and 3). All other labels are left as they are.
tb$essentiality <- with(
  tb,
  ifelse(
    YPDFlag2 == "noYPDMeasurement" &
      YPDFlag4 == "noYPDMeasurement" &
      YPDFlag1 == "HasYPDMeasurement" &
      YPDFlag3 == "HasYPDMeasurement",
    "essential",
    essentiality
  )
)
table(tb$essentiality)
##
## abnormal essential nonessential
## 122 1142 4654
# Total of the class counts; compare against the row count of tb to confirm
# every ORF received a label.
sum(table(tb$essentiality))
## [1] 5918
# Write the combined table to CSV without row names.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
write.csv(tb, "data/SummaryRegressionHetHom20171122.csv", row.names = FALSE)
#what was I doing here?
#tbReal$essenflag = ifelse( tbReal$Growth1=='growth1' & tbReal$Growth2=='growth2', 'nonessential',
# ifelse(tbReal$Growth1=='nogrowth1' & tbReal$Growth2=='nogrowth2', 'essential', 'abnormal') )
#head(tbReal)
#table(tbReal$essenflag)
# abnormal essential nonessential
# 68 1152 4552
#Good, consistent results. Use these for further analysis. 2013 Oct 29
# 20151012, found two "TRUE" in orfs
#length(tbReal$orf) #5772
#length(unique(tbReal$orf)) #5772
#write.csv(tbReal, "SummaryRegressionHetHom2015Oct12.csv", row.names = F ) #change 20151012
#write.csv(tbReal, "SummaryRegressionHetHom2015Oct12.csv", quote=F, row.names=F )
No comments:
Post a Comment