Q: Why do I have 'abnormal', 'essential', 'nonessential' genes?
Genetic interactions CellMap exploration
H Qin
10/17/2017
CellMap provides genetic interactions in pairwise format. The interactions are reported per strain ID (allele). The mapping between strain IDs and ORFs is in “strain_ids_and_single_mutant_fitness.csv”.
# Start from an empty workspace and fix the RNG seed.
rm(list = ls())
set.seed(2017)
# Folder holding the CellMap pairwise files; the trailing slash matters
# because file names are appended to this string directly.
datapath <- "~/data/Sce/CellMap/20170626/S1.pairwise/"
# 0 = normal run; the merged table is removed once it has been filtered.
debug <- 0
list.files(path = datapath)
## [1] "SGA_DAmP.txt"
## [2] "SGA_ExE.txt"
## [3] "SGA_ExN_NxE.txt"
## [4] "SGA_NxN.txt"
## [5] "strain_ids_and_single_mutant_fitness.csv"
## [6] "strain_ids_and_single_mutant_fitness.xlsx"
Load naming lookup tables
# Lookup table used to translate strain IDs into systematic gene names.
dic <- read.csv(
  file = paste0(datapath, "strain_ids_and_single_mutant_fitness.csv")
)
Load the essential/non-essential gene information that H. Qin generated.
# Show what is available in the local "data" folder.
list.files("data")
## [1] "SummaryRegressionHetHom2015Oct12.csv"
## [2] "SummaryRegressionHetHomFactorized2015Oct13.csv"
# Per-ORF summary table; its `orf` and `essenflag` columns are used later.
fit <- read.csv(file = "data/SummaryRegressionHetHomFactorized2015Oct13.csv")
Load pairwise interaction data
# Essential x Essential (ExE) interactions, tab-delimited with a header row.
tb.ee <- read.table(
  file = paste0(datapath, "SGA_ExE.txt"),
  header = TRUE,
  sep = "\t"
)
summary(tb.ee)
## Query.Strain.ID Query.allele.name Array.Strain.ID
## YNL308C_tsq2680: 792 mob2-11-supp1: 1419 YCR002C_tsa78 : 1090
## YEL019C_tsq533 : 788 kri1-5001 : 792 YCR002C_tsa79 : 1090
## YNL287W_tsq38 : 788 mms21-1 : 788 YAL041W_tsa410: 1088
## YCL059C_tsq1104: 785 sec21-1 : 788 YAR007C_tsa273: 1088
## YCL059C_tsq326 : 785 cdc10-1 : 785 YAL038W_tsa34 : 1087
## YCR002C_tsq1072: 785 cdc10-2 : 785 YAL041W_tsa412: 1086
## (Other) :813847 (Other) :813213 (Other) :812041
## Array.allele.name Arraytype.Temp Genetic.interaction.score..ε.
## cdc10-1: 1090 TSA26:815635 Min. :-0.790500
## cdc10-2: 1090 TSA30: 2935 1st Qu.:-0.034800
## cdc24-2: 1088 Median : 0.005500
## rfa1-m2: 1088 Mean :-0.008011
## cdc19-1: 1087 3rd Qu.: 0.035700
## cdc24-3: 1086 Max. : 0.440300
## (Other):812041
## P.value Query.single.mutant.fitness..SMF. Array.SMF
## Min. :0.00000 Min. :0.13 Min. :0.1137
## 1st Qu.:0.03872 1st Qu.:0.72 1st Qu.:0.7570
## Median :0.19940 Median :0.85 Median :0.8581
## Mean :0.20933 Mean :0.82 Mean :0.8326
## 3rd Qu.:0.35770 3rd Qu.:0.93 3rd Qu.:0.9359
## Max. :1.00000 Max. :1.14 Max. :1.0550
## NA's :109963
## Double.mutant.fitness Double.mutant.fitness.standard.deviation
## Min. :-0.1435 Min. :0.00000
## 1st Qu.: 0.5666 1st Qu.:0.02290
## Median : 0.7123 Median :0.03850
## Mean : 0.6912 Mean :0.04786
## 3rd Qu.: 0.8370 3rd Qu.:0.06120
## Max. : 1.3817 Max. :1.24310
##
# Essential x Nonessential and Nonessential x Essential (ExN / NxE)
# interactions, tab-delimited with a header row.
tb.en <- read.table(
  file = paste0(datapath, "SGA_ExN_NxE.txt"),
  header = TRUE,
  sep = "\t"
)
summary(tb.en)
## Query.Strain.ID Query.allele.name Array.Strain.ID
## YJL143W_tsq3031: 3657 prp21-ts : 3657 YAR007C_tsa273 : 2212
## YJL203W_tsq401 : 3657 tim17-5001: 3657 YFR036W_tsa88 : 2212
## YIL021W_tsq1136: 3623 med6-ts : 3646 YAL025C_tsa1066: 2211
## YJL072C_tsq2842: 3623 psf2-5001 : 3623 YAL038W_tsa34 : 2210
## YIL021W_tsq2727: 3615 rpb3-2 : 3623 YFL008W_tsa68 : 2210
## YJR064W_tsq2853: 3585 rpb3-5001 : 3615 YFL009W_tsa334 : 2210
## (Other) :3687193 (Other) :3687132 (Other) :3695688
## Array.allele.name Arraytype.Temp Genetic.interaction.score..ε.
## cdc26-1 : 2212 DMA26:1212145 Min. :-1.09550
## rfa1-m2 : 2212 DMA30: 798532 1st Qu.:-0.02740
## mak16-5001: 2211 TSA26:1677715 Median :-0.00060
## cdc19-1 : 2210 TSA30: 20561 Mean :-0.00528
## cdc4-3 : 2210 3rd Qu.: 0.02390
## rsc8-ts16 : 2210 Max. : 1.33530
## (Other) :3695688
## P.value Query.single.mutant.fitness..SMF. Array.SMF
## Min. :0.0000 Min. :0.1 Min. :0.1137
## 1st Qu.:0.1155 1st Qu.:0.7 1st Qu.:0.8393
## Median :0.2746 Median :0.9 Median :0.9577
## Mean :0.2554 Mean :0.8 Mean :0.9072
## 3rd Qu.:0.3950 3rd Qu.:1.0 3rd Qu.:1.0065
## Max. :1.0000 Max. :1.1 Max. :1.1118
## NA's :403475
## Double.mutant.fitness Double.mutant.fitness.standard.deviation
## Min. :-0.1764 Min. :0.00000
## 1st Qu.: 0.6532 1st Qu.:0.02160
## Median : 0.8001 Median :0.03650
## Mean : 0.7680 Mean :0.04677
## 3rd Qu.: 0.9126 3rd Qu.:0.05910
## Max. : 2.1866 Max. :1.19310
##
# Nonessential x Nonessential (NxN) interactions. NxN is not needed for the
# aging-modeling project, but it is loaded so the merged table is complete.
# quote = "" disables quote handling: with read.table's default quote
# characters a stray quote in this file swallows everything up to the end of
# the file ("EOF within quoted string"), which truncates the table and leaves
# a partially filled row of NAs.
tb.nn <- read.table(
  file = paste0(datapath, "SGA_NxN.txt"),
  header = TRUE,
  sep = "\t",
  quote = ""
)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec =
## dec, : EOF within quoted string
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec =
## dec, : number of items read is not a multiple of the number of columns
# Column-wise overview of the NxN table.
print(summary(tb.nn))
## Query.Strain.ID Query.allele.name Array.Strain.ID
## YJL219W_sn2362: 3708 hxt9 : 3708 YIL173W_dma2391: 1712
## YIR017C_sn2001: 3696 met28 : 3696 YIL165C_dma2396: 1710
## YJL154C_sn371 : 3674 vps35 : 3674 YIL170W_dma2392: 1706
## YJL141C_sn1525: 3673 yak1 : 3673 YIL141W_dma2367: 1700
## YJL056C_sn2004: 3667 zap1 : 3667 YIL145C_dma2366: 1698
## YJR117W_sn882 : 3666 ste24 : 3666 YIL140W_dma2368: 1694
## (Other) :6261409 (Other):6261409 (Other) :6273273
## Array.allele.name Arraytype.Temp Genetic.interaction.score..ε.
## vth1 : 1712 : 1 Min. :-1.16160
## yil165c: 1710 DMA26: 203777 1st Qu.:-0.02200
## hxt12 : 1706 DMA30:6079715 Median :-0.00220
## yil141w: 1700 Mean :-0.00389
## pan6 : 1698 3rd Qu.: 0.01780
## axl2 : 1694 Max. : 0.86760
## (Other):6273273 NA's :1
## P.value Query.single.mutant.fitness..SMF. Array.SMF
## Min. :0.0000 Min. :0.2 Min. :0.2265
## 1st Qu.:0.1633 1st Qu.:0.9 1st Qu.:0.9667
## Median :0.3083 Median :1.0 Median :1.0010
## Mean :0.2809 Mean :0.9 Mean :0.9712
## 3rd Qu.:0.4113 3rd Qu.:1.0 3rd Qu.:1.0195
## Max. :1.0000 Max. :1.1 Max. :1.1118
## NA's :1 NA's :479989 NA's :1
## Double.mutant.fitness Double.mutant.fitness.standard.deviation
## Min. :-0.2577 Min. :0.00000
## 1st Qu.: 0.8521 1st Qu.:0.01900
## Median : 0.9695 Median :0.03170
## Mean : 0.9069 Mean :0.04062
## 3rd Qu.: 1.0199 3rd Qu.:0.05050
## Max. : 1.7606 Max. :1.00090
## NA's :1 NA's :1
Column names in the three tables are the same.
# One row per table (ExN/NxE, ExE, NxN), one column per column name.
do.call(rbind, lapply(list(tb.en, tb.ee, tb.nn), names))
## [,1] [,2] [,3]
## [1,] "Query.Strain.ID" "Query.allele.name" "Array.Strain.ID"
## [2,] "Query.Strain.ID" "Query.allele.name" "Array.Strain.ID"
## [3,] "Query.Strain.ID" "Query.allele.name" "Array.Strain.ID"
## [,4] [,5] [,6]
## [1,] "Array.allele.name" "Arraytype.Temp" "Genetic.interaction.score..ε."
## [2,] "Array.allele.name" "Arraytype.Temp" "Genetic.interaction.score..ε."
## [3,] "Array.allele.name" "Arraytype.Temp" "Genetic.interaction.score..ε."
## [,7] [,8] [,9]
## [1,] "P.value" "Query.single.mutant.fitness..SMF." "Array.SMF"
## [2,] "P.value" "Query.single.mutant.fitness..SMF." "Array.SMF"
## [3,] "P.value" "Query.single.mutant.fitness..SMF." "Array.SMF"
## [,10] [,11]
## [1,] "Double.mutant.fitness" "Double.mutant.fitness.standard.deviation"
## [2,] "Double.mutant.fitness" "Double.mutant.fitness.standard.deviation"
## [3,] "Double.mutant.fitness" "Double.mutant.fitness.standard.deviation"
Merge 3 tables into 1 table.
# Stack the ExE, ExN/NxE and NxN tables row-wise into one data frame.
tb.gin <- rbind(tb.ee, tb.en, tb.nn)
Double-check the merged results
# The merged table must have as many rows as the three inputs together.
nrow(tb.gin) == nrow(tb.ee) + nrow(tb.en) + nrow(tb.nn)
## [1] TRUE
Remove unused table to free up memory
# List the workspace, drop the three per-class tables now that they are
# merged, then list the workspace again to confirm the removal.
ls()
rm(list = c("tb.en", "tb.ee", "tb.nn"))
ls()
## [1] "datapath" "debug" "dic" "fit" "tb.gin"
Costanzo et al. (2016) suggested lenient, intermediate, and stringent cutoffs for genetic-interaction (gin) quality control.
# Lenient cutoff: keep interactions with P.value <= 0.05.
# which() drops rows whose P.value is NA; plain logical indexing would turn
# each of them into a row consisting entirely of NAs.
tb.gin.lenient <- tb.gin[which(tb.gin$P.value <= 0.05), ]
if (debug == 0) {
  rm(tb.gin)  # free up memory
}
Map strain IDs to ORFs and add my essentiality flags.
# Translate query/array strain IDs into systematic gene names via `dic`.
# Strain IDs that are absent from `dic` yield NA.
tb.gin.lenient[["ORF1"]] <- dic$Systematic.gene.name[
  match(tb.gin.lenient$Query.Strain.ID, dic$Strain.ID)
]
tb.gin.lenient[["ORF2"]] <- dic$Systematic.gene.name[
  match(tb.gin.lenient$Array.Strain.ID, dic$Strain.ID)
]
# Attach the essentiality flag from `fit` to each ORF.
# ORFs that are absent from `fit` yield NA.
tb.gin.lenient[["essenflag1"]] <- fit$essenflag[
  match(tb.gin.lenient$ORF1, fit$orf)
]
tb.gin.lenient[["essenflag2"]] <- fit$essenflag[
  match(tb.gin.lenient$ORF2, fit$orf)
]
head(tb.gin.lenient)
## Query.Strain.ID Query.allele.name Array.Strain.ID Array.allele.name
## 1 YAL001C_tsq508 tfc3-g349e YBL023C_tsa111 mcm2-1
## 2 YAL001C_tsq508 tfc3-g349e YBL026W_tsa1065 lsm2-5001
## 7 YAL001C_tsq508 tfc3-g349e YBL034C_tsa950 stu1-7
## 13 YAL001C_tsq508 tfc3-g349e YBL076C_tsa275 ils1-1
## 16 YAL001C_tsq508 tfc3-g349e YBL097W_tsa510 brn1-9
## 25 YAL001C_tsq508 tfc3-g349e YBR029C_tsa1063 cds1-5001
## Arraytype.Temp Genetic.interaction.score..ε. P.value
## 1 TSA30 -0.0348 5.042e-03
## 2 TSA30 -0.3529 3.591e-06
## 7 TSA30 -0.1294 1.931e-02
## 13 TSA30 -0.0250 1.301e-04
## 16 TSA30 -0.0808 5.582e-15
## 25 TSA30 -0.1173 8.243e-05
## Query.single.mutant.fitness..SMF. Array.SMF Double.mutant.fitness
## 1 0.8285 0.9254 0.7319
## 2 0.8285 0.9408 0.4266
## 7 0.8285 0.6690 0.4249
## 13 0.8285 0.8097 0.6458
## 16 0.8285 0.5464 0.3719
## 25 0.8285 0.9007 0.6289
## Double.mutant.fitness.standard.deviation ORF1 ORF2 essenflag1
## 1 0.0102 YAL001C YBL023C nonessential
## 2 0.0790 YAL001C YBL026W nonessential
## 7 0.0482 YAL001C YBL034C nonessential
## 13 0.0054 YAL001C YBL076C nonessential
## 16 0.0077 YAL001C YBL097W nonessential
## 25 0.0226 YAL001C YBR029C nonessential
## essenflag2
## 1 essential
## 2 essential
## 7 abnormal
## 13 essential
## 16 essential
## 25 essential
# Intermediate cutoff: |score| > 0.08. Stringent cutoff: score > 0.16 or
# score < -0.12. Both are subsets of the lenient table.
# which() keeps only rows where the test is TRUE, so a missing score does not
# become a row of NAs in the subset.
tb.gin.intermediate <- tb.gin.lenient[
  which(abs(tb.gin.lenient$Genetic.interaction.score..ε.) > 0.08),
]
tb.gin.stringent <- tb.gin.lenient[
  which(
    tb.gin.lenient$Genetic.interaction.score..ε. > 0.16 |
      tb.gin.lenient$Genetic.interaction.score..ε. < -0.12
  ),
]
# Distribution of interaction scores that pass the lenient cutoff.
hist(tb.gin.lenient$Genetic.interaction.score..ε., breaks = 100)
summary(tb.gin.lenient)
## Query.Strain.ID Query.allele.name Array.Strain.ID
## YJL029C_sn248 : 1364 vps53 : 1364 YIL048W_tsa188: 1368
## YIR033W_sn1943 : 1214 mga2 : 1214 YDL008W_tsa783: 1328
## YIL004C_tsq1171: 1208 bet1-1 : 1208 YDR172W_tsa28 : 1316
## YJR002W_tsq2069: 1124 mpp10-5001: 1124 YFL039C_tsa140: 1315
## YJR045C_tsq2790: 1045 ssc1-2 : 1045 YHR164C_tsa352: 1274
## (Other) :1563516 (Other) :1563516 (Other) :1562870
## NA's : 1 NA's : 1 NA's : 1
## Array.allele.name Arraytype.Temp Genetic.interaction.score..ε.
## neo1-2 : 1368 TSA26:523518 Min. :-1.16160
## apc11-13: 1328 TSA30: 5813 1st Qu.:-0.07190
## sup35-td: 1316 DMA26:203445 Median :-0.01780
## act1-125: 1315 DMA30:836695 Mean :-0.01934
## dna2-2 : 1274 : 0 3rd Qu.: 0.05520
## (Other) :1562870 NA's : 1 Max. : 1.33530
## NA's : 1 NA's :1
## P.value Query.single.mutant.fitness..SMF. Array.SMF
## Min. :0.0000000 Min. :0.11 Min. :0.1137
## 1st Qu.:0.0000455 1st Qu.:0.76 1st Qu.:0.8243
## Median :0.0048550 Median :0.92 Median :0.9525
## Mean :0.0125985 Mean :0.86 Mean :0.8981
## 3rd Qu.:0.0229400 3rd Qu.:1.00 3rd Qu.:1.0060
## Max. :0.0500000 Max. :1.14 Max. :1.1118
## NA's :1 NA's :165086 NA's :1
## Double.mutant.fitness Double.mutant.fitness.standard.deviation
## Min. :-0.2577 Min. :0.0000
## 1st Qu.: 0.6044 1st Qu.:0.0090
## Median : 0.8079 Median :0.0163
## Mean : 0.7654 Mean :0.0260
## 3rd Qu.: 0.9619 3rd Qu.:0.0301
## Max. : 2.1866 Max. :0.9147
## NA's :1 NA's :1
## ORF1 ORF2 essenflag1
## YFL039C : 8053 YFL039C : 14881 abnormal : 27510
## YFL034C-B: 3562 YFL034C-B: 7918 essential :493722
## YNL181W : 3529 YHR036W : 4162 nonessential:916926
## YBL105C : 3090 YNL061W : 3851 NA's :131314
## YBR088C : 2778 YJR076C : 3789
## (Other) :1548459 (Other) :1534870
## NA's : 1 NA's : 1
## essenflag2
## abnormal : 25160
## essential : 480824
## nonessential:1009399
## NA's : 54089
##
##
##
# Column-wise overview of the intermediate-cutoff table.
print(summary(tb.gin.intermediate))
## Query.Strain.ID Query.allele.name Array.Strain.ID
## YNL243W_sn760 : 711 sla2 : 711 YDL008W_tsa783: 1017
## YDR293C_sn729 : 692 ssd1 : 692 YLR078C_tsa199: 940
## YKL154W_tsq1163: 678 srp102-510: 678 YDR172W_tsa28 : 935
## YJR045C_tsq2790: 664 ssc1-2 : 664 YFL039C_tsa140: 883
## YLR275W_tsq2653: 656 smd2-5005 : 656 YER157W_tsa41 : 881
## (Other) :566233 (Other) :566233 (Other) :564978
## NA's : 1 NA's : 1 NA's : 1
## Array.allele.name Arraytype.Temp Genetic.interaction.score..ε.
## apc11-13: 1017 TSA26:256208 Min. :-1.16160
## bos1-1 : 940 TSA30: 3153 1st Qu.:-0.15690
## sup35-td: 935 DMA26: 81038 Median :-0.09740
## act1-125: 883 DMA30:229235 Mean :-0.05968
## cog3-1 : 881 : 0 3rd Qu.: 0.09720
## (Other) :564978 NA's : 1 Max. : 1.33530
## NA's : 1 NA's :1
## P.value Query.single.mutant.fitness..SMF. Array.SMF
## Min. :0.0000000 Min. :0.18 Min. :0.1137
## 1st Qu.:0.0000015 1st Qu.:0.76 1st Qu.:0.7400
## Median :0.0016875 Median :0.89 Median :0.8690
## Mean :0.0097757 Mean :0.85 Mean :0.8414
## 3rd Qu.:0.0160700 3rd Qu.:0.99 3rd Qu.:0.9745
## Max. :0.0500000 Max. :1.14 Max. :1.1118
## NA's :1 NA's :87094 NA's :1
## Double.mutant.fitness Double.mutant.fitness.standard.deviation
## Min. :-0.2577 Min. :0.00000
## 1st Qu.: 0.4933 1st Qu.:0.02280
## Median : 0.6851 Median :0.03680
## Mean : 0.6773 Mean :0.04859
## 3rd Qu.: 0.8650 3rd Qu.:0.05780
## Max. : 2.1866 Max. :0.91470
## NA's :1 NA's :1
## ORF1 ORF2 essenflag1
## YFL039C : 4584 YFL039C : 8373 abnormal : 13170
## YNL181W : 2435 YFL034C-B: 4706 essential :221747
## YFL034C-B: 2028 YJR076C : 2437 nonessential:290106
## YBL105C : 1752 YDR182W : 2224 NA's : 44612
## YEL034W : 1522 YNL061W : 1928
## (Other) :557313 (Other) :549966
## NA's : 1 NA's : 1
## essenflag2
## abnormal : 12310
## essential :234519
## nonessential:303111
## NA's : 19695
##
##
##
# Column-wise overview of the stringent-cutoff table.
print(summary(tb.gin.stringent))
## Query.Strain.ID Query.allele.name Array.Strain.ID
## YNL243W_sn760 : 574 sla2 : 574 YDL008W_tsa783: 649
## YDR293C_sn729 : 460 ssd1 : 460 YLR268W_tsa121: 629
## YBR049C_tsq1348: 448 reb1-5001 : 448 YFL039C_tsa140: 518
## YLR275W_tsq2653: 425 smd2-5005 : 425 YER157W_tsa41 : 512
## YEL034W_tsq737 : 377 mob2-11-supp1: 380 YJR076C_tsa84 : 494
## (Other) :250780 (Other) :250777 (Other) :250262
## NA's : 1 NA's : 1 NA's : 1
## Array.allele.name Arraytype.Temp Genetic.interaction.score..ε.
## apc11-13: 649 TSA26:124897 Min. :-1.1616
## sec22-3 : 629 TSA30: 1638 1st Qu.:-0.2354
## act1-125: 518 DMA26: 35986 Median :-0.1670
## cog3-1 : 512 DMA30: 90543 Mean :-0.1610
## cdc11-5 : 494 : 0 3rd Qu.:-0.1322
## (Other) :250262 NA's : 1 Max. : 1.3353
## NA's : 1 NA's :1
## P.value Query.single.mutant.fitness..SMF. Array.SMF
## Min. :0.0000000 Min. :0.18 Min. :0.1137
## 1st Qu.:0.0000000 1st Qu.:0.76 1st Qu.:0.7235
## Median :0.0003424 Median :0.89 Median :0.8465
## Mean :0.0072841 Mean :0.86 Mean :0.8220
## 3rd Qu.:0.0092682 3rd Qu.:0.98 3rd Qu.:0.9510
## Max. :0.0500000 Max. :1.14 Max. :1.1118
## NA's :1 NA's :43715 NA's :1
## Double.mutant.fitness Double.mutant.fitness.standard.deviation
## Min. :-0.2577 Min. :0.0000
## 1st Qu.: 0.3829 1st Qu.:0.0310
## Median : 0.5491 Median :0.0536
## Mean : 0.5619 Mean :0.0686
## 3rd Qu.: 0.7233 3rd Qu.:0.0857
## Max. : 2.1866 Max. :0.9147
## NA's :1 NA's :1
## ORF1 ORF2 essenflag1
## YFL039C : 2200 YFL039C : 4416 abnormal : 6379
## YNL181W : 1350 YFL034C-B: 2238 essential :104477
## YFL034C-B: 1068 YJR076C : 1381 nonessential:122575
## YEL034W : 897 YDR182W : 1215 NA's : 19634
## YBL105C : 818 YLR268W : 950
## (Other) :246731 (Other) :242864
## NA's : 1 NA's : 1
## essenflag2
## abnormal : 6140
## essential :113948
## nonessential:124214
## NA's : 8763
##
##
##
No comments:
Post a Comment