Thursday, December 19, 2013

Reciprocal blast runs for DIP and Sce ID match

# Use reciprocal best BLAST hits to match DIP IDs to SGD ORFs.

ace:sandbox hongqin$ pwd
/Users/hongqin/DIP/sandbox


$ makeblastdb -in s288c-prot.faa -dbtype prot 
Building a new DB, current time: 12/19/2013 20:30:31
New DB name:   s288c-prot.faa
New DB title:  s288c-prot.faa
Sequence type: Protein
Keep Linkouts: T
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 6718 sequences in 0.428148 seconds.
 

$ makeblastdb -in fasta20131201.seq -dbtype prot 
# This gives many warnings for empty entries.

# Ran from 20:50 to 21:48 (about 1 hour) on a Snow Leopard laptop
$ blastp -db s288c-prot.faa -query fasta20131201.seq -outfmt 6 -out _out_DIP2SGD.txt -evalue 1e-10 &
$ blastp -db fasta20131201.seq -query s288c-prot.faa -outfmt 6 -out _out_SGD2DIP.txt -evalue 1e-10 &

$ ps
  PID TTY           TIME CMD
  879 ttys000    0:00.04 -bash
  906 ttys000    3:50.79 blastp -db s288c-prot.faa -query fasta20131201.seq -outfmt 6 -out _out_DIP2SGD.txt -evalue 1e
  919 ttys000    0:02.18 blastp -db fasta20131201.seq -query s288c-prot.faa -outfmt 6 -out _out_SGD2DIP.txt -evalue 1e
  713 ttys001    0:00.18 -bash



$ simple_reciprocal_best_hits.01.pl -i1 _out_SGD2DIP.txt -i2 _out_DIP2SGD.txt -o _matchedSGD2DIP_IDs.txt

$ wc -l _matchedSGD2DIP_IDs.txt
    5160 _matchedSGD2DIP_IDs.txt



*** BLASTP Formatting options
 -outfmt <String>
   alignment view options:
     0 = pairwise,
     1 = query-anchored showing identities,
     2 = query-anchored no identities,
     3 = flat query-anchored, show identities,
     4 = flat query-anchored, no identities,
     5 = XML Blast output,
     6 = tabular,
     7 = tabular with comment lines,
     8 = Text ASN.1,
     9 = Binary ASN.1,
    10 = Comma-separated values,
    11 = BLAST archive format (ASN.1)
 

 
#######################
ace:blast.fasta.demo hongqin$ cat commands.txt
blastp -subject db.faa -query query.faa

makeblastdb -in db2.faa
blastp -query my.seq.faa -db db2.faa | less

blastp -query my.seq.faa -db db2.faa -outfmt 6
blastp -query my.seq.faa -db db2.faa -outfmt 7 | less

# Old version (legacy blastall); still works on OS X
blastall -p blastp -d db.faa -i query.faa -F F | less



No comments:

Post a Comment