# Use reciprocal best BLAST hits (blastp in both directions) to match DIP IDs to SGD ORFs.
ace:sandbox hongqin$ pwd
/Users/hongqin/DIP/sandbox
$ makeblastdb -in s288c-prot.faa -dbtype prot
Building a new DB, current time: 12/19/2013 20:30:31
New DB name: s288c-prot.faa
New DB title: s288c-prot.faa
Sequence type: Protein
Keep Linkouts: T
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 6718 sequences in 0.428148 seconds.
$ makeblastdb -in fasta20131201.seq -dbtype prot
# This gives many warnings about empty entries.
# Ran from 20:50 to 21:48 (about 1 hour) on a Snow Leopard laptop.
$ blastp -db s288c-prot.faa -query fasta20131201.seq -outfmt 6 -out _out_DIP2SGD.txt -evalue 1e-10 &
$ blastp -db fasta20131201.seq -query s288c-prot.faa -outfmt 6 -out _out_SGD2DIP.txt -evalue 1e-10 &
$ ps
PID TTY TIME CMD
879 ttys000 0:00.04 -bash
906 ttys000 3:50.79 blastp -db s288c-prot.faa -query fasta20131201.seq -outfmt 6 -out _out_DIP2SGD.txt -evalue 1e
919 ttys000 0:02.18 blastp -db fasta20131201.seq -query s288c-prot.faa -outfmt 6 -out _out_SGD2DIP.txt -evalue 1e
713 ttys001 0:00.18 -bash
$ simple_reciprocal_best_hits.01.pl -i1 _out_SGD2DIP.txt -i2 _out_DIP2SGD.txt -o _matchedSGD2DIP_IDs.txt
$ wc -l _matchedSGD2DIP_IDs.txt
5160 _matchedSGD2DIP_IDs.txt
*** BLASTP Formatting options
-outfmt <String>
alignment view options:
0 = pairwise,
1 = query-anchored showing identities,
2 = query-anchored no identities,
3 = flat query-anchored, show identities,
4 = flat query-anchored, no identities,
5 = XML Blast output,
6 = tabular,
7 = tabular with comment lines,
8 = Text ASN.1,
9 = Binary ASN.1,
10 = Comma-separated values,
11 = BLAST archive format (ASN.1)
#######################
ace:blast.fasta.demo hongqin$ cat commands.txt
blastp -subject db.faa -query query.faa
makeblastdb -in db2.faa
blastp -query my.seq.faa -db db2.faa | less
blastp -query my.seq.faa -db db2.faa -outfmt 6
blastp -query my.seq.faa -db db2.faa -outfmt 7 | less
# Old version (legacy blastall); still works on OS X.
blastall -p blastp -d db.faa -i query.faa -F F | less
No comments:
Post a Comment