import warnings
import re
from Bio import Phylo
stree = "((Bha,Bcl), (Bpu,(Bli,(Bam,(Bmo,Bsu)))), (Bwe,(Bce,(Ban,Bth))))"
stree2 = re.sub( r'[\(\)\s+]', '', stree)
taxa = re.split( ',', stree2)
outgrouptaxa = taxa[0:2]
bsutaxa = taxa[2:7]
bcetaxa = taxa[7:12]
bacillustaxa = bsutaxa + bcetaxa
#tmpfl = "/tmp/_tmpfile2013Jan2.nwk"
#tmpFH = open( tmpfl, 'w')
#tmpFH.write( stree + ";\n" )
#tmpFH.close()
#mastertree = Phylo.read(tmpfl, 'newick')
def lookup_by_names(tree):
names = {}
for clade in tree.get_terminals():
if clade.name:
if clade.name in names:
raise ValueError("Duplicate key: %s" % clade.name)
names[clade.name] = clade
return names
from StringIO import StringIO
#mastertree = Phylo.read(StringIO(stree), 'newick')
#names = lookup_by_names(mastertree)
#single leaf-node deletion
for node in bacillustaxa:
tree = Phylo.read(StringIO(stree), 'newick')
names = lookup_by_names(tree)
tree.prune(names[node])
Phylo.write( tree, 'sandbox/-' + node + '.nwk', 'newick')
#double leaf node deletion
for i in range(len(bacillustaxa)):
for j in range( (i+1), len(bacillustaxa)):
print i, j
tree = Phylo.read(StringIO(stree), 'newick')
names = lookup_by_names(tree)
tree.prune(names[bacillustaxa[i]])
tree.prune(names[bacillustaxa[j]])
Phylo.write( tree, 'sandbox/_' + bacillustaxa[i] + bacillustaxa[j]+'.nwk', 'newick')
$ pwd
~coat.protein/ortholog.analysis/coatpaml/python/
This is not bad for a one-hour coding on a first try. I should try to write multiple trees in a single file. I should try a generic loop or more likely, a recursive call.
No comments:
Post a Comment