example.py

#! /usr/bin/env python
# Dom Bennett
# 22/01/2015
# TODO: add tutorial on advanced TaxDict usage
'''
Example script for using taxon_names_resolver
'''
# this is for forward compatibility with python 3
from __future__ import absolute_import
from __future__ import print_function

# SETUP LOGGING (OPTIONAL)
import logging
logger = logging.getLogger('')
logger.setLevel(logging.DEBUG)
console = logging.StreamHandler()
console.setFormatter(logging.Formatter('%(message)s'))
logger.addHandler(console)

# PACKAGES
from taxon_names_resolver import Resolver
from taxon_names_resolver import TaxDict
from taxon_names_resolver import taxTree

# EXAMPLE NAMES
terms = ['Homo sapiens', 'Gorilla gorilla', 'Pongo pongo', 'Macca mulatta',
         'Mus musculus', 'Ailuropoda melanoleuca', 'Ailurus fulgens',
         'Chlorotalpa tytonis', 'Arabidopsis thaliana', 'Bacillus subtilus']

# RESOLVE
# pass the terms, the datasource and the logger (optional)
resolver = Resolver(terms=terms, datasource="NCBI", logger=logger)
resolver.main()  # resolve!

# CREATE TAXDICT
# extract the unique names for each term ('idents', query_name is best as it is
#  guaranteed to be unique)
idents = resolver.retrieve('query_name')
# extract the lists of names for all known parental taxonomic groups for each
#  term ('lineages', e.g. Homo, Primate, Mammalia)
lineages = resolver.retrieve('classification_path')
# for Taxonomic IDs instead of names, use:
#  lineages = resolver.retrieve('classification_path_ids')
# extract the lists of corresponding rank names for 'lineages' ('ranks', e.g.
#  species, genus etc.) for each entity
ranks = resolver.retrieve('classification_path_ranks')
# optional extra data slots are also possible, for example a list of 1s and 0s
# it could be anything, just as long as its in the same order
extra = [1, 1, 1, 0, 0, 1, 1, 0, 1, 0]
# create a taxonomy specifying the names and order of 'ranks'. N.B. this is the
#  default and is based on NCBI's taxonomy.
taxonomy = ['subspecies', 'species', 'subgenus', 'genus', 'tribe', 'subfamily',
            'family', 'superfamily', 'parvorder', 'infraorder', 'suborder',
            'order', 'superorder', 'parvclass', 'infraclass', 'subclass',
            'class', 'superclass', 'subphylum', 'phylum', 'kingdom',
            'superkingdom']
# use 'idents', 'ranks', 'lineages' and 'taxonomy' (optional) to construct a
#  TaxDict
taxdict = TaxDict(idents=idents, ranks=ranks, lineages=lineages,
                  taxonomy=taxonomy, extra=extra)

# EXPLORE TAXDICT
# a dictionary for each ident with: 'lineage', 'taxref', 'ident', 'cident' and
#  'rank' (+ 'extra')
# the lineage
taxdict['Homo sapiens']['lineage']  # N.B. not all lineages are named, ''
# the ident is the same format as lineage e.g. it could be an ID
taxdict['Homo sapiens']['ident']
# the 'cident' (Contextual Ident), the highest named taxonomic group unique to
#  this ident among all other idents
taxdict['Arabidopsis thaliana']['cident']  # A. thaliana is the only plant
# the 'taxref', a holder of 'ident' and taxonomic posistion. Requires printing
#  e.g. C. tytonis could only resolved to the genus level (22/01/2015)
print(taxdict['Chlorotalpa tytonis']['taxref'])
# check the taxonomy
print(taxdict.taxonomy)
# check the hierarchy, a dictionary of taxrefs ranked and grouped in the form:
#  {'rank':[([taxref1, taxref2, ....],'lineage1'),
#           ([taxref3, taxref4, ....],'lineage2'), ....]}
print(taxdict.hierarchy)
# we've also added an extra data slot
taxdict['Homo sapiens']['extra']

# CREATE TREE
# use the taxdict to create a Newick string
treestring = taxTree(taxdict)

# SAVE TREE
with open('example.tre', 'w') as file:
    file.write(treestring)