From 7037e8541b9cb09dd2b4ff5d9dcb6aeb5e90b0fc Mon Sep 17 00:00:00 2001 From: Chris Rosenthal Date: Fri, 17 Nov 2023 10:14:33 -0800 Subject: [PATCH] Removing references to fastalite that are not reading or writing fasta files --- taxtastic/subcommands/add_nodes.py | 3 +-- taxtastic/subcommands/extract_nodes.py | 3 +-- taxtastic/subcommands/named.py | 9 +++---- taxtastic/subcommands/update_taxids.py | 10 +++---- taxtastic/utils.py | 36 ++++++++++++++++++++++++++ 5 files changed, 47 insertions(+), 14 deletions(-) diff --git a/taxtastic/subcommands/add_nodes.py b/taxtastic/subcommands/add_nodes.py index 589aba6..89fc738 100644 --- a/taxtastic/subcommands/add_nodes.py +++ b/taxtastic/subcommands/add_nodes.py @@ -27,10 +27,9 @@ import traceback import yaml -from fastalite import Opener from taxtastic.taxonomy import Taxonomy -from taxtastic.utils import add_database_args +from taxtastic.utils import add_database_args, Opener log = logging.getLogger(__name__) diff --git a/taxtastic/subcommands/extract_nodes.py b/taxtastic/subcommands/extract_nodes.py index a5efa55..f79656a 100644 --- a/taxtastic/subcommands/extract_nodes.py +++ b/taxtastic/subcommands/extract_nodes.py @@ -24,10 +24,9 @@ # from collections import OrderedDict import yaml -from fastalite import Opener from taxtastic.taxonomy import Taxonomy -from taxtastic.utils import add_database_args +from taxtastic.utils import add_database_args, Opener log = logging.getLogger(__name__) diff --git a/taxtastic/subcommands/named.py b/taxtastic/subcommands/named.py index c69e2ee..4214b07 100644 --- a/taxtastic/subcommands/named.py +++ b/taxtastic/subcommands/named.py @@ -15,11 +15,10 @@ """ Filters unclassified, unnamed taxonomy ids """ -import argparse import csv import sqlalchemy import sys -from taxtastic.utils import add_database_args +from taxtastic.utils import add_database_args, Opener from taxtastic.taxonomy import Taxonomy @@ -33,12 +32,12 @@ def build_parser(parser): input_group.add_argument( '-f', 
'--tax-id-file', metavar='FILE', - type=argparse.FileType('rt'), + type=Opener('rt'), help=('File containing a whitespace-delimited list of ' 'tax_ids (ie, separated by tabs, spaces, or newlines.')) input_group.add_argument( '-i', '--seq-info', - type=argparse.FileType('rt'), + type=Opener('rt'), help=('Read tax_ids from sequence info file, minimally ' 'containing a column named "tax_id"')) parser.add_argument( @@ -47,7 +46,7 @@ def build_parser(parser): help='Ignore "no rank" taxonomies [%(default)s]') parser.add_argument( '-o', '--outfile', - type=argparse.FileType('wt'), + type=Opener('wt'), default=sys.stdout, metavar='FILE', help=('Output file containing named taxonomy ids;' diff --git a/taxtastic/subcommands/update_taxids.py b/taxtastic/subcommands/update_taxids.py index 081a341..8464b95 100644 --- a/taxtastic/subcommands/update_taxids.py +++ b/taxtastic/subcommands/update_taxids.py @@ -26,8 +26,6 @@ import sqlalchemy as sa -from fastalite import Opener - import taxtastic from taxtastic.taxonomy import Taxonomy @@ -36,12 +34,14 @@ def build_parser(parser): parser.add_argument( - 'infile', type=Opener('r'), + 'infile', type=taxtastic.utils.Opener('r'), help=('Input CSV file to process, minimally containing the field ' '`tax_id`. Use "-" for stdin.')) parser = taxtastic.utils.add_database_args(parser) parser.add_argument( - '-o', '--outfile', default=sys.stdout, type=Opener('wt'), + '-o', '--outfile', + default=sys.stdout, + type=taxtastic.utils.Opener('wt'), help='Modified version of input file [default: stdout]') input_format = parser.add_mutually_exclusive_group(required=False) input_format.add_argument( @@ -52,7 +52,7 @@ def build_parser(parser): help='Infile is a headerless text file ' 'of tax_ids separated by newlines. 
[%(default)s]') parser.add_argument( - '--unknowns', type=Opener('wt'), + '--unknowns', type=taxtastic.utils.Opener('wt'), help=('optional output file containing rows with unknown tax_ids ' 'having no replacements in merged table')) parser.add_argument( diff --git a/taxtastic/utils.py b/taxtastic/utils.py index 4bdf71c..4483452 100644 --- a/taxtastic/utils.py +++ b/taxtastic/utils.py @@ -12,8 +12,10 @@ # # You should have received a copy of the GNU General Public License # along with taxtastic. If not, see . +import bz2 import csv import errno +import gzip import logging import os import re @@ -21,12 +23,46 @@ import string import random import configparser +import sys from collections import OrderedDict log = logging +class Opener(object): + """Factory for creating file objects. Transparently opens compressed + files for reading or writing based on suffix (.gz and .bz2 only). + + Example:: + + with Opener()('in.txt') as infile, Opener('w')('out.gz') as outfile: + outfile.write(infile.read()) + """ + + def __init__(self, mode='r', *args, **kwargs): + self.mode = mode + self.args = args + self.kwargs = kwargs + self.writable = 'w' in self.mode + + def __call__(self, obj): + if obj is sys.stdout or obj is sys.stdin: + return obj + elif obj == '-': + return sys.stdout if self.writable else sys.stdin + else: + openers = {'bz2': bz2.open, 'gz': gzip.open} + suffix = obj.rsplit('.', 1)[-1] # tolerate names with no dot + # in python3, both bz2 and gz libraries default to binary input and output + mode = self.mode + if sys.version_info.major == 3 and suffix in openers \ + and mode in {'w', 'r'}: + mode += 't' + opener = openers.get(suffix, open) + return opener(obj, mode=mode, *self.args, **self.kwargs) + + def get_new_nodes(fname): """ Return an iterator of dicts given a .csv-format file.