Skip to content

Commit

Permalink
Removing references to fastalite that are not reading or writing fast…
Browse files Browse the repository at this point in the history
…a files
  • Loading branch information
crosenth committed Nov 17, 2023
1 parent 24727db commit 7037e85
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 14 deletions.
3 changes: 1 addition & 2 deletions taxtastic/subcommands/add_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,9 @@
import traceback

import yaml
from fastalite import Opener

from taxtastic.taxonomy import Taxonomy
from taxtastic.utils import add_database_args
from taxtastic.utils import add_database_args, Opener

log = logging.getLogger(__name__)

Expand Down
3 changes: 1 addition & 2 deletions taxtastic/subcommands/extract_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@
# from collections import OrderedDict

import yaml
from fastalite import Opener

from taxtastic.taxonomy import Taxonomy
from taxtastic.utils import add_database_args
from taxtastic.utils import add_database_args, Opener

log = logging.getLogger(__name__)

Expand Down
9 changes: 4 additions & 5 deletions taxtastic/subcommands/named.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@
"""
Filters unclassified, unnamed taxonomy ids
"""
import argparse
import csv
import sqlalchemy
import sys
from taxtastic.utils import add_database_args
from taxtastic.utils import add_database_args, Opener
from taxtastic.taxonomy import Taxonomy


Expand All @@ -33,12 +32,12 @@ def build_parser(parser):
input_group.add_argument(
'-f', '--tax-id-file',
metavar='FILE',
type=argparse.FileType('rt'),
type=Opener('rt'),
help=('File containing a whitespace-delimited list of '
'tax_ids (ie, separated by tabs, spaces, or newlines.'))
input_group.add_argument(
'-i', '--seq-info',
type=argparse.FileType('rt'),
type=Opener('rt'),
help=('Read tax_ids from sequence info file, minimally '
'containing a column named "tax_id"'))
parser.add_argument(
Expand All @@ -47,7 +46,7 @@ def build_parser(parser):
help='Ignore "no rank" taxonomies [%(default)s]')
parser.add_argument(
'-o', '--outfile',
type=argparse.FileType('wt'),
type=Opener('wt'),
default=sys.stdout,
metavar='FILE',
help=('Output file containing named taxonomy ids;'
Expand Down
10 changes: 5 additions & 5 deletions taxtastic/subcommands/update_taxids.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@

import sqlalchemy as sa

from fastalite import Opener

import taxtastic
from taxtastic.taxonomy import Taxonomy

Expand All @@ -36,12 +34,14 @@

def build_parser(parser):
parser.add_argument(
'infile', type=Opener('r'),
'infile', type=taxtastic.utils.Opener('r'),
help=('Input CSV file to process, minimally containing the field '
'`tax_id`. Use "-" for stdin.'))
parser = taxtastic.utils.add_database_args(parser)
parser.add_argument(
'-o', '--outfile', default=sys.stdout, type=Opener('wt'),
'-o', '--outfile',
default=sys.stdout,
type=taxtastic.utils.Opener('wt'),
help='Modified version of input file [default: stdout]')
input_format = parser.add_mutually_exclusive_group(required=False)
input_format.add_argument(
Expand All @@ -52,7 +52,7 @@ def build_parser(parser):
help='Infile is a headerless text file '
'of tax_ids separated by newlines. [%(default)s]')
parser.add_argument(
'--unknowns', type=Opener('wt'),
'--unknowns', type=taxtastic.utils.Opener('wt'),
help=('optional output file containing rows with unknown tax_ids '
'having no replacements in merged table'))
parser.add_argument(
Expand Down
36 changes: 36 additions & 0 deletions taxtastic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,57 @@
#
# You should have received a copy of the GNU General Public License
# along with taxtastic. If not, see <http://www.gnu.org/licenses/>.
import bz2
import csv
import errno
import gzip
import logging
import os
import re
import subprocess
import string
import random
import configparser
import sys
from collections import OrderedDict


log = logging


class Opener(object):
    """Factory for creating file objects.

    Transparently opens compressed files for reading or writing based
    on the file name suffix (.gz and .bz2 only); any other name,
    including one with no suffix at all, is opened with the builtin
    ``open``.

    Instances are callables taking a single path-like string, which
    makes them usable as an argparse ``type=`` in place of
    ``argparse.FileType``.

    Example::

        with Opener()('in.txt') as infile, Opener('w')('out.gz') as outfile:
            outfile.write(infile.read())

    """

    def __init__(self, mode='r', *args, **kwargs):
        """Store the mode and any extra arguments for the opener.

        ``mode`` is the file mode passed to the underlying opener.
        ``*args`` and ``**kwargs`` are forwarded unchanged to the
        underlying opener on every call.
        """
        self.mode = mode
        self.args = args
        self.kwargs = kwargs
        # Decides whether '-' maps to stdout (writing) or stdin (reading).
        self.writable = 'w' in self.mode

    def __call__(self, obj):
        """Return an open file object for ``obj``.

        ``obj`` may be ``sys.stdin`` or ``sys.stdout`` (returned
        unchanged), the string ``'-'`` (mapped to stdout when the mode
        contains ``'w'``, otherwise stdin), or a file name.
        """
        if obj is sys.stdout or obj is sys.stdin:
            return obj

        if obj == '-':
            return sys.stdout if self.writable else sys.stdin

        openers = {'bz2': bz2.open, 'gz': gzip.open}

        # rpartition never raises on names without a '.', unlike
        # unpacking the result of rsplit('.', 1); ``dot`` is empty when
        # the name has no suffix, in which case the builtin open is used.
        __, dot, suffix = obj.rpartition('.')
        opener = openers.get(suffix, open) if dot else open

        # in python3, both bz2 and gz libraries default to binary
        # input and output, so request text mode explicitly for the
        # bare 'r' and 'w' modes
        mode = self.mode
        if sys.version_info.major == 3 and opener is not open \
                and mode in {'w', 'r'}:
            mode += 't'

        return opener(obj, *self.args, mode=mode, **self.kwargs)


def get_new_nodes(fname):
"""
Return an iterator of dicts given a .csv-format file.
Expand Down

0 comments on commit 7037e85

Please sign in to comment.