Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

minor changes to make compatible with python3 install #129

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 66 additions & 59 deletions poretools/Fast5File.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
### and must be converted to seconds by dividing by sample frequency.

# poretools imports
import formats
from Event import Event
from . import formats
from . import Event

fastq_paths = {
'closed' : {},
Expand Down Expand Up @@ -77,6 +77,8 @@ def next(self):
else:
raise StopIteration()

__next__ = next


class Fast5FileSet(object):

Expand All @@ -103,13 +105,16 @@ def __iter__(self):

def next(self):
try:
return Fast5File(self.files.next(), self.group)
nextFile = next(self.files)
return Fast5File(nextFile, self.group)
except Exception as e:
# cleanup our mess
if self.set_type == FAST5SET_TARBALL:
shutil.rmtree(PORETOOLS_TMPDIR)
raise StopIteration

__next__ = next

def _extract_fast5_files(self):

# return as-is if list of files
Expand Down Expand Up @@ -171,14 +176,16 @@ def __iter__(self):

def next(self):
while True:
tarinfo = self._tarfile.next()
tarinfo = next(self._tarfile)
if tarinfo is None:
raise StopIteration
elif self._fast5_filename_filter(tarinfo.name):
break
self._tarfile.extract(tarinfo, path=PORETOOLS_TMPDIR)
return os.path.join(PORETOOLS_TMPDIR, tarinfo.name)

__next__ = next

def __len__(self):
with tarfile.open(self._tarball) as tar:
return len(tar.getnames())
Expand Down Expand Up @@ -224,7 +231,7 @@ def open(self):
try:
self.hdf5file = h5py.File(self.filename, 'r')
return True
except Exception, e:
except Exception as e:
logger.warning("Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
return False

Expand All @@ -245,11 +252,11 @@ def guess_version(self):
pass

# less likely
try:
self.hdf5file["/Analyses/Basecall_RNN_1D_%03d/BaseCalled_template" % (self.group)]
return 'r9rnn'
except KeyError:
pass
try:
self.hdf5file["/Analyses/Basecall_RNN_1D_%03d/BaseCalled_template" % (self.group)]
return 'r9rnn'
except KeyError:
pass

return 'prebasecalled'

Expand Down Expand Up @@ -333,13 +340,13 @@ def get_fastas(self, choice):
return fas

def get_fastas_dict(self):
"""
Return the set of base called sequences in the FAST5
in FASTQ format.
"""
Return the set of base called sequences in the FAST5
in FASTQ format.
"""
if self.have_fastas is False:
self._extract_fastas_from_fast5()
self.have_fastas = True
if self.have_fastas is False:
self._extract_fastas_from_fast5()
self.have_fastas = True

return self.fastas

Expand Down Expand Up @@ -430,7 +437,7 @@ def get_exp_start_time(self):
# Unix time stamp from MinKNOW < 1.4
timestamp = int(self.keyinfo['tracking_id'].attrs['exp_start_time'])
return timestamp
except KeyError, e:
except KeyError as e:
return None

def get_channel_number(self):
Expand Down Expand Up @@ -486,7 +493,7 @@ def hdf_internal_error(self,reason):
https://github.com/arq5x/poretools/issues""" % (self.filename, reason)
sys.exit(msg)

def find_read_number_block_fixed_raw(self):
def find_read_number_block_fixed_raw(self):
"""
New-style FAST5/HDF5 structure:
There is a fixed 'Raw/Reads' node with only one 'read_NNN' item
Expand All @@ -510,7 +517,7 @@ def find_read_number_block_fixed_raw(self):
self.hdf_internal_error("Failed to get HDF5 item '%s'"% (path))
return node

def find_read_number_block(self):
def find_read_number_block(self):
"""Returns the node of the 'Read_NNN' information, or None if not
found"""
node = self.find_read_number_block_link()
Expand Down Expand Up @@ -565,7 +572,7 @@ def get_duration(self):
if node:
try:
return int(node.attrs['duration']) / self.get_sample_frequency()
except Exception, e:
except Exception as e:
logger.error(str(e))
pass

Expand All @@ -589,7 +596,7 @@ def get_start_time(self):
try:
frequency = int(self.get_sample_frequency())
return int(exp_start_time) + int(node.attrs['start_time'] / frequency)
except Exception, e:
except Exception as e:
logger.error(str(e))
pass

Expand Down Expand Up @@ -735,31 +742,31 @@ def get_asic_id(self):
self._get_metadata()
self.have_metadata = True

def get_host_name(self):
"""
Return the MinKNOW host computer name.
"""
if self.have_metadata is False:
self._get_metadata()
self.have_metadata = True
def get_host_name(self):
"""
Return the MinKNOW host computer name.
"""
if self.have_metadata is False:
self._get_metadata()
self.have_metadata = True

try:
return self.keyinfo['tracking_id'].attrs['hostname']
except:
return None
try:
return self.keyinfo['tracking_id'].attrs['hostname']
except:
return None

if self.have_metadata is False:
self._get_metadata()
self.have_metadata = True
if self.have_metadata is False:
self._get_metadata()
self.have_metadata = True

def get_device_id(self):
"""
Return the flowcell's device id.
"""

if self.have_metadata is False:
self._get_metadata()
self.have_metadata = True
if self.have_metadata is False:
self._get_metadata()
self.have_metadata = True

try:
return self.keyinfo['tracking_id'].attrs['device_id']
Expand All @@ -771,27 +778,27 @@ def get_sample_name(self):
Return the user supplied sample name
"""

if self.have_metadata is False:
self._get_metadata()
self.have_metadata = True
if self.have_metadata is False:
self._get_metadata()
self.have_metadata = True

try:
return self.keyinfo['context_tags'].attrs['user_filename_input']
except Exception, e:
except Exception as e:
return None

def get_sample_frequency(self):
"""
Return the user supplied sample name
"""

if self.have_metadata is False:
self._get_metadata()
self.have_metadata = True
if self.have_metadata is False:
self._get_metadata()
self.have_metadata = True

try:
return int(self.keyinfo['context_tags'].attrs['sample_frequency'])
except Exception, e:
except Exception as e:
return None

def get_script_name(self):
Expand All @@ -800,7 +807,7 @@ def get_script_name(self):
self.have_metdata = True
try:
return self.keyinfo['tracking_id'].attrs['exp_script_name']
except Exception, e:
except Exception as e:
return None

def get_template_events_count(self):
Expand All @@ -810,7 +817,7 @@ def get_template_events_count(self):
try:
table = self.hdf5file[fastq_paths[self.version]['template'] % self.group]
return len(table['Events'][()])
except Exception, e:
except Exception as e:
return 0

def get_complement_events_count(self):
Expand All @@ -820,7 +827,7 @@ def get_complement_events_count(self):
try:
table = self.hdf5file[fastq_paths[self.version]['complement'] % self.group]
return len(table['Events'][()])
except Exception, e:
except Exception as e:
return 0

def is_high_quality(self):
Expand Down Expand Up @@ -851,7 +858,7 @@ def get_best_type(self):
return 'template'
else:
return 'complement'
except Exception, e:
except Exception as e:
return None

####################################################################
Expand All @@ -862,26 +869,26 @@ def _extract_fastqs_from_fast5(self):
"""
Return the sequence in the FAST5 file in FASTQ format
"""
for id, h5path in fastq_paths[self.version].iteritems():
for (id, h5path) in fastq_paths[self.version].items():
try:
table = self.hdf5file[h5path % self.group]
fq = formats.Fastq(table['Fastq'][()])
fq.name += " " + self.filename
self.fastqs[id] = fq
except Exception, e:
except Exception as e:
pass

def _extract_fastas_from_fast5(self):
"""
Return the sequence in the FAST5 file in FASTA format
"""
for id, h5path in fastq_paths[self.version].iteritems():
for (id, h5path) in fastq_paths[self.version].items():
try:
table = self.hdf5file[h5path % self.group]
fa = formats.Fasta(table['Fastq'][()])
fa.name += " " + self.filename
self.fastas[id] = fa
except Exception, e:
except Exception as e:
pass

def _extract_template_events(self):
Expand All @@ -891,7 +898,7 @@ def _extract_template_events(self):
try:
table = self.hdf5file[fastq_paths[self.version]['template'] % self.group]
self.template_events = [Event(x) for x in table['Events'][()]]
except Exception, e:
except Exception as e:
self.template_events = []

def _extract_complement_events(self):
Expand All @@ -901,7 +908,7 @@ def _extract_complement_events(self):
try:
table = self.hdf5file[fastq_paths[self.version]['complement'] % self.group]
self.complement_events = [Event(x) for x in table['Events'][()]]
except Exception, e:
except Exception as e:
self.complement_events = []

def _extract_pre_basecalled_events(self):
Expand All @@ -914,15 +921,15 @@ def _extract_pre_basecalled_events(self):
for read in table:
events.extend(table[read]["Events"][()])
self.pre_basecalled_events = [Event(x) for x in events]
# except Exception, e:
# except Exception as e:
# self.pre_basecalled_events = []

def _get_metadata(self):
try:
self.keyinfo = self.hdf5file['/UniqueGlobalKey']
except Exception, e:
except Exception as e:
try:
self.keyinfo = self.hdf5file['/Key']
except Exception, e:
except Exception as e:
self.keyinfo = None
logger.warning("Cannot find keyinfo. Exiting.\n")
6 changes: 3 additions & 3 deletions poretools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
import sys
import scripts
from Fast5File import *
from version import __version__
import poretools.scripts
from . import Fast5File
from . import version
2 changes: 1 addition & 1 deletion poretools/combine.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import tarfile
import sys
import Fast5File
from . import Fast5File

#logging
import logging
Expand Down
10 changes: 5 additions & 5 deletions poretools/events.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import Fast5File
from . import Fast5File

def run(parser, args):

Expand All @@ -7,18 +7,18 @@ def run(parser, args):
'length', 'model_state', 'model_level', 'move', \
'p_model_state', 'mp_model_state', 'p_mp_model_state', \
'p_A', 'p_C', 'p_G', 'p_T', 'raw_index']
print "\t".join(keys)
print("\t".join(keys))

if args.pre_basecalled:
for fast5 in Fast5File.Fast5FileSet(args.files):
for event in fast5.get_pre_basecalled_events():
print '\t'.join([fast5.filename, 'pre_basecalled', str(event)])
print('\t'.join([fast5.filename, 'pre_basecalled', str(event)]))
else:
for fast5 in Fast5File.Fast5FileSet(args.files):
for event in fast5.get_template_events():
print '\t'.join([fast5.filename, 'template', str(event)])
print('\t'.join([fast5.filename, 'template', str(event)]))
for event in fast5.get_complement_events():
print '\t'.join([fast5.filename, 'complement', str(event)])
print('\t'.join([fast5.filename, 'complement', str(event)]))

fast5.close()

4 changes: 2 additions & 2 deletions poretools/fasta.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import Fast5File
from . import Fast5File
import sys

def run(parser, args):
Expand Down Expand Up @@ -42,7 +42,7 @@ def run(parser, args):
args.max_length > 0):
continue

print fa
print(fa)

fast5.close()

Loading