From a7951c255ae804a2ca61dd23eee171473e7d8e2b Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Thu, 3 Aug 2023 10:53:37 +0200 Subject: [PATCH] Port `fileobj` implementation to C to enable conditional compilation --- include/libhmmer/p7_hmmfile.pxd | 12 +- pyhmmer/easel.pyx | 23 ++-- pyhmmer/fileobj/__init__.pxd | 0 pyhmmer/fileobj/bsd.h | 188 ++++++++++++++++++++++++++++++++ pyhmmer/fileobj/bsd.pxd | 4 + pyhmmer/fileobj/bsd.pxi | 121 -------------------- pyhmmer/fileobj/linux.h | 183 +++++++++++++++++++++++++++++++ pyhmmer/fileobj/linux.pxd | 4 + pyhmmer/fileobj/linux.pxi | 124 --------------------- pyhmmer/fileobj/util.h | 23 ++++ pyhmmer/plan7.pyx | 21 ++-- setup.py | 3 + 12 files changed, 432 insertions(+), 274 deletions(-) create mode 100644 pyhmmer/fileobj/__init__.pxd create mode 100644 pyhmmer/fileobj/bsd.h create mode 100644 pyhmmer/fileobj/bsd.pxd delete mode 100644 pyhmmer/fileobj/bsd.pxi create mode 100644 pyhmmer/fileobj/linux.h create mode 100644 pyhmmer/fileobj/linux.pxd delete mode 100644 pyhmmer/fileobj/linux.pxi create mode 100644 pyhmmer/fileobj/util.h diff --git a/include/libhmmer/p7_hmmfile.pxd b/include/libhmmer/p7_hmmfile.pxd index 52b886c5..3c2e8080 100644 --- a/include/libhmmer/p7_hmmfile.pxd +++ b/include/libhmmer/p7_hmmfile.pxd @@ -45,12 +45,12 @@ cdef extern from "hmmer.h" nogil: char[eslERRBUFSIZE] errbuf - int p7_hmmfile_OpenE (const char *filename, char *env, P7_HMMFILE **ret_hfp, char *errbuf) - int p7_hmmfile_OpenENoDB(const char *filename, char *env, P7_HMMFILE **ret_hfp, char *errbuf) - int p7_hmmfile_Open (const char *filename, char *env, P7_HMMFILE **ret_hfp) # Deprecated - int p7_hmmfile_OpenNoDB (const char *filename, char *env, P7_HMMFILE **ret_hfp) # Deprecated - int p7_hmmfile_OpenBuffer(const char *buffer, int size, P7_HMMFILE **ret_hfp) - void p7_hmmfile_Close(P7_HMMFILE *hfp) + int p7_hmmfile_OpenE (const char *filename, char *env, P7_HMMFILE **ret_hfp, char *errbuf) except * + int p7_hmmfile_OpenENoDB(const char 
*filename, char *env, P7_HMMFILE **ret_hfp, char *errbuf) except * + int p7_hmmfile_Open (const char *filename, char *env, P7_HMMFILE **ret_hfp) except * # Deprecated + int p7_hmmfile_OpenNoDB (const char *filename, char *env, P7_HMMFILE **ret_hfp) except * # Deprecated + int p7_hmmfile_OpenBuffer(const char *buffer, int size, P7_HMMFILE **ret_hfp) except * + void p7_hmmfile_Close(P7_HMMFILE *hfp) except * int p7_hmmfile_WriteBinary(FILE *fp, int format, P7_HMM *hmm) except * int p7_hmmfile_WriteASCII (FILE *fp, int format, P7_HMM *hmm) except * diff --git a/pyhmmer/easel.pyx b/pyhmmer/easel.pyx index c0d5502e..3a411c75 100644 --- a/pyhmmer/easel.pyx +++ b/pyhmmer/easel.pyx @@ -8,13 +8,6 @@ to facilitate the development of biological software in C. It is used by """ -# --- C declarations --------------------------------------------------------- - -IF UNAME_SYSNAME == "Linux": - include "fileobj/linux.pxi" -ELIF UNAME_SYSNAME == "Darwin" or UNAME_SYSNAME.endswith("BSD"): - include "fileobj/bsd.pxi" - # --- C imports -------------------------------------------------------------- cimport cython @@ -26,7 +19,7 @@ from cpython.memoryview cimport PyMemoryView_FromMemory from cpython.ref cimport Py_INCREF from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, SIZE_MAX -from libc.stdio cimport fclose +from libc.stdio cimport fclose, FILE from libc.stdlib cimport calloc, malloc, realloc, free from libc.string cimport memcmp, memcpy, memmove, memset, strdup, strlen, strncpy from posix.types cimport off_t @@ -94,8 +87,12 @@ from .reexports.esl_sqio_ascii cimport ( fileheader_hmmpgmd, ) -include "exceptions.pxi" +if PLATFORM_UNAME_SYSTEM == "Linux": + from .fileobj.linux cimport fileobj_linux_open as fopen_obj +elif PLATFORM_UNAME_SYSTEM == "Darwin" or PLATFORM_UNAME_SYSTEM.endswith("BSD"): + from .fileobj.bsd cimport fileobj_bsd_open as fopen_obj +include "exceptions.pxi" # --- Python imports 
--------------------------------------------------------- @@ -3465,7 +3462,7 @@ cdef class MSA: raise InvalidParameter("format", format, choices=list(MSA_FILE_FORMATS)) fmt = MSA_FILE_FORMATS[format] - file = fopen_obj(fh, mode="w") + file = fopen_obj(fh, "w") status = libeasel.msafile.esl_msafile_Write(file, self._msa, fmt) fclose(file) @@ -4080,7 +4077,7 @@ cdef class MSAFile: cdef int status cdef ESL_BUFFER* buffer = NULL cdef ESL_MSAFILE* msaf = NULL - cdef FILE* fp = fopen_obj(fh) + cdef FILE* fp = fopen_obj(fh, "r") cdef bytes fh_repr = repr(fh).encode("ascii") try: @@ -4818,7 +4815,7 @@ cdef class Sequence: assert self._sq != NULL cdef int status - cdef FILE* file = fopen_obj(fh, mode="w") + cdef FILE* file = fopen_obj(fh, "w") status = libeasel.sqio.ascii.esl_sqascii_WriteFasta(file, self._sq, False) fclose(file) @@ -6108,7 +6105,7 @@ cdef class SequenceFile: cdef int status cdef ESL_SQFILE* sqfp = NULL cdef ESL_SQASCII_DATA* ascii = NULL - cdef FILE* fp = fopen_obj(fh) + cdef FILE* fp = fopen_obj(fh, "r") cdef bytes fh_repr = repr(fh).encode("ascii") # bail out early if format is not supported
diff --git a/pyhmmer/fileobj/__init__.pxd b/pyhmmer/fileobj/__init__.pxd
new file mode 100644
index 00000000..e69de29b
diff --git a/pyhmmer/fileobj/bsd.h b/pyhmmer/fileobj/bsd.h
new file mode 100644
index 00000000..80146019
--- /dev/null
+++ b/pyhmmer/fileobj/bsd.h
@@ -0,0 +1,192 @@
+#ifndef _PYHMMER_FILEOBJ_BSD
+#define _PYHMMER_FILEOBJ_BSD
+
+/* Expose a Python file-like object as a C `FILE*` with the BSD `funopen`
+ * function. Every callback calls into the Python C-API;
+ * NOTE(review): this presumes the GIL is held whenever the stream is used.
+ */
+
+#ifndef PY_SSIZE_T_CLEAN
+#define PY_SSIZE_T_CLEAN /* the "y#" format below passes a Py_ssize_t */
+#endif
+
+/* Python.h first: it may define macros that affect the standard headers. */
+#include <Python.h>
+
+#include <stdio.h>
+#include <string.h>
+
+#include "util.h"
+
+/* Every funopen(3) callback reports failure with -1. */
+#define _COOKIE_ERROR_WRITE -1
+#define _COOKIE_ERROR_READ -1
+#define _COOKIE_ERROR_SEEK -1
+
+/* Turn the result of a Python I/O call into a byte count in [0, size].
+ * Consumes the reference to `out`; returns -1 with an exception set when
+ * `out` is not an int or is out of range. */
+static int fileobj_bsd_count(PyObject* out, int size) {
+    if (!PyLong_Check(out)) {
+        Py_DECREF(out);
+        PyErr_SetString(PyExc_TypeError, "Expected int");
+        return -1;
+    }
+
+    long n = PyLong_AsLong(out);
+    Py_DECREF(out);
+    if (n == -1 && PyErr_Occurred())
+        return -1;
+    if (n < 0 || n > (long) size) {
+        PyErr_SetString(PyExc_ValueError, "byte count out of range");
+        return -1;
+    }
+    return (int) n;
+}
+
+/* `writefn`: forward `size` bytes to `file.write`. */
+static int fileobj_bsd_write(void* cookie, const char* buf, int size) {
+    PyObject* file = (PyObject*) cookie;
+
+    PyObject* out = PyObject_CallMethod(file, "write", "y#", buf, (Py_ssize_t) size);
+    if (out == NULL)
+        return _COOKIE_ERROR_WRITE;
+
+    /* Range-checked, so the count cannot be truncated when narrowed to int. */
+    return fileobj_bsd_count(out, size);
+}
+
+/* `readfn` for objects without `readinto`: copy out of `file.read`. */
+static int fileobj_bsd_read(void* cookie, char* buf, int size) {
+    PyObject* file = (PyObject*) cookie;
+
+    /* "n" consumes a Py_ssize_t, so the int is converted explicitly. */
+    PyObject* chunk = PyObject_CallMethod(file, "read", "n", (Py_ssize_t) size);
+    if (chunk == NULL)
+        return _COOKIE_ERROR_READ;
+
+    /* Fails with TypeError when `read` did not return `bytes`. */
+    const char* data = PyBytes_AsString(chunk);
+    if (data == NULL) {
+        Py_DECREF(chunk);
+        return _COOKIE_ERROR_READ;
+    }
+
+    Py_ssize_t len = PyBytes_Size(chunk);
+    if (len > (Py_ssize_t) size) {
+        Py_DECREF(chunk);
+        PyErr_SetString(PyExc_BufferError, "buffer too small to store `read` result");
+        return _COOKIE_ERROR_READ;
+    }
+
+    memcpy(buf, data, (size_t) len);
+    Py_DECREF(chunk);
+    return (int) len;
+}
+
+/* `readfn` using `file.readinto` on a memoryview wrapping `buf`. */
+static int fileobj_bsd_readinto(void* cookie, char* buf, int size) {
+    PyObject* file = (PyObject*) cookie;
+
+    PyObject* mem = PyMemoryView_FromMemory(buf, (Py_ssize_t) size, PyBUF_WRITE);
+    if (mem == NULL)
+        return _COOKIE_ERROR_READ;
+
+    PyObject* out = PyObject_CallMethod(file, "readinto", "O", mem);
+    Py_DECREF(mem);
+    if (out == NULL)
+        return _COOKIE_ERROR_READ;
+
+    return fileobj_bsd_count(out, size);
+}
+
+/* `seekfn`: funopen passes the offset by value and expects the resulting
+ * position as return value, unlike the fopencookie seek callback. */
+static fpos_t fileobj_bsd_seek(void* cookie, fpos_t offset, int whence) {
+    PyObject* file = (PyObject*) cookie;
+
+    PyObject* out = PyObject_CallMethod(file, "seek", "Li", (long long) offset, whence);
+    if (out == NULL)
+        return _COOKIE_ERROR_SEEK;
+
+    if (!PyLong_Check(out)) {
+        Py_DECREF(out);
+        PyErr_SetString(PyExc_TypeError, "Expected int");
+        return _COOKIE_ERROR_SEEK;
+    }
+
+    /* PyLong_AsLongLong yields -1 on failure, which is also the error value. */
+    long long pos = PyLong_AsLongLong(out);
+    Py_DECREF(out);
+    return (fpos_t) pos;
+}
+
+/* `closefn`: drop the reference taken by fileobj_bsd_open. */
+static int fileobj_bsd_close(void* cookie) {
+    Py_DECREF((PyObject*) cookie);
+    return 0; /* success; -1 here would make every fclose() fail */
+}
+
+/* Call the argument-less method `obj.<method>()` and return its truth value,
+ * or -1 with an exception set. A missing method is reported as TypeError. */
+static int fileobj_bsd_probe(PyObject* obj, const char* method) {
+    PyObject* res = PyObject_CallMethod(obj, method, NULL);
+    if (res == NULL) {
+        if (PyErr_ExceptionMatches(PyExc_AttributeError))
+            PyErr_Format(PyExc_TypeError, "Expected `io.IOBase` instance, found %s", Py_TYPE(obj)->tp_name);
+        return -1;
+    }
+
+    int truth = PyObject_IsTrue(res);
+    Py_DECREF(res);
+    return truth;
+}
+
+/* Open `obj` as a `FILE*`.
+ *
+ * Callbacks are only installed for what the object reports through
+ * `readable()`, `seekable()` and `writable()`. Returns NULL with a Python
+ * exception set on failure. On success the stream holds a new reference to
+ * `obj`, which is released when the stream is closed with fclose(). */
+static FILE* fileobj_bsd_open(PyObject* obj, const char* mode) {
+    int (*readfn)(void*, char*, int) = NULL;
+    int (*writefn)(void*, const char*, int) = NULL;
+    fpos_t (*seekfn)(void*, fpos_t, int) = NULL;
+
+    /* funopen takes no mode string; the parameter only mirrors the Linux API. */
+    (void) mode;
+
+    int readable = fileobj_bsd_probe(obj, "readable");
+    if (readable < 0)
+        return NULL;
+    if (readable) {
+        /* `readinto` is only used when running on CPython. */
+        int direct = (is_cpython() == 1) && PyObject_HasAttrString(obj, "readinto");
+        readfn = direct ? fileobj_bsd_readinto : fileobj_bsd_read;
+    }
+
+    int seekable = fileobj_bsd_probe(obj, "seekable");
+    if (seekable < 0)
+        return NULL;
+    if (seekable)
+        seekfn = fileobj_bsd_seek;
+
+    int writable = fileobj_bsd_probe(obj, "writable");
+    if (writable < 0)
+        return NULL;
+    if (writable)
+        writefn = fileobj_bsd_write;
+
+    FILE* file = funopen((const void*) obj, readfn, writefn, seekfn, fileobj_bsd_close);
+    if (file == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to open file-like object");
+        return NULL;
+    }
+
+    /* Take the reference only once nothing can fail anymore, so that no
+     * error path leaks it. */
+    Py_INCREF(obj);
+    return file;
+}
+
+#endif
\ No newline at end of file
diff --git a/pyhmmer/fileobj/bsd.pxd b/pyhmmer/fileobj/bsd.pxd
new file mode 100644
index 00000000..0ee0b61c
--- /dev/null
+++ b/pyhmmer/fileobj/bsd.pxd
@@ -0,0 +1,4 @@
+from libc.stdio cimport FILE
+
+cdef extern from "fileobj/bsd.h":
+    FILE* fileobj_bsd_open(object obj, const
char* mode) except NULL \ No newline at end of file diff --git a/pyhmmer/fileobj/bsd.pxi b/pyhmmer/fileobj/bsd.pxi deleted file mode 100644 index b3fa9c82..00000000 --- a/pyhmmer/fileobj/bsd.pxi +++ /dev/null @@ -1,121 +0,0 @@ -# coding: utf-8 -"""Obtain a `FILE*` from a Python object using ``funopen``. -""" - -# --- C imports -------------------------------------------------------------- - -from cpython.buffer cimport Py_buffer, PyObject_GetBuffer, PyBuffer_Release, PyBUF_READ, PyBUF_WRITE -from cpython.memoryview cimport PyMemoryView_FromMemory -from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF -from libc.stdio cimport EOF, FILE -from libc.stdint cimport int64_t, uint64_t -from libc.string cimport strcpy, strncpy, memcpy - - -# --- BSD interface ---------------------------------------------------------- - -cdef extern from "stdio.h": - ctypedef long fpos_t - -ctypedef int (*readfn_t) (void *cookie, char *buf, int size) -ctypedef int (*writefn_t)(void *cookie, const char *buf, int size) -ctypedef fpos_t (*seekfn_t) (void *cookie, fpos_t offset, int whence) -ctypedef int (*closefn_t)(void *cookie) - -cdef extern from "stdio.h": - FILE* funopen( - const void* cookie, - readfn_t readfn, - writefn_t writefn, - seekfn_t seekfn, - closefn_t closefn - ) - - -# --- fwrite implementation -------------------------------------------------- - -cdef int fwrite_obj(void *cookie, const char *buf, int size) except 0: - """Zero-copy implementation of `fwrite` for a Python file-handle. - """ - cdef object obj = cookie - cdef object mem = PyMemoryView_FromMemory( buf, size, PyBUF_READ) - return obj.write(mem) - - -# --- fread implementations -------------------------------------------------- - -cdef int fread_obj_read(void *cookie, char *buf, int size) except -1: - """Copying variant of `fread` for files lacking `readinto`. 
- """ - cdef object obj = cookie - cdef object chunk = obj.read(size) - cdef Py_buffer pybuffer - - if PyObject_GetBuffer(chunk, &pybuffer, PyBUF_READ) < 0: - raise RuntimeError("could not get buffer") - memcpy(buf, pybuffer.buf, len(chunk)) - PyBuffer_Release(&pybuffer) - - return len(chunk) - - -cdef int fread_obj_readinto(void *cookie, char *buf, int size) except -1: - """Zero-copy implementation of `fread` using the `readinto` method. - """ - cdef object obj = cookie - cdef object mem - - IF SYS_IMPLEMENTATION_NAME == "pypy": - # NB: PyPy has a bug in the `readinto` implementation that requires the - # memoryview to be read/write and not just write, which is why we - # create the memoryview in read/write mode and not just in write mode. - mem = PyMemoryView_FromMemory(buf, size, PyBUF_READ | PyBUF_WRITE) - ELSE: - mem = PyMemoryView_FromMemory(buf, size, PyBUF_WRITE) - - try: - return obj.readinto(mem.cast('B')) - except TypeError: - # NB: on PyPy, `obj.readinto` sometimes complains that the memoryview - # is not a byte-like object when you cast it to `unsigned char*`. 
- return obj.readinto(mem) - - -# --- fseek implementation --------------------------------------------------- - -cdef fpos_t fseek_obj(void* cookie, fpos_t offset, int whence) except -1: - cdef object obj = cookie - return obj.seek(offset, whence) - - -# --- fclose implementation -------------------------------------------------- - -cdef int fclose_obj(void *cookie) except EOF: - Py_DECREF( cookie) - return 0 - - -# --- fopen_obj -------------------------------------------------------------- - -cdef FILE* fopen_obj(object obj, str mode = "r") except NULL: - cdef closefn_t closefn = fclose_obj - cdef readfn_t readfn = NULL - cdef writefn_t writefn = NULL - cdef seekfn_t seekfn = NULL - - try: - if obj.readable(): - if hasattr(( obj), "readinto"): - readfn = fread_obj_readinto - else: - readfn = fread_obj_read - if obj.writable(): - writefn = fwrite_obj - if obj.seekable(): - seekfn = fseek_obj - except AttributeError as err: - ty = type(obj).__name__ - raise TypeError("expected `io.IOBase` instance, found {}".format(ty)) from err - - Py_INCREF(obj) - return funopen( obj, readfn, writefn, seekfn, closefn)
diff --git a/pyhmmer/fileobj/linux.h b/pyhmmer/fileobj/linux.h
new file mode 100644
index 00000000..ddadba1a
--- /dev/null
+++ b/pyhmmer/fileobj/linux.h
@@ -0,0 +1,200 @@
+#ifndef _PYHMMER_FILEOBJ_LINUX
+#define _PYHMMER_FILEOBJ_LINUX
+
+/* Expose a Python file-like object as a C `FILE*` with the glibc
+ * `fopencookie` extension. Every callback calls into the Python C-API;
+ * NOTE(review): this presumes the GIL is held whenever the stream is used.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE /* fopencookie(3) is a GNU extension */
+#endif
+#ifndef PY_SSIZE_T_CLEAN
+#define PY_SSIZE_T_CLEAN /* the "y#" format below passes a Py_ssize_t */
+#endif
+
+/* Python.h first: it may define macros that affect the standard headers. */
+#include <Python.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "util.h"
+
+/* Error values expected by fopencookie(3) from each callback. */
+#define _COOKIE_ERROR_WRITE 0
+#define _COOKIE_ERROR_READ -1
+#define _COOKIE_ERROR_SEEK -1
+
+/* Turn the result of a Python I/O call into a byte count in [0, size].
+ * Consumes the reference to `out`; returns -1 with an exception set when
+ * `out` is not an int or is out of range. */
+static Py_ssize_t fileobj_linux_count(PyObject* out, size_t size) {
+    if (!PyLong_Check(out)) {
+        Py_DECREF(out);
+        PyErr_SetString(PyExc_TypeError, "Expected int");
+        return -1;
+    }
+
+    Py_ssize_t n = PyLong_AsSsize_t(out);
+    Py_DECREF(out);
+    if (n == -1 && PyErr_Occurred())
+        return -1;
+    if (n < 0 || (size_t) n > size) {
+        PyErr_SetString(PyExc_ValueError, "byte count out of range");
+        return -1;
+    }
+    return n;
+}
+
+/* Cookie `write`: forward `size` bytes to `file.write`; 0 signals an error. */
+static Py_ssize_t fileobj_linux_write(void* cookie, const char* buf, size_t size) {
+    PyObject* file = (PyObject*) cookie;
+
+    PyObject* out = PyObject_CallMethod(file, "write", "y#", buf, (Py_ssize_t) size);
+    if (out == NULL)
+        return _COOKIE_ERROR_WRITE;
+
+    /* A write callback must never return a negative value. */
+    Py_ssize_t n = fileobj_linux_count(out, size);
+    return (n < 0) ? _COOKIE_ERROR_WRITE : n;
+}
+
+/* Cookie `read` for objects without `readinto`: copy out of `file.read`. */
+static Py_ssize_t fileobj_linux_read(void* cookie, char* buf, size_t size) {
+    PyObject* file = (PyObject*) cookie;
+
+    /* "n" consumes a Py_ssize_t, so the size_t is converted explicitly. */
+    PyObject* chunk = PyObject_CallMethod(file, "read", "n", (Py_ssize_t) size);
+    if (chunk == NULL)
+        return _COOKIE_ERROR_READ;
+
+    /* Fails with TypeError when `read` did not return `bytes`. */
+    const char* data = PyBytes_AsString(chunk);
+    if (data == NULL) {
+        Py_DECREF(chunk);
+        return _COOKIE_ERROR_READ;
+    }
+
+    Py_ssize_t len = PyBytes_Size(chunk);
+    if ((size_t) len > size) {
+        Py_DECREF(chunk);
+        PyErr_SetString(PyExc_BufferError, "buffer too small to store `read` result");
+        return _COOKIE_ERROR_READ;
+    }
+
+    memcpy(buf, data, (size_t) len);
+    Py_DECREF(chunk);
+    return len;
+}
+
+/* Cookie `read` using `file.readinto` on a memoryview wrapping `buf`. */
+static Py_ssize_t fileobj_linux_readinto(void* cookie, char* buf, size_t size) {
+    PyObject* file = (PyObject*) cookie;
+
+    PyObject* mem = PyMemoryView_FromMemory(buf, (Py_ssize_t) size, PyBUF_WRITE);
+    if (mem == NULL)
+        return _COOKIE_ERROR_READ;
+
+    PyObject* out = PyObject_CallMethod(file, "readinto", "O", mem);
+    Py_DECREF(mem);
+    if (out == NULL)
+        return _COOKIE_ERROR_READ;
+
+    /* Report failures as read errors (-1); 0 would be taken for EOF. */
+    Py_ssize_t len = fileobj_linux_count(out, size);
+    return (len < 0) ? _COOKIE_ERROR_READ : len;
+}
+
+/* Cookie `seek`: call `file.seek` and store the new position in `*offset`. */
+static int fileobj_linux_seek(void* cookie, off64_t* offset, int whence) {
+    PyObject* file = (PyObject*) cookie;
+
+    PyObject* out = PyObject_CallMethod(file, "seek", "Li", (long long) *offset, whence);
+    if (out == NULL)
+        return _COOKIE_ERROR_SEEK;
+
+    if (!PyLong_Check(out)) {
+        Py_DECREF(out);
+        PyErr_SetString(PyExc_TypeError, "Expected int");
+        return _COOKIE_ERROR_SEEK;
+    }
+
+    long long pos = PyLong_AsLongLong(out);
+    Py_DECREF(out);
+    if (pos == -1 && PyErr_Occurred())
+        return _COOKIE_ERROR_SEEK;
+
+    *offset = (off64_t) pos;
+    return 0;
+}
+
+/* Cookie `close`: drop the reference taken by fileobj_linux_open. */
+static int fileobj_linux_close(void* cookie) {
+    Py_DECREF((PyObject*) cookie);
+    return 0; /* success; EOF here would make every fclose() fail */
+}
+
+/* Call the argument-less method `obj.<method>()` and return its truth value,
+ * or -1 with an exception set. A missing method is reported as TypeError. */
+static int fileobj_linux_probe(PyObject* obj, const char* method) {
+    PyObject* res = PyObject_CallMethod(obj, method, NULL);
+    if (res == NULL) {
+        if (PyErr_ExceptionMatches(PyExc_AttributeError))
+            PyErr_Format(PyExc_TypeError, "Expected `io.IOBase` instance, found %s", Py_TYPE(obj)->tp_name);
+        return -1;
+    }
+
+    int truth = PyObject_IsTrue(res);
+    Py_DECREF(res);
+    return truth;
+}
+
+/* Open `obj` as a `FILE*` in the given fopen-style `mode`.
+ *
+ * Callbacks are only installed for what the object reports through
+ * `readable()`, `seekable()` and `writable()`. Returns NULL with a Python
+ * exception set on failure. On success the stream holds a new reference to
+ * `obj`, which is released when the stream is closed with fclose(). */
+static FILE* fileobj_linux_open(PyObject* obj, const char* mode) {
+    cookie_io_functions_t functions;
+    functions.read = NULL;
+    functions.write = NULL;
+    functions.seek = NULL;
+    functions.close = fileobj_linux_close;
+
+    int readable = fileobj_linux_probe(obj, "readable");
+    if (readable < 0)
+        return NULL;
+    if (readable) {
+        /* `readinto` is only used when running on CPython. */
+        int direct = (is_cpython() == 1) && PyObject_HasAttrString(obj, "readinto");
+        functions.read = direct ? fileobj_linux_readinto : fileobj_linux_read;
+    }
+
+    int seekable = fileobj_linux_probe(obj, "seekable");
+    if (seekable < 0)
+        return NULL;
+    if (seekable)
+        functions.seek = fileobj_linux_seek;
+
+    int writable = fileobj_linux_probe(obj, "writable");
+    if (writable < 0)
+        return NULL;
+    if (writable)
+        functions.write = fileobj_linux_write;
+
+    FILE* file = fopencookie((void*) obj, mode, functions);
+    if (file == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to open file-like object");
+        return NULL;
+    }
+
+    /* Take the reference only once nothing can fail anymore, so that no
+     * error path leaks it. */
+    Py_INCREF(obj);
+    return file;
+}
+
+#endif
\ No newline at end of file
diff --git a/pyhmmer/fileobj/linux.pxd b/pyhmmer/fileobj/linux.pxd
new file mode 100644
index 00000000..974932cd
--- /dev/null
+++ b/pyhmmer/fileobj/linux.pxd
@@ -0,0 +1,4 @@
+from libc.stdio cimport FILE
+
+cdef extern from "fileobj/linux.h":
+    FILE* fileobj_linux_open(object obj, const char* mode) except NULL
\
No newline at end of file diff --git a/pyhmmer/fileobj/linux.pxi b/pyhmmer/fileobj/linux.pxi deleted file mode 100644 index 04c5a887..00000000 --- a/pyhmmer/fileobj/linux.pxi +++ /dev/null @@ -1,124 +0,0 @@ -# coding: utf-8 -"""Obtain a `FILE*` from a Python object using ``fopencookie``. -""" - -# --- C imports -------------------------------------------------------------- - -from cpython.buffer cimport Py_buffer, PyObject_GetBuffer, PyBuffer_Release, PyBUF_READ, PyBUF_WRITE -from cpython.memoryview cimport PyMemoryView_FromMemory -from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF -from libc.stdio cimport EOF, FILE -from libc.stdint cimport uint64_t -from libc.string cimport strcpy, strncpy, memcpy - - -# --- Linux interface -------------------------------------------------------- - -cdef extern from "sys/types.h": - ctypedef uint64_t off64_t - -cdef extern from "stdio.h": - - ctypedef ssize_t (*cookie_read_function_t) (void *cookie, char *buf, size_t size) - ctypedef ssize_t (*cookie_write_function_t)(void *cookie, const char *buf, size_t size) - ctypedef int (*cookie_seek_function_t) (void *cookie, off64_t *offset, int whence); - ctypedef int (*cookie_close_function_t)(void *cookie) - - ctypedef struct cookie_io_functions_t: - cookie_read_function_t* read - cookie_write_function_t* write - cookie_seek_function_t* seek - cookie_close_function_t* close - - FILE *fopencookie(void *cookie, const char *mode, cookie_io_functions_t io_funcs); - - -# --- fwrite implementation -------------------------------------------------- - -cdef ssize_t fwrite_obj(void *cookie, const char *buf, size_t size) except 0: - """Zero-copy implementation of `fwrite` for a Python file-handle. 
- """ - cdef object obj = cookie - cdef object mem = PyMemoryView_FromMemory( buf, size, PyBUF_READ) - return obj.write(mem) - - -# --- fread implementations -------------------------------------------------- - -cdef ssize_t fread_obj_read(void *cookie, char *buf, size_t size) except -1: - """Copying variant of `fread` for files lacking `readinto`. - """ - cdef object obj = cookie - cdef object chunk = obj.read(size) - cdef Py_buffer pybuffer - - if PyObject_GetBuffer(chunk, &pybuffer, PyBUF_READ) < 0: - raise RuntimeError("could not get buffer") - memcpy(buf, pybuffer.buf, len(chunk)) - PyBuffer_Release(&pybuffer) - - return len(chunk) - - -cdef ssize_t fread_obj_readinto(void *cookie, char *buf, size_t size) except -1: - """Zero-copy implementation of `fread` using the `readinto` method. - """ - cdef object obj = cookie - cdef object mem - - if SYS_IMPLEMENTATION_NAME == "pypy": - # NB: PyPy has a bug in the `readinto` implementation that requires the - # memoryview to be read/write and not just write, which is why we - # create the memoryview in read/write mode and not just in write mode. - mem = PyMemoryView_FromMemory(buf, size, PyBUF_READ | PyBUF_WRITE) - else: - mem = PyMemoryView_FromMemory(buf, size, PyBUF_WRITE) - - try: - return obj.readinto(mem.cast('B')) - except TypeError: - # NB: on PyPy, `obj.readinto` sometimes complains that the memoryview - # is not a byte-like object when you cast it to `unsigned char*`. 
- return obj.readinto(mem) - - -# --- fseek implementation --------------------------------------------------- - -cdef int fseek_obj(void* cookie, off64_t* offset, int whence) except -1: - cdef object obj = cookie - offset[0] = obj.seek(offset[0], whence) - return 0 - - -# --- fclose implementation -------------------------------------------------- - -cdef ssize_t fclose_obj(void *cookie) except EOF: - Py_DECREF( cookie) - return 0 - - -# --- fopen_obj -------------------------------------------------------------- - -cdef FILE* fopen_obj(object obj, str mode = "r") except NULL: - cdef cookie_io_functions_t functions - functions.close = fclose_obj - functions.read = NULL - functions.seek = NULL - functions.write = NULL - - try: - if obj.readable(): - if hasattr(( obj), "readinto"): - functions.read = fread_obj_readinto - else: - functions.read = fread_obj_read - if obj.writable(): - functions.write = fwrite_obj - if obj.seekable(): - functions.seek = fseek_obj - except AttributeError as err: - ty = type(obj).__name__ - raise TypeError("expected `io.IOBase` instance, found {}".format(ty)) from err - - Py_INCREF(obj) - return fopencookie( obj, mode.encode("ascii"), functions)
diff --git a/pyhmmer/fileobj/util.h b/pyhmmer/fileobj/util.h
new file mode 100644
index 00000000..d2724448
--- /dev/null
+++ b/pyhmmer/fileobj/util.h
@@ -0,0 +1,37 @@
+#ifndef _PYHMMER_FILEOBJ_UTIL
+#define _PYHMMER_FILEOBJ_UTIL
+
+#include <Python.h>
+
+/* Tell whether the running interpreter is CPython.
+ *
+ * Looks up `sys.implementation.name` and compares it to "cpython".
+ * Returns 1 on a match, 0 on a mismatch, and -1 when the name cannot be
+ * obtained or is not a string. No Python exception is left pending on
+ * return, because callers only compare the result to 1 and carry on.
+ */
+static int is_cpython(void) {
+    /* PySys_GetObject returns a borrowed reference and sets no exception. */
+    PyObject* impl = PySys_GetObject("implementation");
+    if (impl == NULL)
+        return -1;
+
+    PyObject* name = PyObject_GetAttrString(impl, "name");
+    if (name == NULL) {
+        /* Swallow the AttributeError: a pending exception would poison the
+         * next C-API call made by a caller that ignores the -1 result. */
+        PyErr_Clear();
+        return -1;
+    }
+
+    if (!PyUnicode_Check(name)) {
+        Py_DECREF(name);
+        return -1;
+    }
+
+    int cpython = (PyUnicode_CompareWithASCIIString(name, "cpython") == 0);
+    Py_DECREF(name);
+    return cpython;
+}
+
+#endif
\ No newline at end of file
diff --git a/pyhmmer/plan7.pyx b/pyhmmer/plan7.pyx
index 208cf66c..6452e598 100644
--- a/pyhmmer/plan7.pyx
+++ b/pyhmmer/plan7.pyx
@@ -22,7 +22,7
@@ from libc.math cimport exp, ceil from libc.stddef cimport ptrdiff_t from libc.stdio cimport printf, rewind from libc.stdlib cimport calloc, malloc, realloc, free, llabs -from libc.stdint cimport uint8_t, uint32_t, int64_t +from libc.stdint cimport uint8_t, uint32_t, uint64_t, int64_t from libc.stdio cimport fprintf, FILE, stdout, fclose from libc.string cimport memset, memcpy, memmove, strdup, strndup, strlen, strcmp, strncpy from libc.time cimport ctime, strftime, time, time_t, tm, localtime_r @@ -152,12 +152,13 @@ from .reexports.p7_hmmfile cimport ( v3f_magic ) +if PLATFORM_UNAME_SYSTEM == "Linux": + from .fileobj.linux cimport fileobj_linux_open as fopen_obj +elif PLATFORM_UNAME_SYSTEM == "Darwin" or PLATFORM_UNAME_SYSTEM.endswith("BSD"): + from .fileobj.bsd cimport fileobj_bsd_open as fopen_obj + include "exceptions.pxi" -IF UNAME_SYSNAME == "Linux": - include "fileobj/linux.pxi" -ELIF UNAME_SYSNAME == "Darwin" or UNAME_SYSNAME.endswith("BSD"): - include "fileobj/bsd.pxi" # --- Python imports --------------------------------------------------------- @@ -3245,7 +3246,7 @@ cdef class HMM: cdef FILE* file cdef P7_HMM* hm = self._hmm - file = fopen_obj(fh, mode="w") + file = fopen_obj(fh, "w") if binary: status = libhmmer.p7_hmmfile.p7_hmmfile_WriteBinary(file, -1, hm) @@ -3320,7 +3321,7 @@ cdef class HMMFile: raise AllocationError("P7_HMMFILE", sizeof(P7_HMMFILE)) # store options - hfp.f = fopen_obj(fh_) + hfp.f = fopen_obj(fh_, "r") hfp.do_gzip = False hfp.do_stdin = False hfp.newly_opened = True @@ -4299,8 +4300,8 @@ cdef class OptimizedProfile: assert self._om != NULL - pfp = fopen_obj(fh_profile, mode="w") - ffp = fopen_obj(fh_filter, mode="w") + pfp = fopen_obj(fh_profile, "w") + ffp = fopen_obj(fh_filter, "w") status = p7_oprofile_Write(ffp, pfp, self._om) if status == libeasel.eslOK: fclose(ffp) @@ -8218,7 +8219,7 @@ cdef class TopHits: cdef char* qname = unk if self._qname is None else self._qname cdef char* qacc = unk if self._qacc is None else 
self._qacc - file = fopen_obj(fh, mode="w") + file = fopen_obj(fh, "w") try: if format == "targets": fname = "p7_tophits_TabularTargets" diff --git a/setup.py b/setup.py index 1dcc771c..d44abdca 100644 --- a/setup.py +++ b/setup.py @@ -106,6 +106,7 @@ def run(self): "SYS_VERSION_INFO_MINOR": sys.version_info.minor, "SYS_VERSION_INFO_MICRO": sys.version_info.micro, "SYS_BYTEORDER": sys.byteorder, + "PLATFORM_UNAME_SYSTEM": platform.uname().system, } } if hmmer_impl is not None: @@ -714,6 +715,7 @@ def run(self): libraries=["easel"], define_macros=platform_define_macros, extra_compile_args=platform_compile_args, + depends=glob.glob(os.path.join("pyhmmer", "fileobj", "*.h")), ), Extension( "pyhmmer.plan7", @@ -721,6 +723,7 @@ def run(self): libraries=["hmmer", "easel", "divsufsort"], define_macros=platform_define_macros, extra_compile_args=platform_compile_args, + depends=glob.glob(os.path.join("pyhmmer", "fileobj", "*.h")), ), Extension( "pyhmmer.daemon",