Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental hmm #959

Draft
wants to merge 9 commits into
base: 0.3.3-backport
Choose a base branch
from
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
CC?=gcc
CFLAGS=-std=c99 -g -O3 -march=native -funroll-loops -ffast-math \
CFLAGS=-std=c11 -g -O3 -march=native -funroll-loops -ffast-math \
# -ftree-vectorize \
# -ftree-vectorizer-verbose=6 \
# -fopt-info-vec-missed

all: _tsinfer.cpython-34m.so
all: _tsinfer.cpython-34m.so

_tsinfer.cpython-34m.so: _tsinfermodule.c
CC="${CC}" CFLAGS="${CFLAGS}" python setup.py build_ext --inplace
_tsinfer.cpython-34m.so: _tsinfermodule.c
CC="${CC}" CFLAGS="${CFLAGS}" python3 setup.py build_ext --inplace

ctags:
ctags lib/*.c lib/*.h tsinfer/*.py
Expand Down
45 changes: 35 additions & 10 deletions _tsinfermodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,17 @@ uint64_PyArray_converter(PyObject *in, PyObject **out)
return NPY_SUCCEED;
}

static int
int8_PyArray_converter(PyObject *in, PyObject **out)
{
PyObject *ret = PyArray_FROMANY(in, NPY_INT8, 1, 1, NPY_ARRAY_IN_ARRAY);
if (ret == NULL) {
return NPY_FAIL;
}
*out = ret;
return NPY_SUCCEED;
}

/*===================================================================
* AncestorBuilder
*===================================================================
Expand Down Expand Up @@ -429,30 +440,43 @@ TreeSequenceBuilder_init(TreeSequenceBuilder *self, PyObject *args, PyObject *kw
{
int ret = -1;
int err;
static char *kwlist[] = {"num_alleles", "max_nodes", "max_edges", NULL};
static char *kwlist[] = {"num_alleles", "max_nodes", "max_edges", "ancestral_state",
NULL};
PyArrayObject *num_alleles = NULL;
PyArrayObject *ancestral_state = NULL;
int8_t *ancestral_state_data = NULL;
unsigned long max_nodes = 1024;
unsigned long max_edges = 1024;
unsigned long num_sites;
npy_intp *shape;
int flags = 0;

self->tree_sequence_builder = NULL;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|kk", kwlist,
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|kkO&", kwlist,
uint64_PyArray_converter, &num_alleles,
&max_nodes, &max_edges)) {
&max_nodes, &max_edges,
int8_PyArray_converter, &ancestral_state)) {
goto out;
}
shape = PyArray_DIMS(num_alleles);
num_sites = shape[0];

if (ancestral_state != NULL) {
shape = PyArray_DIMS(ancestral_state);
if (shape[0] != (npy_intp) num_sites) {
PyErr_SetString(PyExc_ValueError, "ancestral state array wrong size");
goto out;
}
ancestral_state_data = PyArray_DATA(ancestral_state);
}
self->tree_sequence_builder = PyMem_Malloc(sizeof(tree_sequence_builder_t));
if (self->tree_sequence_builder == NULL) {
PyErr_NoMemory();
goto out;
}
err = tree_sequence_builder_alloc(self->tree_sequence_builder,
num_sites, PyArray_DATA(num_alleles),
num_sites,
PyArray_DATA(num_alleles),
ancestral_state_data,
max_nodes, max_edges, flags);
if (err != 0) {
handle_library_error(err);
Expand All @@ -461,6 +485,7 @@ TreeSequenceBuilder_init(TreeSequenceBuilder *self, PyObject *args, PyObject *kw
ret = 0;
out:
Py_XDECREF(num_alleles);
Py_XDECREF(ancestral_state);
return ret;
}

Expand Down Expand Up @@ -1283,21 +1308,21 @@ AncestorMatcher_init(AncestorMatcher *self, PyObject *args, PyObject *kwds)
int err;
int extended_checks = 0;
static char *kwlist[] = {"tree_sequence_builder", "recombination",
"mismatch", "precision", "extended_checks", NULL};
"mismatch", "likelihood_threshold", "extended_checks", NULL};
TreeSequenceBuilder *tree_sequence_builder = NULL;
PyObject *recombination = NULL;
PyObject *mismatch = NULL;
PyArrayObject *recombination_array = NULL;
PyArrayObject *mismatch_array = NULL;
npy_intp *shape;
unsigned int precision = 22;
double likelihood_threshold = DBL_MIN;
int flags = 0;

self->ancestor_matcher = NULL;
self->tree_sequence_builder = NULL;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!OO|Ii", kwlist,
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!OO|di", kwlist,
&TreeSequenceBuilderType, &tree_sequence_builder,
&recombination, &mismatch, &precision,
&recombination, &mismatch, &likelihood_threshold,
&extended_checks)) {
goto out;
}
Expand Down Expand Up @@ -1343,7 +1368,7 @@ AncestorMatcher_init(AncestorMatcher *self, PyObject *args, PyObject *kwds)
self->tree_sequence_builder->tree_sequence_builder,
PyArray_DATA(recombination_array),
PyArray_DATA(mismatch_array),
precision, flags);
likelihood_threshold, flags);
if (err != 0) {
handle_library_error(err);
goto out;
Expand Down
22 changes: 11 additions & 11 deletions lib/ancestor_builder.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ ancestor_builder_print_state(ancestor_builder_t *self, FILE *out)
}
fprintf(out, "\n");
}
tsk_blkalloc_print_state(&self->allocator, out);
tsi_blkalloc_print_state(&self->allocator, out);
ancestor_builder_check_state(self);
return 0;
}
Expand Down Expand Up @@ -144,12 +144,12 @@ ancestor_builder_alloc(
goto out;
}
/* Pre-calculate the maximum sizes asked for in other methods when calling
* tsk_blkalloc_get(&self->allocator, ...) */
* tsi_blkalloc_get(&self->allocator, ...) */
max_size = TSK_MAX(self->num_samples * sizeof(allele_t), max_size);
/* NB: using self->max_sites below is probably overkill: the real number should be
* the maximum number of focal sites in a single ancestor, usually << max_sites */
max_size = TSK_MAX(self->max_sites * sizeof(tsk_id_t), max_size);
ret = tsk_blkalloc_init(&self->allocator, max_size);
ret = tsi_blkalloc_init(&self->allocator, max_size);
if (ret != 0) {
goto out;
}
Expand All @@ -163,7 +163,7 @@ ancestor_builder_free(ancestor_builder_t *self)
{
tsi_safe_free(self->sites);
tsi_safe_free(self->descriptors);
tsk_blkalloc_free(&self->allocator);
tsi_blkalloc_free(&self->allocator);
return 0;
}

Expand All @@ -177,8 +177,8 @@ ancestor_builder_get_time_map(ancestor_builder_t *self, double time)
search.time = time;
avl_node = avl_search(&self->time_map, &search);
if (avl_node == NULL) {
avl_node = tsk_blkalloc_get(&self->allocator, sizeof(*avl_node));
time_map = tsk_blkalloc_get(&self->allocator, sizeof(*time_map));
avl_node = tsi_blkalloc_get(&self->allocator, sizeof(*avl_node));
time_map = tsi_blkalloc_get(&self->allocator, sizeof(*time_map));
if (avl_node == NULL || time_map == NULL) {
goto out;
}
Expand Down Expand Up @@ -439,10 +439,10 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot
search.num_samples = self->num_samples;
avl_node = avl_search(pattern_map, &search);
if (avl_node == NULL) {
avl_node = tsk_blkalloc_get(&self->allocator, sizeof(avl_node_t));
map_elem = tsk_blkalloc_get(&self->allocator, sizeof(pattern_map_t));
avl_node = tsi_blkalloc_get(&self->allocator, sizeof(avl_node_t));
map_elem = tsi_blkalloc_get(&self->allocator, sizeof(pattern_map_t));
site->genotypes
= tsk_blkalloc_get(&self->allocator, self->num_samples * sizeof(allele_t));
= tsi_blkalloc_get(&self->allocator, self->num_samples * sizeof(allele_t));
if (avl_node == NULL || map_elem == NULL || site->genotypes == NULL) {
ret = TSI_ERR_NO_MEMORY;
goto out;
Expand All @@ -465,7 +465,7 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot
}
map_elem->num_sites++;

list_node = tsk_blkalloc_get(&self->allocator, sizeof(site_list_t));
list_node = tsi_blkalloc_get(&self->allocator, sizeof(site_list_t));
if (list_node == NULL) {
ret = TSI_ERR_NO_MEMORY;
goto out;
Expand Down Expand Up @@ -538,7 +538,7 @@ ancestor_builder_finalise(ancestor_builder_t *self)
descriptor = self->descriptors + self->num_ancestors;
self->num_ancestors++;
descriptor->time = time_map->time;
focal_sites = tsk_blkalloc_get(
focal_sites = tsi_blkalloc_get(
&self->allocator, pattern_map->num_sites * sizeof(tsk_id_t));
if (focal_sites == NULL) {
ret = TSI_ERR_NO_MEMORY;
Expand Down
27 changes: 14 additions & 13 deletions lib/ancestor_matcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ ancestor_matcher_print_state(ancestor_matcher_t *self, FILE *out)
}
fprintf(out, "\n");
}
tsk_blkalloc_print_state(&self->traceback_allocator, out);
tsi_blkalloc_print_state(&self->traceback_allocator, out);

/* ancestor_matcher_check_state(self); */
return 0;
Expand All @@ -103,7 +103,7 @@ ancestor_matcher_print_state(ancestor_matcher_t *self, FILE *out)
int
ancestor_matcher_alloc(ancestor_matcher_t *self,
tree_sequence_builder_t *tree_sequence_builder, double *recombination_rate,
double *mismatch_rate, unsigned int precision, int flags)
double *mismatch_rate, double likelihood_threshold, int flags)
{
int ret = 0;
/* TODO make these input parameters. */
Expand All @@ -112,7 +112,7 @@ ancestor_matcher_alloc(ancestor_matcher_t *self,
memset(self, 0, sizeof(ancestor_matcher_t));
/* All allocs for arrays related to nodes are done in expand_nodes */
self->flags = flags;
self->precision = precision;
self->likelihood_threshold = likelihood_threshold;
self->max_nodes = 0;
self->tree_sequence_builder = tree_sequence_builder;
self->num_sites = tree_sequence_builder->num_sites;
Expand All @@ -132,7 +132,7 @@ ancestor_matcher_alloc(ancestor_matcher_t *self,
ret = TSI_ERR_NO_MEMORY;
goto out;
}
ret = tsk_blkalloc_init(&self->traceback_allocator, traceback_block_size);
ret = tsi_blkalloc_init(&self->traceback_allocator, traceback_block_size);
if (ret != 0) {
goto out;
}
Expand Down Expand Up @@ -165,7 +165,7 @@ ancestor_matcher_free(ancestor_matcher_t *self)
tsi_safe_free(self->output.left);
tsi_safe_free(self->output.right);
tsi_safe_free(self->output.parent);
tsk_blkalloc_free(&self->traceback_allocator);
tsi_blkalloc_free(&self->traceback_allocator);
return 0;
}

Expand Down Expand Up @@ -229,9 +229,9 @@ ancestor_matcher_store_traceback(ancestor_matcher_t *self, const tsk_id_t site_i
T[site_id].node = T[site_id - 1].node;
T[site_id].recombination_required = T[site_id - 1].recombination_required;
} else {
list_node = tsk_blkalloc_get(&self->traceback_allocator,
list_node = tsi_blkalloc_get(&self->traceback_allocator,
(size_t) num_likelihood_nodes * sizeof(tsk_id_t));
list_R = tsk_blkalloc_get(
list_R = tsi_blkalloc_get(
&self->traceback_allocator, (size_t) num_likelihood_nodes * sizeof(int8_t));
if (list_node == NULL || list_R == NULL) {
ret = TSI_ERR_NO_MEMORY;
Expand All @@ -257,12 +257,12 @@ static inline void
ancestor_matcher_set_allelic_state(
ancestor_matcher_t *self, const tsk_id_t site, allele_t *restrict allelic_state)
{
const tree_sequence_builder_t *tsb = self->tree_sequence_builder;
mutation_list_node_t *mutation;

/* FIXME assuming that 0 is always the ancestral state */
allelic_state[0] = 0;
allelic_state[0] = tsb->sites.ancestral_state[site];

for (mutation = self->tree_sequence_builder->sites.mutations[site]; mutation != NULL;
for (mutation = tsb->sites.mutations[site]; mutation != NULL;
mutation = mutation->next) {
allelic_state[mutation->node] = mutation->derived_state;
}
Expand Down Expand Up @@ -297,7 +297,8 @@ ancestor_matcher_update_site_likelihood_values(ancestor_matcher_t *self,
double max_L, p_last, p_no_recomb, p_recomb, p_t, p_e;
const double rho = self->recombination_rate[site];
const double mu = self->mismatch_rate[site];
const double n = (double) self->tree_sequence_builder->num_match_nodes;
/* const double n = (double) self->tree_sequence_builder->num_match_nodes; */
const double n = 1;
const double num_alleles
= (double) self->tree_sequence_builder->sites.num_alleles[site];

Expand Down Expand Up @@ -365,7 +366,7 @@ ancestor_matcher_update_site_likelihood_values(ancestor_matcher_t *self,
/* Renormalise the likelihoods. */
for (j = 0; j < num_likelihood_nodes; j++) {
u = L_nodes[j];
L[u] = tsk_round(L[u] / max_L, self->precision);
L[u] = TSK_MAX(L[u] / max_L, self->likelihood_threshold);
}
ancestor_matcher_unset_allelic_state(self, site, allelic_state);
out:
Expand Down Expand Up @@ -553,7 +554,7 @@ ancestor_matcher_reset(ancestor_matcher_t *self)
assert(self->num_nodes <= self->max_nodes);

memset(self->allelic_state, 0xff, self->num_nodes * sizeof(*self->allelic_state));
ret = tsk_blkalloc_reset(&self->traceback_allocator);
ret = tsi_blkalloc_reset(&self->traceback_allocator);
if (ret != 0) {
goto out;
}
Expand Down
Loading
Loading