Skip to content

Commit

Permalink
Initial version with basic match functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
buske committed Jan 5, 2016
1 parent ed074f8 commit abc78a7
Show file tree
Hide file tree
Showing 9 changed files with 1,209 additions and 2 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ __pycache__/
# Distribution / packaging
.Python
env/
.virtualenv/
build/
develop-eggs/
dist/
Expand Down
71 changes: 69 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,69 @@
# reference-server
A simple illustrative reference server for the Matchmaker Exchange API
# Matchmaker Exchange Reference Server
A simple illustrative reference server for the Matchmaker Exchange API.

The server is backed by elasticsearch, and creates local indexes of the Human Phenotype Ontology, Ensembl-Entrez-HGNC gene symbol mappings, and the MME API benchmark set of 50 rare disease patients.

## Dependencies
- Python 3.X (not yet tested on 2.7 but should be easy to get working)
- elasticsearch 2.X


## Quickstart

1. Start up a local elasticsearch cluster, for example:

```bash
$ wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.1.1/elasticsearch-2.1.1.tar.gz
$ tar -xzf elasticsearch-2.1.1.tar.gz
$ cd elasticsearch-2.1.1/
$ ./bin/elasticsearch
```

1. Set up your Python virtual environment and install necessary Python packages, for example:

```bash
$ virtualenv -p python3 --prompt="(mme-server)" .virtualenv
$ source .virtualenv/bin/activate
$ pip install -r requirements.txt
```

1. Download and index vocabularies and sample data:

```bash
$ python datastore.py
```

1. Run tests:

```bash
$ python test.py
```

1. Start up MME reference server:

```bash
$ python server.py
```

By default, the server listens globally (`--host 0.0.0.0`) on port 8000 (`--port 8000`).

1. Try it out:

```bash
$ curl -XPOST -d '{"patient":{ \
"id":"1", \
"contact": {"name":"Jane Doe", "href":"mailto:jane.doe@example.org"}, \
"features":[{"id":"HP:0000522"}], \
"genomicFeatures":[{"gene":{"id":"NGLY1"}}] \
}}' localhost:8000/match
```


## TODO
- Avoid costly/redundant parsing of `api.Patient` objects when generating MatchResponse objects from patients in the database
- Inspect the `Accept` header for API versioning
- Add `Content-Type` header to responses
- Handle errors with proper HTTP statuses and JSON message bodies
- Add tests for gene index
- Add end-to-end API query tests
- Add parser tests
178 changes: 178 additions & 0 deletions api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
"""
The API module:
Contains API methods and classes for API objects.
Handles parsing of API requests into API objects, and serializing API objects into API responses.
Also contains some code to help convert API objects to their database representations.
"""
from __future__ import with_statement, division, unicode_literals

import json

from datastore import DatastoreConnection


class Feature:
    """A phenotype feature of a patient, resolved against the backend vocabulary.

    Attributes:
        term: The vocabulary entry returned by the backend for the feature id.
        observed: Read-only flag; True when the feature was reported as observed.
    """

    # Connection to backend to validate vocabulary terms (shared by all instances)
    db = DatastoreConnection()

    def __init__(self, data):
        """Build a feature from its API JSON object.

        Args:
            data: Mapping with a required 'id' key and an optional 'observed'
                key. A missing 'observed' is treated as 'yes'.
        """
        # TODO: parse ageOfOnset
        observed_value = data.get('observed', 'yes')
        # Only the exact string 'yes' counts as observed.
        self._observed = (observed_value == 'yes')
        self.term = self.db.get_vocabulary_term(data['id'])

    @property
    def observed(self):
        """Whether the feature was reported as observed."""
        return self._observed

    def _get_id(self):
        """Return the 'id' entry of the resolved vocabulary term."""
        return self.term['id']

    def _get_implied_terms(self):
        """Return the 'term_category' entry of the resolved vocabulary term."""
        return self.term['term_category']


class GenomicFeature:
    """A genomic feature of a patient; currently only the gene id is parsed.

    Attributes:
        term: The vocabulary entry returned by the backend for the gene id, or
            None when the input carried no gene id.
    """

    # Connection to backend to validate vocabulary terms (shared by all instances)
    db = DatastoreConnection()

    def __init__(self, data):
        """Build a genomic feature from its API JSON object.

        Args:
            data: Mapping that may contain a 'gene' mapping with an 'id' key.
        """
        # TODO: parse additional genomicFeature fields
        gene_json = data.get('gene', {})
        gene_id = gene_json.get('id')
        # Only hit the backend when a non-empty gene id was supplied.
        self.term = self.db.get_vocabulary_term(gene_id) if gene_id else None

    def _get_gene_id(self):
        """Return the 'id' entry of the resolved gene term, or None without one."""
        if not self.term:
            return None
        return self.term['id']


class Patient:
    """A patient record parsed from the API's JSON representation.

    Attributes:
        id: The patient id taken from the input.
        contact: The input 'contact' mapping (must have 'name' and 'href').
        label: Optional 'label' value, or None.
        age_of_onset: Optional 'ageOfOnset' value, or None.
        features: List of Feature objects built from 'features'.
        genomic_features: List of GenomicFeature objects built from
            'genomicFeatures'.
        disorders: The input 'disorders' list (empty list when absent).
        test: The input 'test' value (False when absent).
    """

    def __init__(self, data):
        """Parse and validate a patient JSON object.

        Args:
            data: Mapping with required 'id' and 'contact' keys and at least
                one non-empty list under 'features' or 'genomicFeatures'.

        Raises:
            KeyError: If 'id', 'contact', or a contact field is missing.
            AssertionError: If a contact field is empty, or if neither
                'features' nor 'genomicFeatures' has any entries.
        """
        self.id = data['id']
        self.contact = data['contact']
        # Raised explicitly instead of via `assert` so the validation is not
        # stripped under `python -O`; AssertionError is kept so callers see
        # the same exception type as before.
        if not (self.contact['name'] and self.contact['href']):
            raise AssertionError("Patient contact must include a non-empty 'name' and 'href'")

        features_json = data.get('features', [])
        genomic_features_json = data.get('genomicFeatures', [])
        if not (features_json or genomic_features_json):
            raise AssertionError("At least one of 'features' or 'genomicFeatures' must be provided")

        # Each JSON entry yields exactly one parsed object, so the parsed lists
        # are non-empty whenever the check above passed; no second check needed.
        self.features = [Feature(feature_json) for feature_json in features_json]
        self.genomic_features = [GenomicFeature(gf_json) for gf_json in genomic_features_json]

        self.disorders = data.get('disorders', [])
        self.label = data.get('label')
        self.age_of_onset = data.get('ageOfOnset')
        self.test = data.get('test', False)

    def _get_genes(self):
        """Return the set of gene ids of all genomic features that have one."""
        gene_ids = (gf._get_gene_id() for gf in self.genomic_features)
        return {gene_id for gene_id in gene_ids if gene_id}

    def _get_present_phenotypes(self):
        """Return the set of term ids of all observed features."""
        return {feature._get_id() for feature in self.features if feature.observed}

    def _get_implied_present_phenotypes(self):
        """Return the union of implied terms over all observed features."""
        terms = set()
        for feature in self.features:
            if feature.observed:
                terms.update(feature._get_implied_terms())

        return terms

    def to_json(self):
        """Serialize the patient to a JSON-compatible dict.

        Optional fields are emitted only when truthy. Feature and gene ids are
        sorted so the output is deterministic across runs.

        Returns:
            A dict with 'id' and 'contact', plus any of 'label', 'ageOfOnset',
            'features', 'genomicFeatures', 'disorders' and 'test'.
        """
        data = {
            'id': self.id,
            'contact': {
                'name': self.contact['name'],
                'href': self.contact['href'],
            },
        }

        if self.label:
            data['label'] = self.label

        if self.age_of_onset:
            data['ageOfOnset'] = self.age_of_onset

        phenotype_ids = self._get_present_phenotypes()
        if phenotype_ids:
            data['features'] = [{'id': term_id} for term_id in sorted(phenotype_ids)]

        gene_ids = self._get_genes()
        if gene_ids:
            data['genomicFeatures'] = [{'gene': {'id': gene_id}} for gene_id in sorted(gene_ids)]

        if self.disorders:
            data['disorders'] = self.disorders

        if self.test:
            data['test'] = True

        return data


class MatchRequest:
    """A parsed match request.

    Attributes:
        patient: The Patient built from the request's 'patient' entry.
    """

    def __init__(self, request):
        """Parse a match request.

        Args:
            request: Mapping with a 'patient' key holding the patient JSON.
        """
        patient_json = request['patient']
        self.patient = Patient(patient_json)
        # Keep the raw request alongside the parsed patient.
        self._data = request


class MatchResult:
    """A single match: a matched object paired with its score.

    Attributes:
        match: The matched object; must provide a ``to_json()`` method.
        score: The score assigned to the match.
    """

    def __init__(self, match, score):
        """Store the matched object and its score."""
        self.score = score
        self.match = match

    def to_json(self):
        """Return a dict with the score under 'score' -> 'patient' and the
        serialized match under 'patient'."""
        return {
            'score': {'patient': self.score},
            'patient': self.match.to_json(),
        }


def match(request, backend=None):
    """Find patients similar to the one in a match request.

    Args:
        request: The MatchRequest whose patient is used as the query.
        backend: Object providing ``find_similar_patients(patient)``, which
            yields ``(score, patient)`` pairs. A new DatastoreConnection is
            created when this is None.

    Returns:
        A MatchResponse wrapping one MatchResult per pair from the backend,
        in the order the backend produced them.
    """
    assert isinstance(request, MatchRequest), "Argument to match must be MatchRequest object"

    # Compare against None explicitly so a caller-supplied backend is never
    # replaced just because it happens to evaluate as falsy.
    if backend is None:
        backend = DatastoreConnection()

    # Unpack patient and query backend
    query_patient = request.patient
    matches = [
        MatchResult(matched_patient, score)
        for score, matched_patient in backend.find_similar_patients(query_patient)
    ]

    return MatchResponse(matches)


class MatchResponse:
    """The response to a match request: a collection of match results."""

    def __init__(self, response):
        """Store the iterable of results; each must provide ``to_json()``."""
        self._data = response

    def to_json(self):
        """Return a dict with every serialized result listed under 'results'."""
        serialized = []
        for result in self._data:
            serialized.append(result.to_json())
        return {'results': serialized}
Loading

0 comments on commit abc78a7

Please sign in to comment.