-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial version with basic match functionality
- Loading branch information
Showing
9 changed files
with
1,209 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ __pycache__/ | |
# Distribution / packaging | ||
.Python | ||
env/ | ||
.virtualenv/ | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,69 @@ | ||
# reference-server | ||
A simple illustrative reference server for the Matchmaker Exchange API | ||
# Matchmaker Exchange Reference Server | ||
A simple illustrative reference server for the Matchmaker Exchange API. | ||
|
||
The server is backed by elasticsearch, and creates local indexes of the Human Phenotype Ontology, Ensembl-Entrez-HGNC gene symbol mappings, and the MME API benchmark set of 50 rare disease patients. | ||
|
||
## Dependencies | ||
- Python 3.X (not yet tested on 2.7 but should be easy to get working) | ||
- elasticsearch 2.X | ||
|
||
|
||
## Quickstart | ||
|
||
1. Start up a local elasticsearch cluster, for example: | ||
|
||
```bash | ||
$ wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.1.1/elasticsearch-2.1.1.tar.gz | ||
$ tar -xzf elasticsearch-2.1.1.tar.gz | ||
$ cd elasticsearch-2.1.1/ | ||
$ ./bin/elasticsearch | ||
``` | ||
|
||
1. Set up your Python virtual environment and install necessary Python packages, for example: | ||
|
||
```bash | ||
$ virtualenv -p python3 --prompt="(mme-server)" .virtualenv | ||
$ source .virtualenv/bin/activate | ||
$ pip install -r requirements.txt | ||
``` | ||
|
||
1. Download and index vocabularies and sample data: | ||
|
||
```bash | ||
$ python datastore.py | ||
``` | ||
|
||
1. Run tests: | ||
|
||
```bash | ||
$ python test.py | ||
``` | ||
|
||
1. Start up MME reference server: | ||
|
||
```bash | ||
$ python server.py | ||
``` | ||
|
||
By default, the server listens globally (`--host 0.0.0.0`) on port 8000 (`--port 8000`). | ||
|
||
1. Try it out: | ||
|
||
```bash | ||
$ curl -XPOST -d '{"patient":{ \ | ||
"id":"1", \ | ||
"contact": {"name":"Jane Doe", "href":"mailto:jane.doe@example.org"}, \ | |
"features":[{"id":"HP:0000522"}], \ | ||
"genomicFeatures":[{"gene":{"id":"NGLY1"}}] \ | ||
}}' localhost:8000/match | ||
``` | ||
|
||
|
||
## TODO | ||
- Avoid costly/redundant parsing of `api.Patient` objects when generating MatchResponse objects from patients in database | |
- Inspect `Accept` header for API versioning | |
- Add `Content-Type` header to responses | ||
- Handle errors with proper HTTP statuses and JSON message bodies | ||
- Add tests for gene index | ||
- Add end-to-end API query tests | ||
- Add parser tests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
""" | ||
The API module: | ||
Contains API methods and classes for API objects. | ||
Handles parsing of API requests into API objects, and serializing API objects into API responses. | ||
Also contains some code to help convert API objects to their database representations. | ||
""" | ||
from __future__ import with_statement, division, unicode_literals | ||
|
||
import json | ||
|
||
from datastore import DatastoreConnection | ||
|
||
|
||
class Feature:
    """A phenotype feature taken from the 'features' list of a patient.

    The feature's 'id' is resolved through the datastore into a vocabulary
    term, which is kept on ``self.term`` and read via the ``'id'`` and
    ``'term_category'`` keys.
    """

    # Backend connection used to look up vocabulary terms; created once,
    # when the class body is executed, and shared by every instance.
    db = DatastoreConnection()

    def __init__(self, data):
        """Build a feature from its JSON (dict) representation.

        ``data['id']`` is required. ``data['observed']`` is optional and
        defaults to ``'yes'``; any other value marks the feature as not
        observed.
        """
        observed_flag = data.get('observed', 'yes')
        self._observed = (observed_flag == 'yes')
        # TODO: parse ageOfOnset
        term_id = data['id']
        self.term = self.db.get_vocabulary_term(term_id)

    def _get_implied_terms(self):
        """Return the 'term_category' entry of the resolved term.

        NOTE(review): presumably the collection of term ids implied by this
        term (e.g. its ancestors) -- confirm against the datastore index.
        """
        resolved = self.term
        return resolved['term_category']

    def _get_id(self):
        """Return the 'id' entry of the resolved vocabulary term."""
        resolved = self.term
        return resolved['id']

    @property
    def observed(self):
        """True when the feature was reported as observed ('yes')."""
        return self._observed
|
||
|
||
class GenomicFeature:
    """A genomic feature taken from the 'genomicFeatures' list of a patient.

    Only the gene is parsed for now: when a gene id is supplied it is
    resolved through the datastore and the result is kept on ``self.term``;
    otherwise ``self.term`` is ``None``.
    """

    # Backend connection used to look up vocabulary terms; created once,
    # when the class body is executed, and shared by every instance.
    db = DatastoreConnection()

    def __init__(self, data):
        """Build a genomic feature from its JSON (dict) representation.

        The gene id is read from ``data['gene']['id']``; both levels are
        optional.
        """
        gene_json = data.get('gene', {})
        gene_id = gene_json.get('id')
        # TODO: parse additional genomicFeature fields
        self.term = self.db.get_vocabulary_term(gene_id) if gene_id else None

    def _get_gene_id(self):
        """Return the resolved term's 'id', or None when there is no term."""
        if not self.term:
            return None
        return self.term['id']
|
||
|
||
class Patient:
    """A patient parsed from the 'patient' object of an API request.

    Attributes set by the constructor:
        id, contact, label, age_of_onset, disorders, test: copied from the
            request data.
        features: list of Feature objects built from 'features'.
        genomic_features: list of GenomicFeature objects built from
            'genomicFeatures'.
    """

    def __init__(self, data):
        """Parse and validate a patient from its JSON (dict) representation.

        Args:
            data: dict with required keys 'id' and 'contact' (which needs
                non-empty 'name' and 'href'), at least one non-empty list
                under 'features' or 'genomicFeatures', and optional
                'disorders', 'label', 'ageOfOnset' and 'test'.

        Raises:
            KeyError: if 'id', 'contact', or the contact's 'name'/'href'
                keys are missing.
            AssertionError: if the contact name or href is empty, or if no
                features and no genomic features were provided.
        """
        self.id = data['id']
        self.contact = data['contact']
        # Raised explicitly (rather than via `assert`) so that validation of
        # request data is not stripped when Python runs with -O. The
        # exception type is kept as AssertionError for existing callers.
        if not (self.contact['name'] and self.contact['href']):
            raise AssertionError("Patient contact must have a non-empty 'name' and 'href'")

        # `or []` also covers keys that are present but explicitly null.
        features_json = data.get('features') or []
        genomic_features_json = data.get('genomicFeatures') or []

        if not (features_json or genomic_features_json):
            raise AssertionError("At least one of 'features' or 'genomicFeatures' must be provided")

        # Parse phenotype terms
        features = [Feature(feature_json) for feature_json in features_json]

        # Parse genomic features
        genomic_features = [GenomicFeature(gf_json) for gf_json in genomic_features_json]

        if not (features or genomic_features):
            raise AssertionError("Was unable to parse any phenotype or gene terms")

        self.label = data.get('label')
        self.age_of_onset = data.get('ageOfOnset')
        self.features = features
        self.genomic_features = genomic_features
        self.disorders = data.get('disorders') or []
        self.test = data.get('test', False)

    def _get_genes(self):
        """Return the set of gene ids of all genomic features that have one."""
        gene_ids = (gf._get_gene_id() for gf in self.genomic_features)
        return set(gene_id for gene_id in gene_ids if gene_id)

    def _get_present_phenotypes(self):
        """Return the set of term ids of all features marked as observed."""
        return set(feature._get_id() for feature in self.features if feature.observed)

    def _get_implied_present_phenotypes(self):
        """Return the union of the implied terms of all observed features."""
        terms = set()
        for feature in self.features:
            if feature.observed:
                terms.update(feature._get_implied_terms())

        return terms

    def to_json(self):
        """Serialize the patient to a JSON-compatible dict.

        'id' and 'contact' (name and href only) are always present; 'label',
        'ageOfOnset', 'features', 'genomicFeatures', 'disorders' and 'test'
        are included only when they have a truthy value. Only observed
        features are emitted. Feature and gene ids are sorted so the output
        is deterministic rather than following set iteration order.
        """
        data = {
            'id': self.id,
            'contact': {
                'name': self.contact['name'],
                'href': self.contact['href'],
            },
        }

        if self.label:
            data['label'] = self.label

        if self.age_of_onset:
            data['ageOfOnset'] = self.age_of_onset

        phenotype_ids = self._get_present_phenotypes()
        if phenotype_ids:
            data['features'] = [{'id': term_id} for term_id in sorted(phenotype_ids)]

        gene_ids = self._get_genes()
        if gene_ids:
            data['genomicFeatures'] = [{'gene': {'id': gene_id}} for gene_id in sorted(gene_ids)]

        if self.disorders:
            data['disorders'] = self.disorders

        if self.test:
            data['test'] = True

        return data
|
||
|
||
class MatchRequest:
    """A parsed match request: the query patient plus the raw request."""

    def __init__(self, request):
        """Parse ``request['patient']`` into a Patient and keep the raw request.

        Any exception raised while constructing the Patient propagates to
        the caller.
        """
        patient_json = request['patient']
        self.patient = Patient(patient_json)
        # The unparsed request is retained as-is.
        self._data = request
|
||
|
||
class MatchResult:
    """A single match: a matched patient together with its score."""

    def __init__(self, match, score):
        """Store the matched patient (``match``) and its ``score``."""
        self.score = score
        self.match = match

    def to_json(self):
        """Serialize to a dict with 'score' and 'patient' entries.

        The score is nested under ``{'patient': ...}`` and the patient is
        serialized through its own ``to_json`` method.
        """
        return {
            'score': {'patient': self.score},
            'patient': self.match.to_json(),
        }
|
||
|
||
def match(request, backend=None):
    """Find patients similar to the one in a match request.

    Args:
        request: the MatchRequest whose patient is used as the query.
        backend: object providing ``find_similar_patients(patient)``, which
            yields ``(score, patient)`` pairs. A new DatastoreConnection is
            created when omitted.

    Returns:
        A MatchResponse wrapping one MatchResult per pair, in the order the
        backend yields them.

    Raises:
        AssertionError: if ``request`` is not a MatchRequest.
    """
    assert isinstance(request, MatchRequest), "Argument to match must be MatchRequest object"

    # Compare against None so that a caller-supplied backend is never
    # replaced just because it happens to evaluate as falsy.
    if backend is None:
        backend = DatastoreConnection()

    # Unpack patient and query backend; the loop variable is named
    # separately so it does not shadow the query patient.
    query_patient = request.patient
    matches = [
        MatchResult(matched_patient, score)
        for score, matched_patient in backend.find_similar_patients(query_patient)
    ]

    return MatchResponse(matches)
|
||
|
||
class MatchResponse:
    """The response to a match request: an iterable of match results."""

    def __init__(self, response):
        """Keep ``response``, an iterable of objects exposing ``to_json()``."""
        self._data = response

    def to_json(self):
        """Serialize to ``{'results': [...]}``, one entry per match, in order."""
        serialized = []
        for result in self._data:
            serialized.append(result.to_json())
        return {'results': serialized}
Oops, something went wrong.