diff --git a/.gitignore b/.gitignore index ba74660..d520285 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ # Distribution / packaging .Python env/ +.virtualenv/ build/ develop-eggs/ dist/ diff --git a/README.md b/README.md index 4069408..6f34bb5 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,69 @@ -# reference-server -A simple illustrative reference server for the Matchmaker Exchange API +# Matchmaker Exchange Reference Server +A simple illustrative reference server for the Matchmaker Exchange API. + +The server is backed by elasticsearch, and creates local indexes of the Human Phenotype Ontology, Ensembl-Entrez-HGNC gene symbol mappings, and the MME API benchmark set of 50 rare disease patients. + +## Dependencies +- Python 3.X (not yet tested on 2.7 but should be easy to get working) +- elasticsearch 2.X + + +## Quickstart + +1. Start up a local elasticsearch cluster, for example: + + ```bash + $ wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.1.1/elasticsearch-2.1.1.tar.gz + $ tar -xzf elasticsearch-2.1.1.tar.gz + $ cd elasticsearch-2.1.1/ + $ ./bin/elasticsearch + ``` + +1. Set up your Python virtual environment and install necessary Python packages, for example: + + ```bash + $ virtualenv -p python3 --prompt="(mme-server)" .virtualenv + $ source .virtualenv/bin/activate + $ pip install -r requirements.txt + ``` + +1. Download and index vocabularies and sample data: + + ```bash + $ python datastore.py + ``` + +1. Run tests: + + ```bash + $ python test.py + ``` + +1. Start up MME reference server: + + ```bash + $ python server.py + ``` + + By default, the server listens globally (`--host 0.0.0.0`) on port 8000 (`--port 8000`). + +1. Try it out: + + ```bash + $ curl -XPOST -d '{"patient":{ \ + "id":"1", \ + "contact": {"name":"Jane Doe", "href":"mailto:jdoe@example.edu"}, \ + "features":[{"id":"HP:0000522"}], \ + "genomicFeatures":[{"gene":{"id":"NGLY1"}}] \ + }}' localhost:8000/match + ``` + + +## TODO +- Avoid costly/redundant parsing `api.Patient` objects when generating MatchResponse objects from patients in database +- Inspect `Accepts` header for API versioning +- Add `Content-Type` header to responses +- Handle errors with proper HTTP statuses and JSON message bodies +- Add tests for gene index +- Add end-to-end API query tests +- Add parser tests diff --git a/api.py b/api.py new file mode 100644 index 0000000..cf65101 --- /dev/null +++ b/api.py @@ -0,0 +1,178 @@ +""" +The API module: + +Contains API methods and classes for API objects. +Handles parsing of API requests into API objects, and serializing API objects into API responses. + +Also contains some code to help convert API objects to their database representations. 
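+
+Illustrative usage (a sketch, assuming a local elasticsearch cluster is running
+and the indexes have been built with `python datastore.py`), using the example
+patient from the README:
+
+    import api
+    request = api.MatchRequest({
+        "patient": {
+            "id": "1",
+            "contact": {"name": "Jane Doe", "href": "mailto:jdoe@example.edu"},
+            "features": [{"id": "HP:0000522"}],
+            "genomicFeatures": [{"gene": {"id": "NGLY1"}}],
+        }
+    })
+    response = api.match(request)  # returns an api.MatchResponse
+    print(response.to_json())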
+""" +from __future__ import with_statement, division, unicode_literals + +import json + +from datastore import DatastoreConnection + + +class Feature: + # Connection to backend to validate vocabulary terms + db = DatastoreConnection() + + def __init__(self, data): + self._observed = data.get('observed', 'yes') == 'yes' + # TODO: parse ageOfOnset + self.term = self.db.get_vocabulary_term(data['id']) + + def _get_implied_terms(self): + return self.term['term_category'] + + def _get_id(self): + return self.term['id'] + + @property + def observed(self): + return self._observed + + +class GenomicFeature: + # Connection to backend to validate vocabulary terms + db = DatastoreConnection() + + def __init__(self, data): + self.term = None + gene_id = data.get('gene', {}).get('id') + # TODO: parse additional genomicFeature fields + if gene_id: + self.term = self.db.get_vocabulary_term(gene_id) + + def _get_gene_id(self): + if self.term: + return self.term['id'] + + +class Patient: + def __init__(self, data): + self.id = data['id'] + self.contact = data['contact'] + assert self.contact['name'] and self.contact['href'] + + features_json = data.get('features', []) + genomic_features_json = data.get('genomicFeatures', []) + + assert features_json or genomic_features_json, "At least one of 'features' or 'genomicFeatures' must be provided" + + # Parse phenotype terms + features = [Feature(feature_json) for feature_json in features_json] + + # Parse genomic features + genomic_features = [GenomicFeature(gf_json) for gf_json in genomic_features_json] + + assert features or genomic_features, "Was unable to parse any phenotype or gene terms" + + disorders = data.get('disorders', []) + self.label = data.get('label') + self.age_of_onset = data.get('ageOfOnset') + self.features = features + self.genomic_features = genomic_features + self.disorders = disorders + self.test = data.get('test', False) + + def _get_genes(self): + genes = set() + for genomic_feature in self.genomic_features: + gene_id = genomic_feature._get_gene_id() + if gene_id: + genes.add(gene_id) + + return genes + + def _get_present_phenotypes(self): + terms = set() + for feature in self.features: + if feature.observed: + terms.add(feature._get_id()) + + return terms + + def _get_implied_present_phenotypes(self): + terms = set() + for feature in self.features: + if feature.observed: + terms.update(feature._get_implied_terms()) + + return terms + + def to_json(self): + data = { + 'id': self.id, + 'contact': { + 'name': self.contact['name'], + 'href': self.contact['href'], + } + } + + if self.label: + data['label'] = self.label + + if self.age_of_onset: + data['ageOfOnset'] = self.age_of_onset + + phenotype_ids = self._get_present_phenotypes() + if phenotype_ids: + data['features'] = [{'id': id} for id in phenotype_ids] + + gene_ids = self._get_genes() + if gene_ids: + data['genomicFeatures'] = [{'gene': {'id': gene_id}} for gene_id in gene_ids] + + if self.disorders: + data['disorders'] = self.disorders + + if self.test: + data['test'] = True + + return data + + +class MatchRequest: + def __init__(self, request): + self.patient = Patient(request['patient']) + self._data = request + + +class MatchResult: + def __init__(self, match, score): + self.match = match + self.score = score + + def to_json(self): + response = {} + response['score'] = {'patient': self.score} + response['patient'] = self.match.to_json() + return response + + +def match(request, backend=None): + assert isinstance(request, MatchRequest), "Argument to match must be MatchResponse 
object" + + if not backend: + backend = DatastoreConnection() + + matches = [] + # Unpack patient and query backend + patient = request.patient + for score, patient in backend.find_similar_patients(patient): + match = MatchResult(patient, score) + matches.append(match) + + response = MatchResponse(matches) + return response + + +class MatchResponse: + def __init__(self, response): + self._data = response + + def to_json(self): + response = {} + response['results'] = [match.to_json() for match in self._data] + return response diff --git a/datastore.py b/datastore.py new file mode 100644 index 0000000..69efff6 --- /dev/null +++ b/datastore.py @@ -0,0 +1,339 @@ +""" +Module for interfacing with the backend database (elasticsearch). + +Stores: +* Patient records (`patients` index) +* Human Phenotype Ontology (HPO) (`hpo` index) +* Ensembl-Entrez-HGNC-GeneSymbol mappings (`genes` index) +""" + +from __future__ import with_statement, division, unicode_literals + +import sys +import os +import json +import logging + +try: + from urllib import urlretrieve +except ImportError: + from urllib.request import urlretrieve + +from elasticsearch import Elasticsearch +from parsers import OBOParser, GeneParser + +logger = logging.getLogger(__name__) + +# Matchmaker Exchange benchmark dataset of 50 patients +TEST_DATA_URL = 'https://raw.githubusercontent.com/ga4gh/mme-apis/hotfix/v1.0b/testing/benchmark_patients.json' +DEFAULT_DATA_FILENAME = 'data.json' + +# Human Phenotype Ontology +HPO_URL = 'http://purl.obolibrary.org/obo/hp.obo' +DEFAULT_HPO_FILENAME = 'hp.obo' + +# Gene identifier mappings, from HGNC: www.genenames.org +# Ensembl Genes 83 (GRCh38.p5) +# Columns: +# HGNC ID +# Approved Symbol +# Approved Name +# Synonyms +# Entrez Gene ID (supplied by NCBI) +# Ensembl Gene ID (supplied by Ensembl) +GENE_URL = 'http://www.genenames.org/cgi-bin/download?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_aliases&col=md_eg_id&col=md_ensembl_id&status=Approved&status_opt=2&where=&order_by=gd_app_sym_sort&format=text&limit=&hgnc_dbtag=on&submit=submit' +DEFAULT_GENE_FILENAME = 'genes.tsv' + + +class PatientManager: + TYPE_NAME = 'patient' + INDEX_CONFIG = { + 'mappings': { + TYPE_NAME: { + '_all': { + 'enabled': False, + }, + 'properties': { + 'phenotype': { + 'type': 'string', + 'index': 'not_analyzed', + }, + 'gene': { + 'type': 'string', + 'index': 'not_analyzed', + }, + 'doc': { + 'type': 'object', + 'enabled': False, + 'include_in_all': False, + } + } + } + } + } + + def __init__(self, backend, index='patients'): + self._db = backend + self._index = index + + def index(self, filename): + """Populate the database with patient data from the given file""" + from api import Patient + + if self._db.indices.exists(index=self._index): + logging.warning("Patient index already exists: '{}'".format(self._index)) + return + else: + logging.info("Creating patient ElasticSearch index: '{}'".format(self._index)) + self._db.indices.create(index=self._index, body=self.INDEX_CONFIG) + + with open(filename) as ifp: + data = json.load(ifp) + + logging.info("Found data for {} patient records".format(len(data))) + for record in data: + patient = Patient(record) + self.add_patient(patient) + + # Update index before returning record count + self._db.indices.refresh(index=self._index) + n = self._db.count(index=self._index, doc_type=self.TYPE_NAME) + logger.info('Datastore now contains {} records'.format(n['count'])) + + def add_patient(self, patient): + """Add the provided api.Patient object to the datastore""" + id = 
patient.id + data = self._patient_to_index(patient) + self._db.index(index=self._index, doc_type=self.TYPE_NAME, id=id, body=data) + logging.info("Indexed patient: '{}'".format(id)) + + def _patient_to_index(self, patient): + genes = patient._get_genes() + phenotypes = patient._get_implied_present_phenotypes() + + return { + 'phenotype': list(phenotypes), + 'gene': list(genes), + 'doc': patient.to_json(), + } + + def find_similar_patients(self, patient, n=5): + """Return the n most similar patients to the given query api.Patient""" + from api import Patient + + query_parts = [] + for id in patient._get_implied_present_phenotypes(): + query_parts.append({'match': {'phenotype': id}}) + + for gene_id in patient._get_genes(): + query_parts.append({'match': {'gene': gene_id}}) + + query = { + 'query': { + 'bool': { + 'should': [ + query_parts + ] + } + } + } + + result = self._db.search(index=self._index, body=query) + + scored_patients = [] + for hit in result['hits']['hits'][:n]: + # Just use the ElasticSearch TF/IDF score, normalized to [0, 1] + score = 1 - 1 / (1 + hit['_score']) + scored_patients.append((score, Patient(hit['_source']['doc']))) + + return scored_patients + + +class VocabularyManager: + TERM_TYPE_NAME = 'term' + META_TYPE_NAME = 'meta' + INDEX_CONFIG = { + 'mappings': { + TERM_TYPE_NAME: { + '_all': { + 'enabled': False, + }, + 'properties': { + 'id': { + 'type': 'string', + 'index': 'not_analyzed', + }, + 'name': { + 'type': 'string', + }, + 'synonym': { + 'type': 'string', + }, + 'alt_id': { + 'type': 'string', + 'index': 'not_analyzed', + }, + 'is_a': { + 'type': 'string', + 'index': 'not_analyzed', + }, + 'term_category': { + 'type': 'string', + 'index': 'not_analyzed', + }, + } + } + } + } + + def __init__(self, backend): + self._db = backend + + def index(self, index, filename, Parser): + """Index terms from the given file + + :param index: the id of the index + :param filename: the path to the vocabulary file + :param Parser: the Parser class to use to parse the vocabulary file + """ + parser = Parser(filename) + + if self._db.indices.exists(index=index): + logging.warning('Vocabulary index already exists: {!r} ... 
skipping'.format(index)) + return + else: + logging.info("Creating index: {!r}".format(index)) + self._db.indices.create(index=index, body=self.INDEX_CONFIG) + + logging.info("Parsing vocabulary from: {!r}".format(filename)) + commands = [] + for term in parser: + id = term['id'] + command = [ + {'index': {'_id': id}}, + term, + ] + commands.extend(command) + + data = "".join([json.dumps(command) + "\n" for command in commands]) + self._db.bulk(data, index=index, doc_type=self.TERM_TYPE_NAME, refresh=True) + + n = self._db.count(index=index, doc_type=self.TERM_TYPE_NAME) + logger.info('Index now contains {} terms'.format(n['count'])) + + def get_term(self, id, index='_all'): + """Get vocabulary term by ID""" + query = { + 'query': { + 'filtered': { + 'filter': { + 'bool': { + 'should': [ + {'term': {'id': id}}, + {'term': {'alt_id': id}}, + ] + } + } + } + } + } + results = self._db.search(index=index, doc_type=self.TERM_TYPE_NAME, body=query) + if results['hits']['total'] == 1: + return results['hits']['hits'][0]['_source'] + else: + logger.error("Unable to uniquely resolve term: {!r}".format(id)) + + +class DatastoreConnection: + def __init__(self): + self._es = Elasticsearch() + self._patients = PatientManager(self) + self._vocabularies = VocabularyManager(self) + + def index_patients(self, filename): + return self._patients.index(filename) + + def index_hpo(self, filename): + return self._vocabularies.index(index='hpo', filename=filename, Parser=OBOParser) + + def index_genes(self, filename): + return self._vocabularies.index(index='genes', filename=filename, Parser=GeneParser) + + def get_vocabulary_term(self, id, index='_all'): + return self._vocabularies.get_term(id, index=index) + + def find_similar_patients(self, patient, n=5): + """Return the n most similar patients to the given query api.Patient""" + return self._patients.find_similar_patients(patient=patient, n=n) + + def search(self, *args, **kwargs): + """Expose ElasticSearch method""" + return self._es.search(*args, **kwargs) + + def bulk(self, *args, **kwargs): + """Expose ElasticSearch method""" + return self._es.bulk(*args, **kwargs) + + def index(self, *args, **kwargs): + """Expose ElasticSearch method""" + return self._es.index(*args, **kwargs) + + def count(self, *args, **kwargs): + """Expose ElasticSearch method""" + return self._es.count(*args, **kwargs) + + @property + def indices(self): + """Expose ElasticSearch property""" + return self._es.indices + + +def initialize_backend(data_filename, hpo_filename, gene_filename): + backend = DatastoreConnection() + backend.index_hpo(hpo_filename) + backend.index_genes(gene_filename) + # Patients must be indexed AFTER vocabularies + backend.index_patients(data_filename) + + +def fetch_resource(url, filename): + if os.path.isfile(filename): + logger.info('Found local resource: {}'.format(filename)) + else: + logger.info('Downloading file from: {}'.format(url)) + urlretrieve(url, filename) + logger.info('Saved file to: {}'.format(filename)) + + +def parse_args(args): + from argparse import ArgumentParser + + description = "Initialize datastore with example data and necessary vocabularies" + + parser = ArgumentParser(description=description) + parser.add_argument("--data-file", default=DEFAULT_DATA_FILENAME, + dest="data_filename", metavar="FILE", + help="Load data from the following file (will download test data if file does not exist)") + parser.add_argument("--hpo-file", default=DEFAULT_HPO_FILENAME, + dest="hpo_filename", metavar="FILE", + help="Load HPO from the following 
file (will download if file does not exist)") + parser.add_argument("--gene-file", default=DEFAULT_GENE_FILENAME, + dest="gene_filename", metavar="FILE", + help="Load gene mappings from the following file (will download if file does not exist)") + + return parser.parse_args(args) + + +def main(args=sys.argv[1:]): + args = parse_args(args) + logging.basicConfig(level='INFO') + + fetch_resource(TEST_DATA_URL, args.data_filename) + fetch_resource(HPO_URL, args.hpo_filename) + fetch_resource(GENE_URL, args.gene_filename) + + initialize_backend(args.data_filename, args.hpo_filename, args.gene_filename) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/obo.py b/obo.py new file mode 100644 index 0000000..ecb4276 --- /dev/null +++ b/obo.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python +""" +Downloaded from: https://github.com/ntamas/gfam/blob/master/gfam/go/obo.py + +A very simple and not 100% compliant parser for the OBO file format. + +This parser is supplied "as is"; it is not an official parser, it +might puke on perfectly valid OBO files, it might parse perfectly +invalid OBO files, it might steal your kitten or set your garden shed +on fire. Apart from that, it should be working, or at least +it should be in a suitable condition to parse the Gene Ontology, +which is my only test case anyway. + +Usage example:: + + from obo import Parser + parser = Parser(open("hp.obo")) + hp_ontology = {} + for stanza in parser: + hp_ontology[stanza.tags["id"][0]] = stanza.tags +""" + +__author__ = "Tamas Nepusz" +__email__ = "tamas@cs.rhul.ac.uk" +__copyright__ = "Copyright (c) 2009, Tamas Nepusz" +__license__ = "MIT" +__version__ = "0.1" + +__all__ = ["ParseError", "Stanza", "Parser", "Value"] + +try: + from io import StringIO +except ImportError: + from cStringIO import StringIO + +import re +import tokenize +import logging + + +class ParseError(Exception): + pass + + +# pylint:disable-msg=R0903 +# R0903: too few public methods +class Value(object): + """Class representing a value and its modifiers in the OBO file + + This class has two member variables. `value` is the value itself, + `modifiers` are the corresponding modifiers in a tuple. Currently + the modifiers are not parsed in any way, but this might change in + the future. + """ + + __slots__ = ["value", "modifiers"] + + def __init__(self, value, modifiers=()): + self.value = str(value) + if modifiers: + self.modifiers = tuple(modifiers) + else: + self.modifiers = None + + def __str__(self): + """Returns the value itself (without modifiers)""" + return str(self.value) + + def __repr__(self): + """Returns a Python representation of this object""" + return "{:s}({!r}, {!r})".format(self.__class__.__name__, + self.value, self.modifiers) + + +# pylint:disable-msg=R0903 +# R0903: too few public methods +class Stanza(object): + """Class representing an OBO stanza. + + An OBO stanza looks like this:: + + [name] + tag: value + tag: value + tag: value + + Values may optionally have modifiers, see the OBO specification + for more details. This class stores the stanza name in the + `name` member variable and the tags and values in a Python + dict called `tags`. Given a valid stanza, you can do stuff like + this: + + >>> stanza.name + "Term" + >>> stanza.tags["id"] + ['GO:0015036'] + >>> stanza.tags["name"] + ['disulfide oxidoreductase activity'] + + Note that the `tags` dict contains lists associated to each + tag name. This is because theoretically there could be more than + a single value associated to a tag in the OBO file format. 
+ """ + + __slots__ = ["name", "tags"] + + def __init__(self, name, tags=None): + self.name = name + if tags: + self.tags = dict(tags) + else: + self.tags = dict() + + def __repr__(self): + """Returns a Python representation of this object""" + return "{:s}({!r}, {!r})".format(self.__class__.__name__, + self.name, self.tags) + + +# pylint:disable-msg=R0903 +# R0903: too few public methods +class Parser(object): + """The main attraction, the OBO parser.""" + + def __init__(self, file_handle): + """Creates an OBO parser that reads the given file-like object. + If you want to create a parser that reads an OBO file, do this: + + >>> import obo + >>> parser = obo.Parser(open("hp.obo")) + + Only the headers are read when creating the parser. You can + access these right after construction as follows: + + >>> parser.headers["format-version"] + ['1.2'] + + To read the stanzas in the file, you must iterate over the + parser as if it were a list. The iterator yields `Stanza` + objects. + """ + self.file_handle = file_handle + self.line_re = re.compile(r"\s*(?P[^:]+):\s*(?P.*)") + self.lineno = 0 + self.headers = {} + self._extra_line = None + self._read_headers() + + def _lines(self): + """Iterates over the lines of the file, removing + comments and trailing newlines and merging multi-line + tag-value pairs into a single line""" + while True: + self.lineno += 1 + line = self.file_handle.readline() + if not line: + break + + line = line.strip() + if not line: + yield line + continue + + if line[0] == '!': + continue + if line[-1] == '\\': + # This line is continued in the next line + lines = [line[:-1]] + finished = False + while not finished: + self.lineno += 1 + line = self.file_handle.readline() + if line[0] == '!': + continue + line = line.strip() + if line[-1] == '\\': + lines.append(line[:-1]) + else: + lines.append(line) + finished = True + line = " ".join(lines) + else: + in_quotes = False + escape = False + comment_char_index = None + for index, char in enumerate(line): + if escape: + escape = False + continue + if char == '"': + in_quotes = not in_quotes + elif char == '\\' and in_quotes: + escape = True + elif char == '!' and not in_quotes: + comment_char_index = index + break + if comment_char_index is not None: + line = line[0:comment_char_index].strip() + + yield line + + def _parse_line(self, line): + """Parses a single line consisting of a tag-value pair + and optional modifiers. 
Returns the tag name and the + value as a `Value` object.""" + match = self.line_re.match(line) + if not match: + return False + tag, value_and_mod = match.group("tag"), match.group("value") + + # If the value starts with a quotation mark, we parse it as a + # Python string -- luckily this is the same as an OBO string + if value_and_mod and value_and_mod[0] == '"': + gen = tokenize.generate_tokens(StringIO(value_and_mod).readline) + for toknum, tokval, _, (_, ecol), _ in gen: + if toknum == tokenize.STRING: + value = eval(tokval) + mod = (value_and_mod[ecol:].strip(),) + break + raise ParseError("cannot parse string literal", self.lineno) + else: + value = value_and_mod + mod = None + + value = Value(value, mod) + return tag, value + + def _read_headers(self): + """Reads the headers from the OBO file""" + for line in self._lines(): + if not line or line[0] == '[': + # We have reached the end of headers + self._extra_line = line + return + key, value = self._parse_line(line) + try: + self.headers[key].append(value.value) + except KeyError: + self.headers[key] = [value.value] + + def stanzas(self): + """Iterates over the stanzas in this OBO file, + yielding a `Stanza` object for each stanza.""" + stanza = None + if self._extra_line and self._extra_line[0] == '[': + stanza = Stanza(self._extra_line[1:-1]) + for line in self._lines(): + if not line: + continue + if line[0] == '[': + if stanza: + yield stanza + stanza = Stanza(line[1:-1]) + continue + tag, value = self._parse_line(line) + try: + stanza.tags[tag].append(value) + except KeyError: + stanza.tags[tag] = [value] + + def __iter__(self): + return self.stanzas() + + +def test(): + """Simple smoke testing for the OBO parser""" + logging.basicConfig(level='INFO') + parser = Parser("hp.obo") + for i, _ in enumerate(parser): + if i % 1000 == 0: + logging.info("{:d} stanzas processed".format(i)) + logging.info("Parsing successful, {:d} stanzas".format(i)) + + +if __name__ == "__main__": + import sys + + sys.exit(test()) diff --git a/parsers.py b/parsers.py new file mode 100644 index 0000000..c667269 --- /dev/null +++ b/parsers.py @@ -0,0 +1,138 @@ +""" +Module for providing file and dataset parsing functionality. 
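+
+Each parser yields one dict per vocabulary term, suitable for indexing by
+datastore.VocabularyManager. For example (a sketch, assuming hp.obo has
+already been downloaded to the working directory):
+
+    from parsers import OBOParser
+    for term in OBOParser('hp.obo'):
+        print(term['id'], term['name'], len(term['term_category']))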
+""" + +from __future__ import with_statement, division, unicode_literals + +from csv import DictReader +from collections import defaultdict + +from obo import Parser as BaseOBOParser + + +class BaseParser: + def __init__(self, filename): + self._filename = filename + + def documents(self): + raise NotImplementedError() + + def __iter__(self): + return self.documents() + + +class OBOParser(BaseParser): + def documents(self): + parser = BaseOBOParser(open(self._filename)) + + # Parse all terms first + terms = {} + + def get_tag_strings(stanza, tag): + return list(map(str, stanza.tags.get(tag, []))) + + for stanza in parser: + id = str(stanza.tags['id'][0]) + name = str(stanza.tags['name'][0]) + alt_id = get_tag_strings(stanza, 'alt_id') + synonym = get_tag_strings(stanza, 'synonym') + is_a = get_tag_strings(stanza, 'is_a') + is_obsolete = get_tag_strings(stanza, 'is_obsolete') + # Skip obsolete terms + if is_obsolete and 'true' in is_obsolete: + continue + + terms[id] = { + 'id': id, + 'name': name, + 'synonym': synonym, + 'alt_id': alt_id, + 'is_a': is_a, + 'term_category': [], # Added later + } + + # Then compute ancestor paths for each term + def get_ancestors(node_id, ancestors=None): + if ancestors is None: + ancestors = set() + ancestors.add(node_id) + for parent_id in terms[node_id]['is_a']: + get_ancestors(parent_id, ancestors) + return ancestors + + for id in terms: + term = terms[id] + term['term_category'] = list(get_ancestors(id)) + + yield term + + +class TSVParser(BaseParser): + def _documents(self, columns): + with open(self._filename) as ifp: + reader = DictReader(ifp, delimiter='\t') + for row in reader: + term = defaultdict(list) + for column in columns: + key = column['column'] + field = column['field'] + prefix = column.get('prefix') + delimiter = column.get('delimiter') + length = column.get('length') + + value = row[key] + # Split multivalued fields + if delimiter: + values = value.split(delimiter) + else: + values = [value] + + # Ensure all lengths are correct + if length: + for value in values: + assert len(value) == length or not value + + # Prepend prefix to all values + if prefix: + values = ['{}:{}'.format(prefix, value) for value in values] + + term[field].extend(values) + + # ElasticSearch does not allow id field to be singleton list + assert len(term['id']) == 1 + term['id'] = term['id'][0] + yield term + + +class GeneParser(TSVParser): + def documents(self): + columns = [ + { + 'column': 'Ensembl ID(supplied by Ensembl)', + 'field': 'id', + 'length': 15, + }, + { + 'column': 'Approved Name', + 'field': 'name', + }, + { + 'column': 'Approved Symbol', + 'field': 'alt_id', + }, + { + 'column': 'Synonyms', + 'field': 'alt_id', + 'delimiter': ', ', + }, + { + 'column': 'Entrez Gene ID(supplied by NCBI)', + 'field': 'alt_id', + 'prefix': 'NCBIGene', + }, + { + 'column': 'HGNC ID', + 'field': 'alt_id', + }, + ] + return TSVParser._documents(self, columns) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d513307 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +Flask==0.10.1 +elasticsearch>=2.0.0,<3.0.0 diff --git a/server.py b/server.py new file mode 100644 index 0000000..917e8a9 --- /dev/null +++ b/server.py @@ -0,0 +1,67 @@ +""" +Usage: python server.py + +This is a minimum working example of a Matchmaker Exchange server. +It is intended strictly as a useful reference, and should not be +used in a production setting. 
+""" + +from __future__ import with_statement, division, unicode_literals + +import sys +import logging +import json + +import api +from flask import Flask, request, make_response, after_this_request + +DEFAULT_HOST = '0.0.0.0' +DEFAULT_PORT = 8000 +API_MIME_TYPE = 'application/vnd.ga4gh.matchmaker.v1.0+json' + +# Global flask application +app = Flask(__name__) +# Logger +logger = logging.getLogger(__name__) + + +# def add_content_type_header(response): +# response.headers['Content-Type'] = API_MIME_TYPE + +@app.route('/match', methods=['POST']) +def match(): + """Return patients similar to the query patient""" + logging.info("Getting flask request data") + data = request.get_json(force=True) + logging.info("Parsing query") + query = api.MatchRequest(data) + logging.info("Finding similar patients") + matches = api.match(query) + logging.info("Serializing response") + return (json.dumps(matches.to_json()), 200, {}) + + +def parse_args(args): + from argparse import ArgumentParser + + description = __doc__.strip() + + parser = ArgumentParser(description=description) + parser.add_argument("-p", "--port", default=DEFAULT_PORT, + dest="port", type=int, metavar="PORT", + help="The port the server will listen on") + parser.add_argument("--host", default=DEFAULT_HOST, + dest="host", metavar="IP", + help="The host the server will listen to (0.0.0.0 to listen globally)") + + return parser.parse_args(args) + + +def main(args=sys.argv[1:]): + args = parse_args(args) + logging.basicConfig(level='INFO') + app.run(host=args.host, port=args.port) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/test.py b/test.py new file mode 100644 index 0000000..b98337d --- /dev/null +++ b/test.py @@ -0,0 +1,137 @@ +import unittest +from unittest import TestCase + +import api +from elasticsearch import Elasticsearch +from datastore import DatastoreConnection + +class ElasticSearchTests(TestCase): + @classmethod + def setUpClass(cls): + cls.es = Elasticsearch() + + # Unittest test backwards compatibility to Python 2.X + try: + assertCountEqual = TestCase.assertCountEqual + except AttributeError: + assertCountEqual = TestCase.assertItemsEqual + + def test_patient_indexed(self): + record = self.es.get(index='patients', id='P0001135') + self.assertTrue(record['found']) + self.assertCountEqual(record['_source']['gene'], ['ENSG00000151092']) # NGLY1 + + def test_hpo_indexed(self): + term = self.es.get(index='hpo', id='HP:0000252') + self.assertTrue(term['found']) + doc = term['_source'] + self.assertEqual(doc['name'], 'Microcephaly') + self.assertAlmostEqual(len(doc['alt_id']), 4, delta=1) + self.assertIn('small head', doc['synonym']) + self.assertCountEqual(doc['is_a'], ['HP:0040195', 'HP:0007364']) + self.assertAlmostEqual(len(doc['term_category']), 19, delta=2) + + def test_gene_filter(self): + query = { + 'query': { + 'filtered': { + 'filter': { + 'term': { + 'gene': 'ENSG00000151092', # NGLY1 + } + } + } + } + } + results = self.es.search(index='patients', body=query) + self.assertEqual(results['hits']['total'], 8, "Expected 8 cases with NGLY1 gene") + + def test_phenotype_filter(self): + query = { + 'query': { + 'filtered': { + 'filter': { + 'term': { + 'phenotype': 'HP:0000118' + } + } + } + } + } + results = self.es.search(index='patients', body=query) + self.assertEqual(results['hits']['total'], 50, "Expected 50 cases with some phenotypic abnormality") + + def test_fuzzy_search(self): + query = { + 'query': { + 'bool': { + 'should': [ + {'match': {'phenotype': 'HP:0000252'}}, # Microcephaly + {'match': 
{'phenotype': 'HP:0000522'}}, # Alacrima + {'match': {'gene': 'NGLY1'}}, + ] + } + } + } + results = self.es.search(index='patients', body=query) + self.assertEqual(results['hits']['hits'][0]['_id'], 'P0001070') + + +class DatastoreTests(TestCase): + @classmethod + def setUpClass(cls): + cls.backend = DatastoreConnection() + + def test_get_term(self): + # Lookup term using alias + term = self.backend.get_vocabulary_term('HP:0001366') + + self.assertEqual(term['id'], 'HP:0000252') + self.assertEqual(term['name'], 'Microcephaly') + self.assertEqual(len(term['is_a']), 2) + self.assertAlmostEqual(len(term['term_category']), 20, delta=5) + + +class MatchRequestTests(TestCase): + def setUp(self): + self._request_template = { + 'patient': { + 'id': '1', + 'contact': { + 'name': 'First Last', + 'institution': 'Contact Institution', + 'href': 'first.last@example.com', + }, + 'features': [], + 'genomicFeatures': [], + } + } + + def test_gene_symbol_match(self): + data = self._request_template + data['patient']['features'].extend([ + { + 'id': 'HP:0000252', + 'label': 'Microcephaly', + }, + { + 'id': 'HP:0000522', + 'label': 'Alacrima', + }, + ]) + data['patient']['genomicFeatures'].append({ + 'gene': { + 'id': 'NGLY1' + } + }) + + request = api.MatchRequest(data) + response = api.match(request) + + self.assertTrue(isinstance(response, api.MatchResponse)) + results = response.to_json()['results'] + self.assertTrue(results) + + +if __name__ == '__main__': + unittest.main()
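The README TODO list includes tests for the gene index; the following is a rough sketch of what such a test might look like, run against the indexes built by `python datastore.py` (the class and method names below are hypothetical and not part of this changeset):

```python
import unittest
from unittest import TestCase

from datastore import DatastoreConnection


class GeneIndexTests(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.backend = DatastoreConnection()

    def test_gene_symbol_lookup(self):
        # The gene index stores the Ensembl ID as the primary id and the HGNC
        # symbol and synonyms as alt_id values, so a symbol lookup should
        # resolve to the Ensembl record (NGLY1 -> ENSG00000151092, the same
        # pairing used in ElasticSearchTests.test_patient_indexed).
        term = self.backend.get_vocabulary_term('NGLY1')
        self.assertEqual(term['id'], 'ENSG00000151092')
        self.assertIn('NGLY1', term['alt_id'])


if __name__ == '__main__':
    unittest.main()
```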