From 3c418ff011385108b0e1d1e902158878daf75bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Brand=C3=A3o?= <555migalves555@gmail.com> Date: Thu, 20 Jul 2023 16:52:12 +0100 Subject: [PATCH 1/5] documentation for nlp annotators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miguel Brandão <555migalves555@gmail.com> --- .../examples/dummy_nlp_annotator/README.MD | 168 ++++++++++ .../simple_geo_nlp_annotator/README.MD | 310 ++++++++++++++++++ 2 files changed, 478 insertions(+) create mode 100644 deepsearch/model/examples/dummy_nlp_annotator/README.MD create mode 100644 deepsearch/model/examples/simple_geo_nlp_annotator/README.MD diff --git a/deepsearch/model/examples/dummy_nlp_annotator/README.MD b/deepsearch/model/examples/dummy_nlp_annotator/README.MD new file mode 100644 index 00000000..c0b86f6b --- /dev/null +++ b/deepsearch/model/examples/dummy_nlp_annotator/README.MD @@ -0,0 +1,168 @@ +# DummyNlpAnnotator +## Introduction +This is an example dummy NLP kind annotator it supports text data and annotates entities. + +## Running the Annotator +To run this example make sure you've installed the full environment including the optional installs provided in poetry + + poetry install --all-extras + +Then simply start the server with + + python -m deepsearch.model.examples.dummy_nlp_annotator.main + +## Simple Interaction with the Annotator + +You can direcly access the API via a browser to the provided url on the console upon running the application, usually: + + http://127.0.0.1:8000 +This will take you to the landing page. 
Here you will likely find that you are not authenticated, however you can still check if the API is responsive by accessing the /health endpoint + + http://127.0.0.1:8000/health +It will be easier to interact with the application via the provided documentation endpoint + + http://127.0.0.1:8000/docs + +## Security +By default, the API requires an API-key to be used with every request to most endpoints, this key is defined on: + + deepsearch/model/examples/dummy_nlp_annotator/main.py +this API key must be provided on the authorization header, sample request headers to /: + + {'host': '127.0.0.1:8000', 'connection': 'keep-alive', 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', 'accept': 'application/json', 'sec-ch-ua-mobile': '?0', 'authorization': 'example123', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', 'sec-ch-ua-platform': '"Linux"', 'sec-fetch-site': 'same-origin', 'sec-fetch-mode': 'cors', 'sec-fetch-dest': 'empty', 'referer': 'http://127.0.0.1:8000/docs', 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'en-US,en;q=0.9'} + +## Advanced Interaction with the Annotator +On the /docs endpoint after inserting the api key you may see the following information about the API server + +on endpoint: + + - / - A list of all the annotators hosted on this server, in this example you will find only "DummyNLPAnnotator" on each annotator you will find its annotation capabilities as well as the kind of annotator it is (NLPAnnotator) which in turn tells you how to make requests to the annotator + - /model/{model_name} - You will find the annotation capabilities for the given annotator as well as it's kind. 
+ - /model/{model_name}/predict - You can make POST requests to have the model annotate your data, refer to [Sample Requests](#Sample-Requests) + +## Sample Requests + +```python + { + "apiVersion": "string", + "kind": "NLPModel", + "metadata": { + "annotations": { + "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", + "deepsearch.res.ibm.com/x-transaction-id": "string", + "deepsearch.res.ibm.com/x-attempt-number": "string", + "deepsearch.res.ibm.com/x-max-attempts": "string" + } + }, + "spec": { + "findEntities": { + "entityNames": ["entity_foo", "entity_bar"], + "objectType": "text", + "texts": [ + "A piece of text", + "Yet another piece of text" + ] + } + } + } +``` + + - You may alter entityNames to have any number of the entity types the annotator declares it can annotate, or an empty list to annotate all. + - This annotator has declared that it can only annotate text, as such the objectType must be text + - texts may be as long or as short as you need it. + - The x-deadline must lie some time in the future + - This annotator has declared that it is of kind NLPModel as such the kind for the request must match + - refer to the /docs for details on the NLPRequest type + +Will result in the following output: + +```python +{ + "entities":[ + { + "entity_foo":[ + { + "type":"entity_foo", + "match":"a 'entity_foo' match in 'A piece of text'", + "original":"a 'entity_foo' original in 'A piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_foo", + "match":"another 'entity_foo' match in 'A piece of text'", + "original":"another 'entity_foo' original in 'A piece of text'", + "range":[ + 12, + 42 + ] + } + ], + "entity_bar":[ + { + "type":"entity_bar", + "match":"a 'entity_bar' match in 'A piece of text'", + "original":"a 'entity_bar' original in 'A piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_bar", + "match":"another 'entity_bar' match in 'A piece of text'", + "original":"another 'entity_bar' original in 'A piece of 
text'", + "range":[ + 12, + 42 + ] + } + ] + }, + { + "entity_foo":[ + { + "type":"entity_foo", + "match":"a 'entity_foo' match in 'Yet another piece of text'", + "original":"a 'entity_foo' original in 'Yet another piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_foo", + "match":"another 'entity_foo' match in 'Yet another piece of text'", + "original":"another 'entity_foo' original in 'Yet another piece of text'", + "range":[ + 12, + 42 + ] + } + ], + "entity_bar":[ + { + "type":"entity_bar", + "match":"a 'entity_bar' match in 'Yet another piece of text'", + "original":"a 'entity_bar' original in 'Yet another piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_bar", + "match":"another 'entity_bar' match in 'Yet another piece of text'", + "original":"another 'entity_bar' original in 'Yet another piece of text'", + "range":[ + 12, + 42 + ] + } + ] + } + ] +} +``` \ No newline at end of file diff --git a/deepsearch/model/examples/simple_geo_nlp_annotator/README.MD b/deepsearch/model/examples/simple_geo_nlp_annotator/README.MD new file mode 100644 index 00000000..6a32a10e --- /dev/null +++ b/deepsearch/model/examples/simple_geo_nlp_annotator/README.MD @@ -0,0 +1,310 @@ +# SimpleGeoNLPAnnotator +## Introduction +This is an example SimpleGeoNLPAnnotator NLP kind annotator it supports text data and annotates entities and relationships. + +## Running the Annotator +To run this example make sure you've installed the full environment including the optional installs provided in poetry + + poetry install --all-extras + +Then simply start the server with + + python -m deepsearch.model.examples.simple_geo_nlp_annotator.main + +## Simple Interaction with the Annotator + +You can direcly access the API via a browser to the provided url on the console upon running the application, usually: + + http://127.0.0.1:8000 +This will take you to the landing page. 
Here you will likely find that you are not authenticated, however you can still check if the API is responsive by accessing the /health endpoint + + http://127.0.0.1:8000/health +It will be easier to interact with the application via the provided documentation endpoint + + http://127.0.0.1:8000/docs + +## Security +By default, the API requires an API-key to be used with every request to most endpoints, this key is defined on: + + deepsearch/model/examples/simple_geo_nlp_annotator/main.py +this API key must be provided on the authorization header, sample request headers to /: + + {'host': '127.0.0.1:8000', 'connection': 'keep-alive', 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', 'accept': 'application/json', 'sec-ch-ua-mobile': '?0', 'authorization': 'example123', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', 'sec-ch-ua-platform': '"Linux"', 'sec-fetch-site': 'same-origin', 'sec-fetch-mode': 'cors', 'sec-fetch-dest': 'empty', 'referer': 'http://127.0.0.1:8000/docs', 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'en-US,en;q=0.9'} + +## Advanced Interaction with the Annotator +On the /docs endpoint after inserting the api key you may see the following information about the API server + +on endpoint: + + - / - A list of all the annotators hosted on this server, in this example you will find only "SimpleGeoNLPAnnotator" on each annotator you will find its annotation capabilities as well as the kind of annotator it is (NLPAnnotator) which in turn tells you how to make requests to the annotator + - /model/{model_name} - You will find the annotation capabilities for the given annotator as well as it's kind. 
+ - /model/{model_name}/predict - You can make POST requests to have the model annotate your data, refer to [Sample Requests](#Sample-Requests) + +## Sample Requests + +### Sample Entity Annotation + +```python +{ + "apiVersion": "string", + "kind": "NLPModel", + "metadata": { + "annotations": { + "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", + "deepsearch.res.ibm.com/x-transaction-id": "string", + "deepsearch.res.ibm.com/x-attempt-number": "string", + "deepsearch.res.ibm.com/x-max-attempts": "string" + } + }, + "spec": { + "findEntities": { + "entityNames": ["cities", "countries", "provinces"], + "objectType": "text", + "texts": [ + "Lisbon, Madrid, Paris and Zurich are Capitals of european countries", + "Berlin is the capital of Germany" + ] + } + } +} +``` + + - You may alter propertyNames to have any number of the property types the annotator declares it can annotate, or an empty list to annotate all. + - This annotator has declared that it can only annotate text, as such the objectType must be text + - texts may be as long or as short as you need it. 
+ - The x-deadline must lie some time in the future + - This annotator has declared that it is of kind NLPModel as such the kind for the request must match + - refer to the /docs for details on the NLPRequest type + +Will result in the following output: + +```python +{ + "entities": [ + { + "cities": [ + { + "type": "cities", + "match": "Lisbon", + "original": "Lisbon", + "range": [ + 0, + 6 + ] + }, + { + "type": "cities", + "match": "Madrid", + "original": "Madrid", + "range": [ + 8, + 14 + ] + }, + { + "type": "cities", + "match": "Paris", + "original": "Paris", + "range": [ + 16, + 21 + ] + } + ], + "countries": [] + }, + { + "cities": [ + { + "type": "cities", + "match": "Berlin", + "original": "Berlin", + "range": [ + 0, + 6 + ] + } + ], + "countries": [ + { + "type": "countries", + "match": "Germany", + "original": "Germany", + "range": [ + 25, + 32 + ] + } + ] + } + ] +} +``` + +### Sample Relationship Annotation + +```python +{ + "apiVersion":"string", + "kind":"NLPModel", + "metadata":{ + "annotations":{ + "deepsearch.res.ibm.com/x-deadline":"2038-01-18T00:00:00.000Z", + "deepsearch.res.ibm.com/x-transaction-id":"string", + "deepsearch.res.ibm.com/x-attempt-number":"string", + "deepsearch.res.ibm.com/x-max-attempts":"string" + } + }, + "spec":{ + "findRelationships":{ + "relationshipNames": null, + "objectType":"text", + "texts":[ + "Lisbon, Madrid, Paris and Zurich are Capitals of european countries", + "Berlin is the capital of Germany" + ], + "entities":[ + { + "cities":[ + { + "type":"cities", + "match":"Lisbon", + "original":"Lisbon", + "range":[ + 0, + 6 + ] + }, + { + "type":"cities", + "match":"Madrid", + "original":"Madrid", + "range":[ + 8, + 14 + ] + }, + { + "type":"cities", + "match":"Paris", + "original":"Paris", + "range":[ + 16, + 21 + ] + } + ], + "countries":[ + + ] + }, + { + "cities":[ + { + "type":"cities", + "match":"Berlin", + "original":"Berlin", + "range":[ + 0, + 6 + ] + } + ], + "countries":[ + { + "type":"countries", + 
"match":"Germany", + "original":"Germany", + "range":[ + 25, + 32 + ] + } + ] + } + ] + } + } +} +``` + - Note that for relationship annotation it is required that you provide an annotation of the entities of those same pieces of text + - You may alter propertyNames to have any number of the property types the annotator declares it can annotate, or an empty list to annotate all. + - This annotator has declared that it can only annotate text, as such the objectType must be text + - texts may be as long or as short as you need it. + - The x-deadline must lie some time in the future + - This annotator has declared that it is of kind NLPModel as such the kind for the request must match + - refer to the /docs for details on the NLPRequest type + +Will result in the following output: + +```python +{ + "relationships": [ + { + "cities-to-countries": { + "header": [ + "cities", + "countries", + "weight", + "source" + ], + "data": [] + }, + "cities-to-provincies": { + "header": [ + "cities", + "provincies", + "weight", + "source" + ], + "data": [] + }, + "provincies-to-countries": { + "header": [ + "provincies", + "countries", + "weight", + "source" + ], + "data": [] + } + }, + { + "cities-to-countries": { + "header": [ + "cities", + "countries", + "weight", + "source" + ], + "data": [ + [ + "cities.0", + "countries.0", + 1, + "entities" + ] + ] + }, + "cities-to-provincies": { + "header": [ + "cities", + "provincies", + "weight", + "source" + ], + "data": [] + }, + "provincies-to-countries": { + "header": [ + "provincies", + "countries", + "weight", + "source" + ], + "data": [] + } + } + ] +} +``` \ No newline at end of file From 4b6ea818f2e34d5a6849a2f47159d55248dacef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Brand=C3=A3o?= <555migalves555@gmail.com> Date: Thu, 20 Jul 2023 17:06:29 +0100 Subject: [PATCH 2/5] Dummy QA generator docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miguel Brandão 
<555migalves555@gmail.com> --- .../examples/dummy_qa_generator/README.MD | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 deepsearch/model/examples/dummy_qa_generator/README.MD diff --git a/deepsearch/model/examples/dummy_qa_generator/README.MD b/deepsearch/model/examples/dummy_qa_generator/README.MD new file mode 100644 index 00000000..6f66f737 --- /dev/null +++ b/deepsearch/model/examples/dummy_qa_generator/README.MD @@ -0,0 +1,78 @@ +# DummyQAGenerator +## Introduction +This is an example dummy QA kind annotator. + +## Running the Annotator +To run this example make sure you've installed the full environment including the optional installs provided in poetry + + poetry install --all-extras + +Then simply start the server with + + python -m deepsearch.model.examples.dummy_qa_generator.main + +## Simple Interaction with the Annotator + +You can direcly access the API via a browser to the provided url on the console upon running the application, usually: + + http://127.0.0.1:8000 +This will take you to the landing page. 
Here you will likely find that you are not authenticated, however you can still check if the API is responsive by accessing the /health endpoint + + http://127.0.0.1:8000/health +It will be easier to interact with the application via the provided documentation endpoint + + http://127.0.0.1:8000/docs + +## Security +By default, the API requires an API-key to be used with every request to most endpoints, this key is defined on: + + deepsearch/model/examples/dummy_qa_generator/main.py +this API key must be provided on the authorization header, sample request headers to /: + + {'host': '127.0.0.1:8000', 'connection': 'keep-alive', 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', 'accept': 'application/json', 'sec-ch-ua-mobile': '?0', 'authorization': 'example123', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', 'sec-ch-ua-platform': '"Linux"', 'sec-fetch-site': 'same-origin', 'sec-fetch-mode': 'cors', 'sec-fetch-dest': 'empty', 'referer': 'http://127.0.0.1:8000/docs', 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'en-US,en;q=0.9'} + +## Advanced Interaction with the Annotator +On the /docs endpoint after inserting the api key you may see the following information about the API server + +on endpoint: + + - / - A list of all the annotators hosted on this server, in this example you will find only "DummyQAGenerator" on each annotator you will find its annotation capabilities as well as the kind of annotator it is (QAGenModel) which in turn tells you how to make requests to the annotator + - /model/{model_name} - You will find the annotation capabilities for the given annotator as well as it's kind. 
+ - /model/{model_name}/predict - You can make POST requests to have the model generate your data, refer to [Sample Requests](#Sample-Requests) + +## Sample Requests + +```python +{ + "apiVersion": "string", + "kind": "QAGenModel", + "metadata": { + "annotations": { + "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", + "deepsearch.res.ibm.com/x-transaction-id": "string", + "deepsearch.res.ibm.com/x-attempt-number": "string", + "deepsearch.res.ibm.com/x-max-attempts": "string" + } + }, + "spec": { + "generateAnswers": { + "contexts": [ + ["What is the best model"] + ], + "questions": [ + "If you are a dummy repeat what I said!" + ] + } + } +} +``` + +Will result in the following output: + +```python +{ + "answers": [ + "If you are a dummy repeat what I said!" + ] +} +``` \ No newline at end of file From fd8e0ee84d06814c8c6ad8ba6431d2a14066624f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Brand=C3=A3o?= <555migalves555@gmail.com> Date: Mon, 24 Jul 2023 11:00:15 +0100 Subject: [PATCH 3/5] refactored docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miguel Brandão <555migalves555@gmail.com> --- .../examples/dummy_nlp_annotator/README.MD | 159 +------- .../examples/dummy_qa_generator/README.MD | 66 +-- .../simple_geo_nlp_annotator/README.MD | 298 +------------- docs/guide/index.md | 3 + docs/guide/model.md | 381 ++++++++++++++++++ mkdocs.yml | 1 + 6 files changed, 391 insertions(+), 517 deletions(-) create mode 100644 docs/guide/model.md diff --git a/deepsearch/model/examples/dummy_nlp_annotator/README.MD b/deepsearch/model/examples/dummy_nlp_annotator/README.MD index c0b86f6b..4b1e444c 100644 --- a/deepsearch/model/examples/dummy_nlp_annotator/README.MD +++ b/deepsearch/model/examples/dummy_nlp_annotator/README.MD @@ -1,7 +1,4 @@ # DummyNlpAnnotator -## Introduction -This is an example dummy NLP kind annotator it supports text data and annotates entities. 
- ## Running the Annotator To run this example make sure you've installed the full environment including the optional installs provided in poetry @@ -11,158 +8,6 @@ Then simply start the server with python -m deepsearch.model.examples.dummy_nlp_annotator.main -## Simple Interaction with the Annotator - -You can direcly access the API via a browser to the provided url on the console upon running the application, usually: - - http://127.0.0.1:8000 -This will take you to the landing page. Here you will likely find that you are not authenticated, however you can still check if the API is responsive by accessing the /health endpoint - - http://127.0.0.1:8000/health -It will be easier to interact with the application via the provided documentation endpoint - - http://127.0.0.1:8000/docs - -## Security -By default, the API requires an API-key to be used with every request to most endpoints, this key is defined on: - - deepsearch/model/examples/dummy_nlp_annotator/main.py -this API key must be provided on the authorization header, sample request headers to /: - - {'host': '127.0.0.1:8000', 'connection': 'keep-alive', 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', 'accept': 'application/json', 'sec-ch-ua-mobile': '?0', 'authorization': 'example123', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', 'sec-ch-ua-platform': '"Linux"', 'sec-fetch-site': 'same-origin', 'sec-fetch-mode': 'cors', 'sec-fetch-dest': 'empty', 'referer': 'http://127.0.0.1:8000/docs', 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'en-US,en;q=0.9'} - -## Advanced Interaction with the Annotator -On the /docs endpoint after inserting the api key you may see the following information about the API server - -on endpoint: - - - / - A list of all the annotators hosted on this server, in this example you will find only "DummyNLPAnnotator" on each annotator you will find its annotation capabilities 
as well as the kind of annotator it is (NLPAnnotator) which in turn tells you how to make requests to the annotator - - /model/{model_name} - You will find the annotation capabilities for the given annotator as well as it's kind. - - /model/{model_name}/predict - You can make POST requests to have the model annotate your data, refer to [Sample Requests](#Sample-Requests) - -## Sample Requests - -```python - { - "apiVersion": "string", - "kind": "NLPModel", - "metadata": { - "annotations": { - "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", - "deepsearch.res.ibm.com/x-transaction-id": "string", - "deepsearch.res.ibm.com/x-attempt-number": "string", - "deepsearch.res.ibm.com/x-max-attempts": "string" - } - }, - "spec": { - "findEntities": { - "entityNames": ["entity_foo", "entity_bar"], - "objectType": "text", - "texts": [ - "A piece of text", - "Yet another piece of text" - ] - } - } - } -``` - - - You may alter entityNames to have any number of the entity types the annotator declares it can annotate, or an empty list to annotate all. - - This annotator has declared that it can only annotate text, as such the objectType must be text - - texts may be as long or as short as you need it. 
- - The x-deadline must lie some time in the future - - This annotator has declared that it is of kind NLPModel as such the kind for the request must match - - refer to the /docs for details on the NLPRequest type - -Will result in the following output: +## Interaction with the Annotator -```python -{ - "entities":[ - { - "entity_foo":[ - { - "type":"entity_foo", - "match":"a 'entity_foo' match in 'A piece of text'", - "original":"a 'entity_foo' original in 'A piece of text'", - "range":[ - 1, - 5 - ] - }, - { - "type":"entity_foo", - "match":"another 'entity_foo' match in 'A piece of text'", - "original":"another 'entity_foo' original in 'A piece of text'", - "range":[ - 12, - 42 - ] - } - ], - "entity_bar":[ - { - "type":"entity_bar", - "match":"a 'entity_bar' match in 'A piece of text'", - "original":"a 'entity_bar' original in 'A piece of text'", - "range":[ - 1, - 5 - ] - }, - { - "type":"entity_bar", - "match":"another 'entity_bar' match in 'A piece of text'", - "original":"another 'entity_bar' original in 'A piece of text'", - "range":[ - 12, - 42 - ] - } - ] - }, - { - "entity_foo":[ - { - "type":"entity_foo", - "match":"a 'entity_foo' match in 'Yet another piece of text'", - "original":"a 'entity_foo' original in 'Yet another piece of text'", - "range":[ - 1, - 5 - ] - }, - { - "type":"entity_foo", - "match":"another 'entity_foo' match in 'Yet another piece of text'", - "original":"another 'entity_foo' original in 'Yet another piece of text'", - "range":[ - 12, - 42 - ] - } - ], - "entity_bar":[ - { - "type":"entity_bar", - "match":"a 'entity_bar' match in 'Yet another piece of text'", - "original":"a 'entity_bar' original in 'Yet another piece of text'", - "range":[ - 1, - 5 - ] - }, - { - "type":"entity_bar", - "match":"another 'entity_bar' match in 'Yet another piece of text'", - "original":"another 'entity_bar' original in 'Yet another piece of text'", - "range":[ - 12, - 42 - ] - } - ] - } - ] -} -``` \ No newline at end of file +refer to 
[https://ds4sd.github.io/deepsearch-toolkit/guide/model/](https://ds4sd.github.io/deepsearch-toolkit/guide/model/) diff --git a/deepsearch/model/examples/dummy_qa_generator/README.MD b/deepsearch/model/examples/dummy_qa_generator/README.MD index 6f66f737..98d9f2ff 100644 --- a/deepsearch/model/examples/dummy_qa_generator/README.MD +++ b/deepsearch/model/examples/dummy_qa_generator/README.MD @@ -11,68 +11,6 @@ Then simply start the server with python -m deepsearch.model.examples.dummy_qa_generator.main -## Simple Interaction with the Annotator +## Interaction with the Annotator -You can direcly access the API via a browser to the provided url on the console upon running the application, usually: - - http://127.0.0.1:8000 -This will take you to the landing page. Here you will likely find that you are not authenticated, however you can still check if the API is responsive by accessing the /health endpoint - - http://127.0.0.1:8000/health -It will be easier to interact with the application via the provided documentation endpoint - - http://127.0.0.1:8000/docs - -## Security -By default, the API requires an API-key to be used with every request to most endpoints, this key is defined on: - - deepsearch/model/examples/dummy_qa_generator/main.py -this API key must be provided on the authorization header, sample request headers to /: - - {'host': '127.0.0.1:8000', 'connection': 'keep-alive', 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', 'accept': 'application/json', 'sec-ch-ua-mobile': '?0', 'authorization': 'example123', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', 'sec-ch-ua-platform': '"Linux"', 'sec-fetch-site': 'same-origin', 'sec-fetch-mode': 'cors', 'sec-fetch-dest': 'empty', 'referer': 'http://127.0.0.1:8000/docs', 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'en-US,en;q=0.9'} - -## Advanced Interaction with the Annotator -On the /docs endpoint after 
inserting the api key you may see the following information about the API server - -on endpoint: - - - / - A list of all the annotators hosted on this server, in this example you will find only "DummyQAGenerator" on each annotator you will find its annotation capabilities as well as the kind of annotator it is (QAGenModel) which in turn tells you how to make requests to the annotator - - /model/{model_name} - You will find the annotation capabilities for the given annotator as well as it's kind. - - /model/{model_name}/predict - You can make POST requests to have the model generate your data, refer to [Sample Requests](#Sample-Requests) - -## Sample Requests - -```python -{ - "apiVersion": "string", - "kind": "QAGenModel", - "metadata": { - "annotations": { - "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", - "deepsearch.res.ibm.com/x-transaction-id": "string", - "deepsearch.res.ibm.com/x-attempt-number": "string", - "deepsearch.res.ibm.com/x-max-attempts": "string" - } - }, - "spec": { - "generateAnswers": { - "contexts": [ - ["What is the best model"] - ], - "questions": [ - "If you are a dummy repeat what I said!" - ] - } - } -} -``` - -Will result in the following output: - -```python -{ - "answers": [ - "If you are a dummy repeat what I said!" 
- ] -} -``` \ No newline at end of file +refer to [https://ds4sd.github.io/deepsearch-toolkit/guide/model/](https://ds4sd.github.io/deepsearch-toolkit/guide/model/) \ No newline at end of file diff --git a/deepsearch/model/examples/simple_geo_nlp_annotator/README.MD b/deepsearch/model/examples/simple_geo_nlp_annotator/README.MD index 6a32a10e..fa5cb54b 100644 --- a/deepsearch/model/examples/simple_geo_nlp_annotator/README.MD +++ b/deepsearch/model/examples/simple_geo_nlp_annotator/README.MD @@ -11,300 +11,6 @@ Then simply start the server with python -m deepsearch.model.examples.simple_geo_nlp_annotator.main -## Simple Interaction with the Annotator +## Interaction with the Annotator -You can direcly access the API via a browser to the provided url on the console upon running the application, usually: - - http://127.0.0.1:8000 -This will take you to the landing page. Here you will likely find that you are not authenticated, however you can still check if the API is responsive by accessing the /health endpoint - - http://127.0.0.1:8000/health -It will be easier to interact with the application via the provided documentation endpoint - - http://127.0.0.1:8000/docs - -## Security -By default, the API requires an API-key to be used with every request to most endpoints, this key is defined on: - - deepsearch/model/examples/simple_geo_nlp_annotator/main.py -this API key must be provided on the authorization header, sample request headers to /: - - {'host': '127.0.0.1:8000', 'connection': 'keep-alive', 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', 'accept': 'application/json', 'sec-ch-ua-mobile': '?0', 'authorization': 'example123', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', 'sec-ch-ua-platform': '"Linux"', 'sec-fetch-site': 'same-origin', 'sec-fetch-mode': 'cors', 'sec-fetch-dest': 'empty', 'referer': 'http://127.0.0.1:8000/docs', 'accept-encoding': 'gzip, deflate, 
br', 'accept-language': 'en-US,en;q=0.9'} - -## Advanced Interaction with the Annotator -On the /docs endpoint after inserting the api key you may see the following information about the API server - -on endpoint: - - - / - A list of all the annotators hosted on this server, in this example you will find only "SimpleGeoNLPAnnotator" on each annotator you will find its annotation capabilities as well as the kind of annotator it is (NLPAnnotator) which in turn tells you how to make requests to the annotator - - /model/{model_name} - You will find the annotation capabilities for the given annotator as well as it's kind. - - /model/{model_name}/predict - You can make POST requests to have the model annotate your data, refer to [Sample Requests](#Sample-Requests) - -## Sample Requests - -### Sample Entity Annotation - -```python -{ - "apiVersion": "string", - "kind": "NLPModel", - "metadata": { - "annotations": { - "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", - "deepsearch.res.ibm.com/x-transaction-id": "string", - "deepsearch.res.ibm.com/x-attempt-number": "string", - "deepsearch.res.ibm.com/x-max-attempts": "string" - } - }, - "spec": { - "findEntities": { - "entityNames": ["cities", "countries", "provinces"], - "objectType": "text", - "texts": [ - "Lisbon, Madrid, Paris and Zurich are Capitals of european countries", - "Berlin is the capital of Germany" - ] - } - } -} -``` - - - You may alter propertyNames to have any number of the property types the annotator declares it can annotate, or an empty list to annotate all. - - This annotator has declared that it can only annotate text, as such the objectType must be text - - texts may be as long or as short as you need it. 
- - The x-deadline must lie some time in the future - - This annotator has declared that it is of kind NLPModel as such the kind for the request must match - - refer to the /docs for details on the NLPRequest type - -Will result in the following output: - -```python -{ - "entities": [ - { - "cities": [ - { - "type": "cities", - "match": "Lisbon", - "original": "Lisbon", - "range": [ - 0, - 6 - ] - }, - { - "type": "cities", - "match": "Madrid", - "original": "Madrid", - "range": [ - 8, - 14 - ] - }, - { - "type": "cities", - "match": "Paris", - "original": "Paris", - "range": [ - 16, - 21 - ] - } - ], - "countries": [] - }, - { - "cities": [ - { - "type": "cities", - "match": "Berlin", - "original": "Berlin", - "range": [ - 0, - 6 - ] - } - ], - "countries": [ - { - "type": "countries", - "match": "Germany", - "original": "Germany", - "range": [ - 25, - 32 - ] - } - ] - } - ] -} -``` - -### Sample Relationship Annotation - -```python -{ - "apiVersion":"string", - "kind":"NLPModel", - "metadata":{ - "annotations":{ - "deepsearch.res.ibm.com/x-deadline":"2038-01-18T00:00:00.000Z", - "deepsearch.res.ibm.com/x-transaction-id":"string", - "deepsearch.res.ibm.com/x-attempt-number":"string", - "deepsearch.res.ibm.com/x-max-attempts":"string" - } - }, - "spec":{ - "findRelationships":{ - "relationshipNames": null, - "objectType":"text", - "texts":[ - "Lisbon, Madrid, Paris and Zurich are Capitals of european countries", - "Berlin is the capital of Germany" - ], - "entities":[ - { - "cities":[ - { - "type":"cities", - "match":"Lisbon", - "original":"Lisbon", - "range":[ - 0, - 6 - ] - }, - { - "type":"cities", - "match":"Madrid", - "original":"Madrid", - "range":[ - 8, - 14 - ] - }, - { - "type":"cities", - "match":"Paris", - "original":"Paris", - "range":[ - 16, - 21 - ] - } - ], - "countries":[ - - ] - }, - { - "cities":[ - { - "type":"cities", - "match":"Berlin", - "original":"Berlin", - "range":[ - 0, - 6 - ] - } - ], - "countries":[ - { - "type":"countries", -
"match":"Germany", - "original":"Germany", - "range":[ - 25, - 32 - ] - } - ] - } - ] - } - } -} -``` - - Note that for relationship annotation it is required that you provide an annotation of the entities of those same pieces of text - - You may alter propertyNames to have any number of the property types the annotator declares it can annotate, or an empty list to annotate all. - - This annotator has declared that it can only annotate text, as such the objectType must be text - - texts may be as long or as short as you need it. - - The x-deadline must lie some time in the future - - This annotator has declared that it is of kind NLPModel as such the kind for the request must match - - refer to the /docs for details on the NLPRequest type - -Will result in the following output: - -```python -{ - "relationships": [ - { - "cities-to-countries": { - "header": [ - "cities", - "countries", - "weight", - "source" - ], - "data": [] - }, - "cities-to-provincies": { - "header": [ - "cities", - "provincies", - "weight", - "source" - ], - "data": [] - }, - "provincies-to-countries": { - "header": [ - "provincies", - "countries", - "weight", - "source" - ], - "data": [] - } - }, - { - "cities-to-countries": { - "header": [ - "cities", - "countries", - "weight", - "source" - ], - "data": [ - [ - "cities.0", - "countries.0", - 1, - "entities" - ] - ] - }, - "cities-to-provincies": { - "header": [ - "cities", - "provincies", - "weight", - "source" - ], - "data": [] - }, - "provincies-to-countries": { - "header": [ - "provincies", - "countries", - "weight", - "source" - ], - "data": [] - } - } - ] -} -``` \ No newline at end of file +refer to [https://ds4sd.github.io/deepsearch-toolkit/guide/model/](https://ds4sd.github.io/deepsearch-toolkit/guide/model/) \ No newline at end of file diff --git a/docs/guide/index.md b/docs/guide/index.md index 885bf761..65073eaf 100644 --- a/docs/guide/index.md +++ b/docs/guide/index.md @@ -16,3 +16,6 @@ - [List and manage KGs](./kgs.md) - [Operate with
manual API calls](./apis.md) - [Custom CLI plugins](./cli_plugins.md) + +## Custom models +- [Custom model examples](./model.md) diff --git a/docs/guide/model.md b/docs/guide/model.md new file mode 100644 index 00000000..35b21893 --- /dev/null +++ b/docs/guide/model.md @@ -0,0 +1,381 @@ +## Launching a model + +To run this example make sure you've installed the full environment including the optional installs provided in poetry + + poetry install --all-extras + +Then run the model with: + + python -m deepsearch.model.examples.. + +Illustrated by running the dummy_nlp_annotator example below + + python -m deepsearch.model.examples.dummy_nlp_annotator.main + +### Security + +By default, the API requires an API-key to be used with every request to most endpoints, this key is defined on a per model basis, as an example: + +```python + # deepsearch/model/examples/dummy_nlp_annotator/main.py + ... + + def run(): + -> settings = Settings(api_key="example123") <- + app = ModelApp(settings) + app.register_model(DummyNLPAnnotator()) + ... +``` +this API key must be provided on the authorization header for most application endpoints + +## A map of the annotator endpoints + + - / - A list of all the annotators hosted on this server with all their information. + - /model/{model_name} - You will find the annotation capabilities for the given annotator. + - /model/{model_name}/predict - You can make POST requests to have the model annotate your data, refer to the [Sample Requests](#Sample NLP kind models requests and responses) + - /health - An endpoint that will respond with a preset message letting you know that the webserver is healthy. + +### Annotator API endpoints guide + +You can direcly access the API via a browser to the provided url on the console upon running the application, usually: + + http://127.0.0.1:8000 +This will take you to the landing page. 
Here you will likely find that you are not authenticated, however you can still check if the API is responsive by accessing the /health endpoint + + http://127.0.0.1:8000/health +It will be easier to interact with the application prediction capabilities via the provided documentation endpoint + + http://127.0.0.1:8000/docs + +## Sample NLP kind models requests and responses + +### Entity annotation + +```json + { + "apiVersion": "string", + "kind": "NLPModel", + "metadata": { + "annotations": { + "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", + "deepsearch.res.ibm.com/x-transaction-id": "string", + "deepsearch.res.ibm.com/x-attempt-number": "string", + "deepsearch.res.ibm.com/x-max-attempts": "string" + } + }, + "spec": { + "findEntities": { + "entityNames": ["entity_foo", "entity_bar"], + "objectType": "text", + "texts": [ + "A piece of text", + "Yet another piece of text" + ] + } + } + } +``` + +response + +```json +{ + "entities":[ + { + "entity_foo":[ + { + "type":"entity_foo", + "match":"a 'entity_foo' match in 'A piece of text'", + "original":"a 'entity_foo' original in 'A piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_foo", + "match":"another 'entity_foo' match in 'A piece of text'", + "original":"another 'entity_foo' original in 'A piece of text'", + "range":[ + 12, + 42 + ] + } + ], + "entity_bar":[ + { + "type":"entity_bar", + "match":"a 'entity_bar' match in 'A piece of text'", + "original":"a 'entity_bar' original in 'A piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_bar", + "match":"another 'entity_bar' match in 'A piece of text'", + "original":"another 'entity_bar' original in 'A piece of text'", + "range":[ + 12, + 42 + ] + } + ] + }, + { + "entity_foo":[ + { + "type":"entity_foo", + "match":"a 'entity_foo' match in 'Yet another piece of text'", + "original":"a 'entity_foo' original in 'Yet another piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_foo", + "match":"another 
'entity_foo' match in 'Yet another piece of text'", + "original":"another 'entity_foo' original in 'Yet another piece of text'", + "range":[ + 12, + 42 + ] + } + ], + "entity_bar":[ + { + "type":"entity_bar", + "match":"a 'entity_bar' match in 'Yet another piece of text'", + "original":"a 'entity_bar' original in 'Yet another piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_bar", + "match":"another 'entity_bar' match in 'Yet another piece of text'", + "original":"another 'entity_bar' original in 'Yet another piece of text'", + "range":[ + 12, + 42 + ] + } + ] + } + ] +} +``` + +### Relationship annotation +request +```json +{ + "apiVersion":"string", + "kind":"NLPModel", + "metadata":{ + "annotations":{ + "deepsearch.res.ibm.com/x-deadline":"2038-01-18T00:00:00.000Z", + "deepsearch.res.ibm.com/x-transaction-id":"string", + "deepsearch.res.ibm.com/x-attempt-number":"string", + "deepsearch.res.ibm.com/x-max-attempts":"string" + } + }, + "spec":{ + "findRelationships":{ + "relationshipNames": null, + "objectType":"text", + "texts":[ + "Lisbon, Madrid, Paris and Zurich are Capitals of european countries", + "Berlin is the capital of Germany" + ], + "entities":[ + { + "cities":[ + { + "type":"cities", + "match":"Lisbon", + "original":"Lisbon", + "range":[ + 0, + 6 + ] + }, + { + "type":"cities", + "match":"Madrid", + "original":"Madrid", + "range":[ + 8, + 14 + ] + }, + { + "type":"cities", + "match":"Paris", + "original":"Paris", + "range":[ + 16, + 21 + ] + } + ], + "countries":[ + + ] + }, + { + "cities":[ + { + "type":"cities", + "match":"Berlin", + "original":"Berlin", + "range":[ + 0, + 6 + ] + } + ], + "countries":[ + { + "type":"countries", + "match":"Germany", + "original":"Germany", + "range":[ + 25, + 32 + ] + } + ] + } + ] + } + } +} +``` + +response + +```json +{ + "relationships": [ + { + "cities-to-countries": { + "header": [ + "cities", + "countries", + "weight", + "source" + ], + "data": [] + }, + "cities-to-provincies": { + "header": 
[ + "cities", + "provincies", + "weight", + "source" + ], + "data": [] + }, + "provincies-to-countries": { + "header": [ + "provincies", + "countries", + "weight", + "source" + ], + "data": [] + } + }, + { + "cities-to-countries": { + "header": [ + "cities", + "countries", + "weight", + "source" + ], + "data": [ + [ + "cities.0", + "countries.0", + 1, + "entities" + ] + ] + }, + "cities-to-provincies": { + "header": [ + "cities", + "provincies", + "weight", + "source" + ], + "data": [] + }, + "provincies-to-countries": { + "header": [ + "provincies", + "countries", + "weight", + "source" + ], + "data": [] + } + } + ] +} +``` +### Property annotation + +## Sample QAGen kind models requests and responses + +### Generate +Request +```json +{ + "apiVersion": "string", + "kind": "QAGenModel", + "metadata": { + "annotations": { + "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", + "deepsearch.res.ibm.com/x-transaction-id": "string", + "deepsearch.res.ibm.com/x-attempt-number": "string", + "deepsearch.res.ibm.com/x-max-attempts": "string" + } + }, + "spec": { + "generateAnswers": { + "contexts": [ + ["What is the best model"] + ], + "questions": [ + "If you are a dummy repeat what I said!" + ] + } + } +} +``` + +Response + +```json +{ + "answers": [ + "If you are a dummy repeat what I said!" + ] +} +``` + +## Important considerations + +- Each annotator has a kind, for example NLPModel, as such the kind for the request must match. +- For NLP Kind annotators under the spec you must define the appropriate types to be annotated, for the dummyNLPAnnotator +[refer to this example](#marker1) you will find on the request that we would like to find *entity_foo* and *entity_bar* an empty list will lead to +no annotations being made, a null object will lead to *all* possible annotations being made. +- Each annotator declared what sort of input it supports, a list constituted of any number of (text, table and image). 
+- The x-deadline on each request is already implemented and must lie some time in the future. +- refer to the /docs page on any annotator instance for more specification on the request types \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 49f024f1..f0dbcb98 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -60,6 +60,7 @@ nav: - Knowledge graphs: guide/kgs.md - APIs: guide/apis.md - Plugin system: guide/cli_plugins.md + - Custom model examples: guide/model.md - Example gallery: gallery/index.md - API reference: - Toolkit reference: api-reference.md From 1bcb73e15dc5d7a3c8307f6a5fe2e201aa680a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Brand=C3=A3o?= <555migalves555@gmail.com> Date: Wed, 26 Jul 2023 12:03:32 +0100 Subject: [PATCH 4/5] revised PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miguel Brandão <555migalves555@gmail.com> --- deepsearch/model/README.md | 83 --- .../{README.MD => README.md} | 0 .../{README.MD => README.md} | 0 .../{README.MD => README.md} | 0 docs/guide/index.md | 2 +- docs/guide/model.md | 648 +++++++++--------- mkdocs.yml | 2 +- 7 files changed, 331 insertions(+), 404 deletions(-) rename deepsearch/model/examples/dummy_nlp_annotator/{README.MD => README.md} (100%) rename deepsearch/model/examples/dummy_qa_generator/{README.MD => README.md} (100%) rename deepsearch/model/examples/simple_geo_nlp_annotator/{README.MD => README.md} (100%) diff --git a/deepsearch/model/README.md b/deepsearch/model/README.md index 2d7c7580..e69de29b 100644 --- a/deepsearch/model/README.md +++ b/deepsearch/model/README.md @@ -1,83 +0,0 @@ -# Model API -> Currently in **beta**. - -The Model API is a unified and extensible inference API across different model kinds. - -Built-in model kind support includes NLP annotators and QA generators. 
- -## Installation -To use the Model API, install including the `api` extra, i.e.: -- with poetry: -`poetry add "deepsearch-toolkit[api]"` -- with pip: `pip install "deepsearch-toolkit[api]"` - -## Basic usage -```python -from deepsearch.model.server.config import Settings -from deepsearch.model.server.model_app import ModelApp - -# (1) create an app -app = ModelApp(settings=Settings()) - -# (2) register your model(s) -model = ... # e.g. SimpleGeoNLPAnnotator() -app.register_model(model) - -# (3) run the app -app.run(host="127.0.0.1", port=8000) -``` - -### Settings -App configuration is done in [`Settings`](server/config.py) based on -[Pydantic Settings with dotenv support](https://docs.pydantic.dev/dev-v1/usage/settings/). - -E.g. the required API key can be injected via env var `DS_MODEL_API_KEY`. - -### OpenAPI -The OpenAPI UI is served under `/docs`, i.e. by default at http://127.0.0.1:8000/docs. - -## Developing a new model -To develop a new model class for an existing [kind](kinds/), inherit from the base model -class of that kind and implement the abstract methods and attributes. - -The framework will automatically use the correct controller for your model. - -To use a custom controller instead, pass it to `ModelApp.register_model()` via the -optional parameter `controller`. - -### Examples -- [Dummy NLP annotator](examples/dummy_nlp_annotator/) -- [Simple geo NLP annotator](examples/simple_geo_nlp_annotator/) -- [Dummy QA generator](examples/dummy_qa_generator/) - -Note: these examples configure the app with API key "example123"; when running them, use -the same key to access the protected endpoints. 
- -### Inference -As as example, an input payload for the `/predict` endpoint for the geography annotator -could look as follows (note that `deepsearch.res.ibm.com/x-deadline` should be a -future timestamp): -```json -{ - "apiVersion": "v1", - "kind": "NLPModel", - "metadata": { - "annotations": { - "deepsearch.res.ibm.com/x-deadline": "2024-04-20T12:26:01.479484+00:00", - "deepsearch.res.ibm.com/x-transaction-id": "abc", - "deepsearch.res.ibm.com/x-attempt-number": 5, - "deepsearch.res.ibm.com/x-max-attempts": 5 - } - }, - "spec": { - "findEntities": { - "entityNames": ["cities", "countries"], - "objectType": "text", - "texts": [ - "Bern, the capital city of Switzerland, is built around a crook in the Aare River.", - "Athens is a major coastal urban area in the Mediterranean and is both the capital and largest city of Greece." - ] - } - } -} -``` diff --git a/deepsearch/model/examples/dummy_nlp_annotator/README.MD b/deepsearch/model/examples/dummy_nlp_annotator/README.md similarity index 100% rename from deepsearch/model/examples/dummy_nlp_annotator/README.MD rename to deepsearch/model/examples/dummy_nlp_annotator/README.md diff --git a/deepsearch/model/examples/dummy_qa_generator/README.MD b/deepsearch/model/examples/dummy_qa_generator/README.md similarity index 100% rename from deepsearch/model/examples/dummy_qa_generator/README.MD rename to deepsearch/model/examples/dummy_qa_generator/README.md diff --git a/deepsearch/model/examples/simple_geo_nlp_annotator/README.MD b/deepsearch/model/examples/simple_geo_nlp_annotator/README.md similarity index 100% rename from deepsearch/model/examples/simple_geo_nlp_annotator/README.MD rename to deepsearch/model/examples/simple_geo_nlp_annotator/README.md diff --git a/docs/guide/index.md b/docs/guide/index.md index 65073eaf..97be0dc8 100644 --- a/docs/guide/index.md +++ b/docs/guide/index.md @@ -18,4 +18,4 @@ - [Custom CLI plugins](./cli_plugins.md) ## Custom models -- [Custom model examples](./model.md) +- [Model 
API](./model.md) \ No newline at end of file diff --git a/docs/guide/model.md b/docs/guide/model.md index 35b21893..32d70fe2 100644 --- a/docs/guide/model.md +++ b/docs/guide/model.md @@ -1,10 +1,17 @@ -## Launching a model +# Model API +> Currently in **beta**. -To run this example make sure you've installed the full environment including the optional installs provided in poetry +The Model API is a unified and extensible inference API across different model kinds. - poetry install --all-extras +Built-in model kind support includes NLP annotators and QA generators. -Then run the model with: +## Installation +To use the Model API, install including the `api` extra, i.e.: +- with poetry: +`poetry add "deepsearch-toolkit[api]"` +- with pip: `pip install "deepsearch-toolkit[api]"` + +To launch a model: python -m deepsearch.model.examples.. @@ -35,347 +42,350 @@ this API key must be provided on the authorization header for most application e - /model/{model_name}/predict - You can make POST requests to have the model annotate your data, refer to the [Sample Requests](#Sample NLP kind models requests and responses) - /health - An endpoint that will respond with a preset message letting you know that the webserver is healthy. -### Annotator API endpoints guide +### OpenAPI + +The OpenAPI UI is served under `/docs`, e.g. http://127.0.0.1:8000/docs. -You can direcly access the API via a browser to the provided url on the console upon running the application, usually: +## Developing a new model +To develop a new model class for an existing [kind](kinds/), inherit from the base model +class of that kind and implement the abstract methods and attributes. - http://127.0.0.1:8000 -This will take you to the landing page. Here you will likely find that you are not authenticated, however you can still check if the API is responsive by accessing the /health endpoint +The framework will automatically use the correct controller for your model. 
- http://127.0.0.1:8000/health -It will be easier to interact with the application prediction capabilities via the provided documentation endpoint +To use a custom controller instead, pass it to `ModelApp.register_model()` via the +optional parameter `controller`. - http://127.0.0.1:8000/docs +### Examples +- [Dummy NLP annotator](examples/dummy_nlp_annotator/) +- [Simple geo NLP annotator](examples/simple_geo_nlp_annotator/) +- [Dummy QA generator](examples/dummy_qa_generator/) ## Sample NLP kind models requests and responses ### Entity annotation - -```json - { - "apiVersion": "string", - "kind": "NLPModel", - "metadata": { - "annotations": { - "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", - "deepsearch.res.ibm.com/x-transaction-id": "string", - "deepsearch.res.ibm.com/x-attempt-number": "string", - "deepsearch.res.ibm.com/x-max-attempts": "string" - } - }, - "spec": { - "findEntities": { - "entityNames": ["entity_foo", "entity_bar"], - "objectType": "text", - "texts": [ - "A piece of text", - "Yet another piece of text" - ] - } - } - } -``` - -response - -```json -{ - "entities":[ - { - "entity_foo":[ - { - "type":"entity_foo", - "match":"a 'entity_foo' match in 'A piece of text'", - "original":"a 'entity_foo' original in 'A piece of text'", - "range":[ - 1, - 5 - ] - }, - { - "type":"entity_foo", - "match":"another 'entity_foo' match in 'A piece of text'", - "original":"another 'entity_foo' original in 'A piece of text'", - "range":[ - 12, - 42 - ] - } - ], - "entity_bar":[ - { - "type":"entity_bar", - "match":"a 'entity_bar' match in 'A piece of text'", - "original":"a 'entity_bar' original in 'A piece of text'", - "range":[ - 1, - 5 - ] - }, - { - "type":"entity_bar", - "match":"another 'entity_bar' match in 'A piece of text'", - "original":"another 'entity_bar' original in 'A piece of text'", - "range":[ - 12, - 42 - ] - } - ] - }, - { - "entity_foo":[ - { - "type":"entity_foo", - "match":"a 'entity_foo' match in 'Yet another piece of 
text'", - "original":"a 'entity_foo' original in 'Yet another piece of text'", - "range":[ - 1, - 5 - ] - }, - { - "type":"entity_foo", - "match":"another 'entity_foo' match in 'Yet another piece of text'", - "original":"another 'entity_foo' original in 'Yet another piece of text'", - "range":[ - 12, - 42 - ] - } - ], - "entity_bar":[ - { - "type":"entity_bar", - "match":"a 'entity_bar' match in 'Yet another piece of text'", - "original":"a 'entity_bar' original in 'Yet another piece of text'", - "range":[ - 1, - 5 - ] - }, - { - "type":"entity_bar", - "match":"another 'entity_bar' match in 'Yet another piece of text'", - "original":"another 'entity_bar' original in 'Yet another piece of text'", - "range":[ - 12, - 42 - ] - } - ] - } - ] -} -``` - -### Relationship annotation -request -```json -{ - "apiVersion":"string", - "kind":"NLPModel", - "metadata":{ - "annotations":{ - "deepsearch.res.ibm.com/x-deadline":"2038-01-18T00:00:00.000Z", - "deepsearch.res.ibm.com/x-transaction-id":"string", - "deepsearch.res.ibm.com/x-attempt-number":"string", - "deepsearch.res.ibm.com/x-max-attempts":"string" - } - }, - "spec":{ - "findRelationships":{ - "relationshipNames": null, - "objectType":"text", - "texts":[ - "Lisbon, Madrid, Paris and Zurich are Capitals of european countries", - "Berlin is the capital of Germany" - ], - "entities":[ - { - "cities":[ - { - "type":"cities", - "match":"Lisbon", - "original":"Lisbon", - "range":[ - 0, - 6 - ] - }, - { - "type":"cities", - "match":"Madrid", - "original":"Madrid", - "range":[ - 8, - 14 - ] - }, - { - "type":"cities", - "match":"Paris", - "original":"Paris", - "range":[ - 16, - 21 - ] - } - ], - "countries":[ - - ] +??? 
note "Entity annotation request payload" + ```json + { + "apiVersion": "string", + "kind": "NLPModel", + "metadata": { + "annotations": { + "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", + "deepsearch.res.ibm.com/x-transaction-id": "string", + "deepsearch.res.ibm.com/x-attempt-number": "string", + "deepsearch.res.ibm.com/x-max-attempts": "string" + } }, - { - "cities":[ - { - "type":"cities", - "match":"Berlin", - "original":"Berlin", - "range":[ - 0, - 6 - ] - } - ], - "countries":[ - { - "type":"countries", - "match":"Germany", - "original":"Germany", - "range":[ - 25, - 32 - ] - } - ] + "spec": { + "findEntities": { + "entityNames": ["entity_foo", "entity_bar"], + "objectType": "text", + "texts": [ + "A piece of text", + "Yet another piece of text" + ] + } } - ] - } - } -} -``` + } + ``` -response +??? note "Entity annotation request response" + ```json + { + "entities":[ + { + "entity_foo":[ + { + "type":"entity_foo", + "match":"a 'entity_foo' match in 'A piece of text'", + "original":"a 'entity_foo' original in 'A piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_foo", + "match":"another 'entity_foo' match in 'A piece of text'", + "original":"another 'entity_foo' original in 'A piece of text'", + "range":[ + 12, + 42 + ] + } + ], + "entity_bar":[ + { + "type":"entity_bar", + "match":"a 'entity_bar' match in 'A piece of text'", + "original":"a 'entity_bar' original in 'A piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_bar", + "match":"another 'entity_bar' match in 'A piece of text'", + "original":"another 'entity_bar' original in 'A piece of text'", + "range":[ + 12, + 42 + ] + } + ] + }, + { + "entity_foo":[ + { + "type":"entity_foo", + "match":"a 'entity_foo' match in 'Yet another piece of text'", + "original":"a 'entity_foo' original in 'Yet another piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_foo", + "match":"another 'entity_foo' match in 'Yet another piece of text'", + 
"original":"another 'entity_foo' original in 'Yet another piece of text'", + "range":[ + 12, + 42 + ] + } + ], + "entity_bar":[ + { + "type":"entity_bar", + "match":"a 'entity_bar' match in 'Yet another piece of text'", + "original":"a 'entity_bar' original in 'Yet another piece of text'", + "range":[ + 1, + 5 + ] + }, + { + "type":"entity_bar", + "match":"another 'entity_bar' match in 'Yet another piece of text'", + "original":"another 'entity_bar' original in 'Yet another piece of text'", + "range":[ + 12, + 42 + ] + } + ] + } + ] + } + ``` -```json -{ - "relationships": [ +### Relationship annotation +??? note "Relationship annotation request payload" + ```json { - "cities-to-countries": { - "header": [ - "cities", - "countries", - "weight", - "source" - ], - "data": [] - }, - "cities-to-provincies": { - "header": [ - "cities", - "provincies", - "weight", - "source" - ], - "data": [] - }, - "provincies-to-countries": { - "header": [ - "provincies", - "countries", - "weight", - "source" - ], - "data": [] - } - }, + "apiVersion":"string", + "kind":"NLPModel", + "metadata":{ + "annotations":{ + "deepsearch.res.ibm.com/x-deadline":"2038-01-18T00:00:00.000Z", + "deepsearch.res.ibm.com/x-transaction-id":"string", + "deepsearch.res.ibm.com/x-attempt-number":"string", + "deepsearch.res.ibm.com/x-max-attempts":"string" + } + }, + "spec":{ + "findRelationships":{ + "relationshipNames": null, + "objectType":"text", + "texts":[ + "Lisbon, Madrid, Paris and Zurich are Capitals of european countries", + "Berlin is the capital of Germany" + ], + "entities":[ + { + "cities":[ + { + "type":"cities", + "match":"Lisbon", + "original":"Lisbon", + "range":[ + 0, + 6 + ] + }, + { + "type":"cities", + "match":"Madrid", + "original":"Madrid", + "range":[ + 8, + 14 + ] + }, + { + "type":"cities", + "match":"Paris", + "original":"Paris", + "range":[ + 16, + 21 + ] + } + ], + "countries":[ + + ] + }, + { + "cities":[ + { + "type":"cities", + "match":"Berlin", + "original":"Berlin", + 
"range":[ + 0, + 6 + ] + } + ], + "countries":[ + { + "type":"countries", + "match":"Germany", + "original":"Germany", + "range":[ + 25, + 32 + ] + } + ] + } + ] + } + } + } + ``` + +??? note "Relationship annotation request response" + ```json { - "cities-to-countries": { - "header": [ - "cities", - "countries", - "weight", - "source" - ], - "data": [ - [ - "cities.0", - "countries.0", - 1, - "entities" - ] - ] - }, - "cities-to-provincies": { - "header": [ - "cities", - "provincies", - "weight", - "source" - ], - "data": [] - }, - "provincies-to-countries": { - "header": [ - "provincies", - "countries", - "weight", - "source" - ], - "data": [] - } + "relationships": [ + { + "cities-to-countries": { + "header": [ + "cities", + "countries", + "weight", + "source" + ], + "data": [] + }, + "cities-to-provincies": { + "header": [ + "cities", + "provincies", + "weight", + "source" + ], + "data": [] + }, + "provincies-to-countries": { + "header": [ + "provincies", + "countries", + "weight", + "source" + ], + "data": [] + } + }, + { + "cities-to-countries": { + "header": [ + "cities", + "countries", + "weight", + "source" + ], + "data": [ + [ + "cities.0", + "countries.0", + 1, + "entities" + ] + ] + }, + "cities-to-provincies": { + "header": [ + "cities", + "provincies", + "weight", + "source" + ], + "data": [] + }, + "provincies-to-countries": { + "header": [ + "provincies", + "countries", + "weight", + "source" + ], + "data": [] + } + } + ] } - ] -} -``` + ``` ### Property annotation - + TBD ## Sample QAGen kind models requests and responses ### Generate -Request -```json -{ - "apiVersion": "string", - "kind": "QAGenModel", - "metadata": { - "annotations": { - "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", - "deepsearch.res.ibm.com/x-transaction-id": "string", - "deepsearch.res.ibm.com/x-attempt-number": "string", - "deepsearch.res.ibm.com/x-max-attempts": "string" - } - }, - "spec": { - "generateAnswers": { - "contexts": [ - ["What is the best 
model"] - ], - "questions": [ - "If you are a dummy repeat what I said!" - ] +??? note "Generate request payload" + ```json + { + "apiVersion": "string", + "kind": "QAGenModel", + "metadata": { + "annotations": { + "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", + "deepsearch.res.ibm.com/x-transaction-id": "string", + "deepsearch.res.ibm.com/x-attempt-number": "string", + "deepsearch.res.ibm.com/x-max-attempts": "string" + } + }, + "spec": { + "generateAnswers": { + "contexts": [ + ["What is the best model"] + ], + "questions": [ + "If you are a dummy repeat what I said!" + ] + } + } } - } -} -``` - -Response + ``` -```json -{ - "answers": [ - "If you are a dummy repeat what I said!" - ] -} -``` +??? note "Generate request response" + ```json + { + "answers": [ + "If you are a dummy repeat what I said!" + ] + } + ``` ## Important considerations - Each annotator has a kind, for example NLPModel, as such the kind for the request must match. - For NLP Kind annotators under the spec you must define the appropriate types to be annotated, for the dummyNLPAnnotator [refer to this example](#marker1) you will find on the request that we would like to find *entity_foo* and *entity_bar* an empty list will lead to -no annotations being made, a null object will lead to *all* possible annotations being made. -- Each annotator declared what sort of input it supports, a list constituted of any number of (text, table and image). +no annotations being made, a null object will lead to **all** possible annotations being made. +- Each annotator declares what sort of input it supports, a list constituted of any number of (text, table and image). - The x-deadline on each request is already implemented and must lie some time in the future.
- refer to the /docs page on any annotator instance for more specification on the request types \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index f0dbcb98..96e692c7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -60,7 +60,7 @@ nav: - Knowledge graphs: guide/kgs.md - APIs: guide/apis.md - Plugin system: guide/cli_plugins.md - - Custom model examples: guide/model.md + - Model API: guide/model.md - Example gallery: gallery/index.md - API reference: - Toolkit reference: api-reference.md From a47fc092c31376cc62f5418c9b31fd8a4fa35fef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Brand=C3=A3o?= <555migalves555@gmail.com> Date: Tue, 8 Aug 2023 11:23:39 +0100 Subject: [PATCH 5/5] QAgen changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miguel Brandão <555migalves555@gmail.com> --- docs/guide/model.md | 49 +++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/docs/guide/model.md b/docs/guide/model.md index 32d70fe2..317fe6f1 100644 --- a/docs/guide/model.md +++ b/docs/guide/model.md @@ -348,26 +348,37 @@ optional parameter `controller`. ??? note "Generate request payload" ```json { - "apiVersion": "string", - "kind": "QAGenModel", - "metadata": { - "annotations": { - "deepsearch.res.ibm.com/x-deadline": "2038-01-18T00:00:00.000Z", - "deepsearch.res.ibm.com/x-transaction-id": "string", - "deepsearch.res.ibm.com/x-attempt-number": "string", - "deepsearch.res.ibm.com/x-max-attempts": "string" - } - }, - "spec": { - "generateAnswers": { - "contexts": [ - ["What is the best model"] - ], - "questions": [ - "If you are a dummy repeat what I said!"
+ "apiVersion":"v1", + "kind":"QAGenModel", + "metadata":{ + "annotations":{ + "deepsearch.res.ibm.com/x-deadline":"2028-04-20T12:26:01.479484+00:00", + "deepsearch.res.ibm.com/x-transaction-id":"testing", + "deepsearch.res.ibm.com/x-attempt-number":5, + "deepsearch.res.ibm.com/x-max-attempts":5 + } + }, + "spec":{ + "generateAnswers":{ + "contexts":[ + [ + { + "text":"A textual transformation of a given table", + "type":"table", + "representation_type":"triplets" + }, + { + "text":"A raw paragraph as it appears on the raw text", + "type":"text", + "representation_type":"raw" + } ] - } - } + ], + "questions":[ + "42" + ] + } + } } ```