
[backend] Expose suggested models #419

Merged · 11 commits · Nov 25, 2024
20 changes: 10 additions & 10 deletions docs/source/conf.py
@@ -12,8 +12,8 @@
# patch the Sphinx run so that it can operate directly on the sources
# see: https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#ensuring-the-code-can-be-imported
module_paths = [
Path('..', '..', 'lumigator', 'python', 'mzai', 'sdk').resolve(),
Path('..', '..', 'lumigator', 'python', 'mzai', 'schemas').resolve()
Path("..", "..", "lumigator", "python", "mzai", "sdk").resolve(),
Path("..", "..", "lumigator", "python", "mzai", "schemas").resolve(),
]

for path in module_paths:
@@ -22,12 +22,12 @@

# import the modules that we want to document here to avoid the autodoc error
# see: https://github.com/pydantic/pydantic/discussions/7763#discussioncomment-8417097
from lumigator_sdk import jobs, lm_datasets # noqa: F401, E402
from lumigator_sdk import jobs, lm_datasets, models # noqa: F401, E402

project = 'Lumigator 🐊'
copyright = '2024, Mozilla AI'
author = 'Mozilla AI Engineering'
release = '0.0.1'
project = "Lumigator 🐊"
copyright = "2024, Mozilla AI"
author = "Mozilla AI Engineering"
release = "0.0.1"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
@@ -38,7 +38,7 @@
"sphinx.ext.napoleon",
"myst_parser",
"sphinx_design",
"sphinx_copybutton"
"sphinx_copybutton",
]

# napoleon settings
@@ -49,9 +49,9 @@
"colon_fence",
]

templates_path = ['_templates']
templates_path = ["_templates"]
source_suffix = [".rst", ".md"]
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
copybutton_exclude = ".linenos, .gp, .go"

# -- Options for HTML output -------------------------------------------------
187 changes: 187 additions & 0 deletions docs/source/get-started/suggested-models.md
@@ -0,0 +1,187 @@
# Suggested Models

Lumigator supports any model uploaded to the [Hugging Face Hub](https://huggingface.co/models?pipeline_tag=summarization&sort=trending)
and trained for *summarization*, provided the model is compatible with the required library versions
(e.g., Transformers) and runtime dependencies (e.g., vLLM). Practical factors such as compute
availability and system configurations may also impact the successful use of a model. To get
started, [we have extensively tested a few models](https://blog.mozilla.ai/on-model-selection-for-text-summarization/)
and created an endpoint to easily retrieve them.

In this guide, we assume that you have already [installed Lumigator locally](quickstart), and have a
running instance. To get a list of suggested models, you can use the following command:

::::{tab-set}

:::{tab-item} cURL
:sync: tab1

```console
user@host:~/lumigator$ curl -s http://localhost:8000/api/v1/models/summarization | jq
{
"total": 9,
"items": [
{
"name": "facebook/bart-large-cnn",
"uri": "hf://facebook/bart-large-cnn",
"description": "BART is a large-sized model fine-tuned on the CNN Daily Mail dataset.",
"info": {
"parameter_count": "406M",
"tensor_type": "F32",
"model_size": "1.63GB"
},
"tasks": [
{
"summarization": {
...
```

:::

:::{tab-item} Python SDK
:sync: tab2
```python
from lumigator_sdk.lumigator import LumigatorClient

# The default port for Lumigator is 8000
lm_client = LumigatorClient("localhost:8000")
lm_client.models.get_suggested_models("summarization")
```
:::

::::

```{note}
Note that the default port for Lumigator is `8000`. If you are running Lumigator on a different
port, you should replace `8000` with the correct port number.
```

The output lists the suggested models we have tested. The `uri` field is the one you should use
when creating a new evaluation job. The response also includes other useful information, such as
the model size and the default parameters used for evaluation. These fields are not applicable
to every model, but they are included for the ones we have tested.
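As a quick sketch of how you might consume this response, the helper below (a hypothetical name,
not part of the SDK) pulls the `uri` field out of a `ListingResponse`-shaped payload like the one
in the cURL example above:

```python
def extract_model_uris(response: dict) -> list[str]:
    """Collect the `uri` of every suggested model in a ListingResponse-shaped
    payload (hypothetical helper; field names follow the example response)."""
    return [item["uri"] for item in response.get("items", [])]


# Minimal payload mirroring the cURL example output.
sample = {
    "total": 1,
    "items": [
        {
            "name": "facebook/bart-large-cnn",
            "uri": "hf://facebook/bart-large-cnn",
        }
    ],
}

print(extract_model_uris(sample))  # → ['hf://facebook/bart-large-cnn']
```

The returned URIs can then be plugged directly into the model field of a new evaluation job.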

## Model Types and Parameters

The following table shows the models we have tested and their respective types.
The `HuggingFace` column indicates whether the model is available on the Hugging Face Hub, `API`
whether it is available via an external API, and `llamafile` whether it is distributed as a
[llamafile](https://github.com/Mozilla-Ocho/llamafile).

```{note}
Please note that Lumigator does not currently launch a llamafile for you; it assumes you have
already launched it.
```

| Model Type | Model | HuggingFace | API | llamafile |
|------------|------------------------------------------|-------------|-----|-----------|
| seq2seq | facebook/bart-large-cnn | X | | |
| seq2seq | longformer-qmsum-meeting-summarization | X | | |
| seq2seq | mrm8488/t5-base-finetuned-summarize-news | X | | |
| seq2seq | Falconsai/text_summarization | X | | |
| causal | gpt-4o-mini, gpt-4o | | X | |
| causal | open-mistral-7b | | X | |
| causal | Mistral-7B-Instruct | | | X |

## Bart Large CNN

The [`facebook/bart-large-cnn`](https://huggingface.co/facebook/bart-large-cnn) model is pre-trained
on the English language and fine-tuned on [CNN Daily Mail](https://huggingface.co/datasets/cnn_dailymail).
It was introduced in the paper
[BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension](https://arxiv.org/abs/1910.13461)
by Lewis et al. and first released [here](https://github.com/pytorch/fairseq/tree/master/examples/bart).

The model has 406M parameters (FP32), and the model size is 1.63GB. The default parameters used for
evaluation are:

| Parameter Name | Description | Value |
|------------------------|--------------------------------------------------------|-------|
| `max_length` | Maximum length of the summary | 142 |
| `min_length` | Minimum length of the summary | 56 |
| `length_penalty` | Length penalty to apply during beam search | 2.0 |
| `early_stopping` | Controls the stopping condition for beam-based methods | true |
| `no_repeat_ngram_size` | All n-grams of that size can only occur once | 3 |
| `num_beams` | Number of beams for beam search | 4 |

## Longformer QMSum Meeting Summarization

The [`longformer-qmsum-meeting-summarization`](https://huggingface.co/mikeadimech/longformer-qmsum-meeting-summarization)
model is a fine-tuned version of [allenai/led-base-16384](https://huggingface.co/allenai/led-base-16384)
for summarization.

As described in [Longformer: The Long-Document Transformer](https://arxiv.org/pdf/2004.05150.pdf) by
Iz Beltagy, Matthew E. Peters, and Arman Cohan, `led-base-16384` was initialized from `bart-base`,
with which it shares the exact same architecture, and modified for long-range summarization and
question answering.

The model has 162M parameters (FP32), and the model size is 648MB. There are no
summarization-specific parameters for this model.

## T5 Base Finetuned Summarize News

The [`mrm8488/t5-base-finetuned-summarize-news`](https://huggingface.co/mrm8488/t5-base-finetuned-summarize-news)
model is [Google's T5](https://ai.googleblog.com/2020/02/exploring-transfer-learning-with-t5.html)
base model, fine-tuned on the [News Summary](https://www.kaggle.com/sunnysai12345/news-summary)
dataset for the summarization downstream task.

The model has 223M parameters (FP32), and the model size is 892MB. The default parameters used for
evaluation are:

| Parameter Name | Description | Value |
|------------------------|--------------------------------------------------------|-------|
| `max_length` | Maximum length of the summary | 200 |
| `min_length` | Minimum length of the summary | 30 |
| `length_penalty` | Length penalty to apply during beam search | 2.0 |
| `early_stopping` | Controls the stopping condition for beam-based methods | true |
| `no_repeat_ngram_size` | All n-grams of that size can only occur once | 3 |
| `num_beams` | Number of beams for beam search | 4 |

## Falconsai Text Summarization

The [`Falconsai/text_summarization`](https://huggingface.co/Falconsai/text_summarization) model is
a variant of the T5 transformer model, designed for the task of text summarization. It is adapted
and fine-tuned to generate concise and coherent summaries of input text.

The model has 60.5M parameters (FP32), and the model size is 242MB. The default parameters used for
evaluation are:

| Parameter Name | Description | Value |
|------------------------|--------------------------------------------------------|-------|
| `max_length` | Maximum length of the summary | 200 |
| `min_length` | Minimum length of the summary | 30 |
| `length_penalty` | Length penalty to apply during beam search | 2.0 |
| `early_stopping` | Controls the stopping condition for beam-based methods | true |
| `no_repeat_ngram_size` | All n-grams of that size can only occur once | 3 |
| `num_beams` | Number of beams for beam search | 4 |

## Mistral 7B Instruct

The [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3)
Large Language Model (LLM) is an instruct fine-tuned version of the
[Mistral-7B-v0.3](https://huggingface.co/mistralai/Mistral-7B-v0.3).

The model has 7.25B parameters (BF16), and the model size is 14.5GB. There are no
summarization-specific parameters for this model.

## GPT-4o Mini and GPT-4o

The GPT-4o Mini and GPT-4o models are causal language models developed by OpenAI.

There are no summarization-specific parameters for these models.

## Open Mistral 7B

The [Open Mistral 7B](https://mistral.ai/news/announcing-mistral-7b/) model is a causal language
model developed by [Mistral AI](https://mistral.ai/). It is the smallest model in the
Mistral AI family of models.

There are no summarization-specific parameters for this model.

## Mistral 7B Instruct Llamafile

The [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)
model is a causal language model developed by [Mistral AI](https://mistral.ai/), packaged as a
llamafile. A llamafile is an executable LLM that you can run on your own computer. It contains the
weights for a given open LLM, as well as everything needed to actually run that model on your
computer. There's nothing to install or configure.

There are no summarization-specific parameters for this model.
1 change: 1 addition & 0 deletions docs/source/index.rst
@@ -36,6 +36,7 @@ Hugging Face and local stores or accessed through APIs. It consists of:

get-started/installation
get-started/quickstart
get-started/suggested-models

.. toctree::
:maxdepth: 2
8 changes: 4 additions & 4 deletions docs/source/reference/schemas.rst
@@ -1,14 +1,14 @@
Schemas
=======

.. automodule:: schemas.completions
.. automodule:: lumigator_schemas.completions
:members:

.. automodule:: schemas.datasets
.. automodule:: lumigator_schemas.datasets
:members:

.. automodule:: schemas.jobs
.. automodule:: lumigator_schemas.jobs
:members:

.. automodule:: schemas.extras
.. automodule:: lumigator_schemas.extras
:members:
12 changes: 6 additions & 6 deletions docs/source/reference/sdk.rst
@@ -11,7 +11,7 @@ Lumigator Client
The main entry point to the SDK is the `LumigatorClient` class. You can create an instance of this
class by providing the Lumigator API host and your Ray cluster address.

.. automodule:: sdk.lumigator
.. automodule:: lumigator_sdk.lumigator
:members:
:undoc-members:

@@ -21,7 +21,7 @@ Health
The `Health` class provides a simple interface to check the health of the Lumigator API and the
status of the Ray jobs running on the cluster.

.. automodule:: sdk.health
.. automodule:: lumigator_sdk.health
:members:
:undoc-members:

@@ -30,7 +30,7 @@ Datasets

The `Datasets` class provides a simple interface to create, update, delete, and list datasets.

.. automodule:: sdk.lm_datasets
.. automodule:: lumigator_sdk.lm_datasets
:members:
:undoc-members:

@@ -40,7 +40,7 @@ Jobs
The `Jobs` class provides a simple interface to submit and monitor jobs. Currently, we support two
types of jobs: Inference and Evaluation.

.. automodule:: sdk.jobs
.. automodule:: lumigator_sdk.jobs
:members:
:undoc-members:

@@ -50,7 +50,7 @@ Completions
The `Completions` class provides a simple interface to request completions from external APIs.
Currently, we support two APIs: OpenAI's and Mistral's.

.. automodule:: sdk.completions
.. automodule:: lumigator_sdk.completions
:members:
:undoc-members:

@@ -60,6 +60,6 @@ Base Client
The `BaseClient` class provides a base class for the LumigatorClient. You can use this class to
create your own client with custom methods.

.. automodule:: sdk.client
.. automodule:: lumigator_sdk.client
:members:
:undoc-members:
3 changes: 2 additions & 1 deletion lumigator/python/mzai/backend/backend/api/router.py
@@ -1,6 +1,6 @@
from fastapi import APIRouter

from backend.api.routes import completions, datasets, experiments, health, jobs
from backend.api.routes import completions, datasets, experiments, health, jobs, models
from backend.api.tags import Tags

API_V1_PREFIX = "/api/v1"
@@ -11,3 +11,4 @@
api_router.include_router(jobs.router, prefix="/jobs", tags=[Tags.JOBS])
api_router.include_router(experiments.router, prefix="/experiments", tags=[Tags.EXPERIMENTS])
api_router.include_router(completions.router, prefix="/completions", tags=[Tags.COMPLETIONS])
api_router.include_router(models.router, prefix="/models", tags=[Tags.MODELS])
48 changes: 48 additions & 0 deletions lumigator/python/mzai/backend/backend/api/routes/models.py
@@ -0,0 +1,48 @@
from pathlib import Path

import yaml
from fastapi import APIRouter, HTTPException
from lumigator_schemas.extras import ListingResponse
from lumigator_schemas.models import ModelsResponse

MODELS_PATH = Path(__file__).resolve().parents[2] / "models.yaml"

router = APIRouter()


def _get_supported_tasks(data: dict) -> list[str]:
tasks = set()
for model in data:
for task in model.get("tasks", []):
tasks.update(task.keys())

return list(tasks)


@router.get("/{task_name}")
def get_suggested_models(task_name: str) -> ListingResponse[ModelsResponse]:
"""Get a list of suggested models for the given task.

Args:
task_name (str): The task name.

Returns:
ListingResponse[ModelsResponse]: A list of suggested models.
"""
with Path(MODELS_PATH).open() as file:
data = yaml.safe_load(file)

supported_tasks = _get_supported_tasks(data)

# Currently, only summarization task is supported.
if task_name != "summarization":
raise HTTPException(
status_code=400,
detail=f"Unsupported task. Choose from: {supported_tasks}",
)

return_data = {
"total": len(data),
"items": data,
}
return ListingResponse[ModelsResponse].model_validate(return_data)
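To see the task-discovery logic in isolation, here is a standalone sketch of the
`_get_supported_tasks` helper above, run against hypothetical entries shaped like `models.yaml`
records (sorted here for deterministic output):

```python
def get_supported_tasks(data: list[dict]) -> list[str]:
    # Mirrors _get_supported_tasks above: collect every task key that
    # appears across the model entries (sorted for determinism).
    tasks = set()
    for model in data:
        for task in model.get("tasks", []):
            tasks.update(task.keys())
    return sorted(tasks)


# Hypothetical entries shaped like models.yaml records.
models = [
    {"name": "facebook/bart-large-cnn", "tasks": [{"summarization": {}}]},
    {"name": "gpt-4o-mini", "tasks": [{"summarization": {}}]},
]

print(get_supported_tasks(models))  # → ['summarization']
```

Because the set is built from task keys rather than model names, the endpoint's 400 error can list
every task the catalog knows about, even while only `summarization` is accepted.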
5 changes: 5 additions & 0 deletions lumigator/python/mzai/backend/backend/api/tags.py
@@ -7,6 +7,7 @@ class Tags(str, Enum):
JOBS = "jobs"
COMPLETIONS = "completions"
EXPERIMENTS = "experiments"
MODELS = "models"


TAGS_METADATA = [
@@ -30,6 +31,10 @@ class Tags(str, Enum):
"name": Tags.COMPLETIONS,
"description": "Access models via external vendor endpoints",
},
{
"name": Tags.MODELS,
"description": "Return a list of suggested models for a given task.",
},
]
"""Metadata to associate with route tags in the OpenAPI documentation.
