diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index e92b54ddfe..1093d9bbaf 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -115,9 +115,7 @@ jobs:
- name: Install minimal dependencies
run: pip install -r requirements.min.txt
- name: Install package
- run: pip install -e .[dev,spark,fsspec]
- - name: Run pip-audit
- run: pip-audit --ignore-vuln PYSEC-2024-48 --ignore-vuln GHSA-jw8x-6495-233v --ignore-vuln GHSA-4hq2-rpgc-r8r7
+ run: pip install -e .[dev,spark,fsspec,llm]
- name: Run Tests
run: python -m pytest --durations=50
test:
@@ -155,7 +153,7 @@ jobs:
uses: ./.github/share-actions/get-bikes-dataset-cached
- name: Install package
- run: pip install -e .[dev,spark,fsspec]
+ run: pip install -e .[dev,spark,fsspec,llm]
- name: Run Tests
run: python -m pytest --durations=50
@@ -173,7 +171,7 @@ jobs:
cache: "pip"
cache-dependency-path: setup.py
- name: Install dependencies
- run: pip install -e ".[dev]"
+ run: pip install -e .
- name: Install wheel
run: pip install wheel
- name: Build package
diff --git a/.github/workflows/ui.yml b/.github/workflows/ui.yml
index 9bff9b1ae3..89dfb3bbb1 100644
--- a/.github/workflows/ui.yml
+++ b/.github/workflows/ui.yml
@@ -151,8 +151,8 @@ jobs:
uses: ./.github/share-actions/ui-node-pnpm-install
- name: Install Playwright Browsers
- working-directory: ui
- run: pnpm dlx playwright@1.43.0 install --with-deps
+ working-directory: ui/service
+ run: pnpm exec playwright install --with-deps chromium
- name: 🔍 Get bikes dataset cached
uses: ./.github/share-actions/get-bikes-dataset-cached
@@ -162,7 +162,7 @@ jobs:
- name: Wait UI to be ready to test
working-directory: ui/service
- run: pnpm wait-on tcp:127.0.0.1:8000 -t 200000
+ run: pnpm wait-on tcp:127.0.0.1:8000 -t 4m
- name: Run Service Playwright tests
working-directory: ui/service
diff --git a/docs/book/reference/all-metrics.md b/docs/book/reference/all-metrics.md
index d697ba09d1..484440cd06 100644
--- a/docs/book/reference/all-metrics.md
+++ b/docs/book/reference/all-metrics.md
@@ -272,6 +272,9 @@ Check for regular expression matches.
| **DoesNotContain()**
- Checks if the text does not contain any or all specified items.
- Returns True/False for every input.
Example use:
`DoesNotContain(items=["as a large language model"])` | **Required:**
`items: List[str]`
**Optional:**- `display_name`
- `mode = 'all'` or `'any'`
- `case_sensitive = True` or `False`
|
| **IncludesWords()** - Checks if the text includes **any** (default) or **all** specified words.
- Considers only vocabulary words (from NLTK vocabulary).
- By default, considers inflected and variant forms of the same word.
- Returns True/False for every input.
Example use:
`IncludesWords(words_list=['booking', 'hotel', 'flight'])` | **Required:**
`words_list: List[str]`
**Optional:**- `display_name`
- `mode = 'any'` or `'all'`
- `lemmatize = True` or `False`
|
| **ExcludesWords()** - Checks if the text excludes all specified words.
- Considers only vocabulary words (from NLTK vocabulary).
- By default, considers inflected and variant forms of the same word.
- Returns True/False for every input.
Example use:
`ExcludesWords(words_list=['buy', 'sell', 'bet'])`| **Required:**
`words_list: List[str]`
**Optional:**- `display_name`
- `mode = 'all'` or `'any'`
- `lemmatize = True` or `False`
|
+| **ItemMatch()** - Checks whether the text contains **any** (default) or **all** of the items specified for each row (the items are provided as tuples in a separate column).
- Returns True/False for each row.
Example use:
`ItemMatch(with_column="expected")`| **Required:**
`with_column: str`
**Optional:**- `display_name`
- `mode = 'all'` or `'any'`
- `case_sensitive = True` or `False`
|
+| **ItemNoMatch()** - Checks whether the text excludes **any** (default) or **all** of the items specified for each row (the items are provided as tuples in a separate column).
- Returns True/False for each row.
Example use:
`ItemNoMatch(with_column="forbidden")`| **Required:**
`with_column: str`
**Optional:**- `display_name`
- `mode = 'all'` or `'any'`
- `case_sensitive = True` or `False`
|
+| **JSONSchemaMatch()** - Checks if the text contains a JSON object matching the **expected_schema**. Supports exact (**exact_match=True**) or minimal (**exact_match=False**) matching, with optional strict type validation (**validate_types=True**).
- Returns True/False for each row.
Example use:
`JSONSchemaMatch(expected_schema={"name": str, "age": int}, exact_match=False, validate_types=True)`| **Required:**
`expected_schema: Dict[str, type]`
**Optional:**- `exact_match = True` or `False`
- `validate_types = True` or `False`
|
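
A hedged usage sketch for the `ItemMatch`, `ItemNoMatch`, and `JSONSchemaMatch` descriptors documented above, assuming the `TextEvals` preset pattern used elsewhere in these docs; the column names and data are illustrative only:

```python
import pandas as pd

from evidently.descriptors import ItemMatch, JSONSchemaMatch
from evidently.metric_preset import TextEvals
from evidently.report import Report

data = pd.DataFrame(
    {
        "response": ['{"name": "Ann", "age": 34}', "You can cancel the booking in the app"],
        # ItemMatch/ItemNoMatch expect a companion column with row-specific items, e.g. tuples
        "expected": [("name", "age"), ("cancel", "booking")],
    }
)

report = Report(metrics=[
    TextEvals(column_name="response", descriptors=[
        ItemMatch(with_column="expected", mode="all", case_sensitive=False),
        JSONSchemaMatch(expected_schema={"name": str, "age": int}, validate_types=True),
    ]),
])
report.run(current_data=data, reference_data=None)
```
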
## Descriptors: Text stats
diff --git a/examples/data_generators.py b/examples/data_generators.py
new file mode 100644
index 0000000000..1cd8ef87e1
--- /dev/null
+++ b/examples/data_generators.py
@@ -0,0 +1,66 @@
+from evidently.experimental.dataset_generators.llm.questions import QADatasetFromSeedGenerator, QADatasetGenerator
+from evidently.experimental.dataset_generators.llm.index import DataCollectionProvider
+from evidently.options.base import Options
+
+
+def generate_from_file():
+ file_path = "../cloud_quickstart_tracing.pdf"
+ data = DataCollectionProvider.from_files(file_path, chunk_size=50, chunk_overlap=20, splitter="simple")
+
+ generator = QADatasetGenerator(
+ data_collection=data,
+ provider="openai",
+ model="gpt-4o-mini",
+ num_questions=5,
+ options=Options.from_any_options(None)
+ )
+ generated = generator.generate()
+ for _, a in generated.iterrows():
+ print("Q", a["questions"])
+ if "answers" in a:
+ print("A", a["answers"])
+ if "context" in a:
+ print("C", a["context"])
+ print()
+
+
+def main():
+ data = DataCollectionProvider.from_chunks(chunks=["I am a banana", "My spoon is too big"])
+ generator = QADatasetGenerator(
+ data_collection=data,
+ provider="openai",
+ model="gpt-4o-mini",
+ num_questions=5,
+ options=Options.from_any_options(None)
+ )
+
+ generated = generator.generate()
+ for _, a in generated.iterrows():
+ print("Q", a["questions"])
+ if "answers" in a:
+ print("A", a["answers"])
+ if "context" in a:
+ print("C", a["context"])
+ print()
+
+ generator = QADatasetFromSeedGenerator(
+ seed_question="What is 'kek'?",
+ num_questions=5,
+ provider="openai",
+ model="gpt-4o-mini",
+ options=Options.from_any_options(None)
+ )
+
+ generated = generator.generate()
+ for _, a in generated.iterrows():
+ print("Q", a["questions"])
+ if "answers" in a:
+ print("A", a["answers"])
+ if "context" in a:
+ print("C", a["context"])
+ print()
+
+
+if __name__ == '__main__':
+ main()
+ # generate_from_file()
diff --git a/examples/how_to_questions/metrics/data_integrity/dataset_rouge_summary_metric.ipynb b/examples/how_to_questions/metrics/data_integrity/dataset_rouge_summary_metric.ipynb
new file mode 100644
index 0000000000..f0abc6298c
--- /dev/null
+++ b/examples/how_to_questions/metrics/data_integrity/dataset_rouge_summary_metric.ipynb
@@ -0,0 +1,117 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Evidently Dataset ROUGE Summary Metric"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from evidently.report import Report\n",
+ "from evidently.metrics import ROUGESummaryMetric"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "current_data = {\n",
+ " \"summary\": [\"hello there\", \"general kenobi\"],\n",
+ "}\n",
+ "\n",
+ "current_df = pd.DataFrame(current_data)\n",
+ "\n",
+ "reference_data = {\n",
+ " \"summary\": [\"hello there\", \"no de\"]\n",
+ "}\n",
+ "\n",
+ "reference_df = pd.DataFrame(reference_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "report = Report(metrics=[\n",
+ " ROUGESummaryMetric(column_name=\"summary\", rouge_n=2)\n",
+ "])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "report.run(current_data=current_df, reference_data=reference_df)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "report.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "report.as_dict()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "report.as_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.19"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/how_to_questions/metrics/data_integrity/dataset_summary_metric.ipynb b/examples/how_to_questions/metrics/data_integrity/dataset_summary_metric.ipynb
index 78a65bbee8..9b8be1b638 100644
--- a/examples/how_to_questions/metrics/data_integrity/dataset_summary_metric.ipynb
+++ b/examples/how_to_questions/metrics/data_integrity/dataset_summary_metric.ipynb
@@ -116,7 +116,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.13"
+ "version": "3.8.19"
}
},
"nbformat": 4,
diff --git a/requirements.dev.txt b/requirements.dev.txt
index 7f8701e3ca..606cca9714 100644
--- a/requirements.dev.txt
+++ b/requirements.dev.txt
@@ -16,6 +16,7 @@ pip-audit
pyspark
ruff==0.3.7
pre-commit==3.5.0
+evaluate==0.4.1
# service dependencies
litestar>=2.7.1
diff --git a/requirements.min.txt b/requirements.min.txt
index f858d8e507..cd8ee86f57 100644
--- a/requirements.min.txt
+++ b/requirements.min.txt
@@ -31,3 +31,5 @@ openai==1.16.2
evaluate==0.4.1
transformers[torch]==4.39.3
sentence-transformers==2.7.0
+rouge-score==0.1.2
+chromadb==0.4.0
diff --git a/setup.cfg b/setup.cfg
index 7f9f43d785..231d1f6f6c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -106,6 +106,15 @@ ignore_missing_imports = True
[mypy-litellm.*]
ignore_missing_imports = True
+[mypy-chromadb.*]
+ignore_missing_imports = True
+
+[mypy-llama_index.*]
+ignore_missing_imports = True
+
+[mypy-pypdf.*]
+ignore_missing_imports = True
+
[tool:pytest]
testpaths=tests
python_classes=*Test
diff --git a/setup.py b/setup.py
index 46e9cd43aa..329d5869fe 100644
--- a/setup.py
+++ b/setup.py
@@ -76,6 +76,7 @@
"deprecation>=2.1.0",
"uuid6>=2024.7.10",
"cryptography>=43.0.1",
+ "evaluate>=0.4.1",
],
extras_require={
"dev": [
@@ -96,12 +97,15 @@
"ruff==0.3.7",
"pre-commit==3.5.0",
"pytest-asyncio==0.23.7",
+ "evaluate>=0.4.1",
],
"llm": [
"openai>=1.16.2",
"evaluate>=0.4.1",
"transformers[torch]>=4.39.3",
"sentence-transformers>=2.7.0",
+ "rouge-score>=0.1.2",
+ "chromadb>=0.4.0",
],
"spark": ["pyspark>=3.4.0"],
"fsspec": [
diff --git a/src/evidently/descriptors/__init__.py b/src/evidently/descriptors/__init__.py
index 2520abdbe3..173a1be486 100644
--- a/src/evidently/descriptors/__init__.py
+++ b/src/evidently/descriptors/__init__.py
@@ -3,6 +3,7 @@
from .custom_descriptor import CustomPairColumnEval
from .hf_descriptor import HuggingFaceModel
from .hf_descriptor import HuggingFaceToxicityModel
+from .json_schema_match_descriptor import JSONSchemaMatch
from .llm_judges import BiasLLMEval
from .llm_judges import ContextQualityLLMEval
from .llm_judges import DeclineLLMEval
@@ -19,6 +20,8 @@
from .sentiment_descriptor import Sentiment
from .text_contains_descriptor import Contains
from .text_contains_descriptor import DoesNotContain
+from .text_contains_descriptor import ItemMatch
+from .text_contains_descriptor import ItemNoMatch
from .text_length_descriptor import TextLength
from .text_part_descriptor import BeginsWith
from .text_part_descriptor import EndsWith
@@ -47,6 +50,8 @@
"EndsWith",
"DoesNotContain",
"IncludesWords",
+ "ItemMatch",
+ "ItemNoMatch",
"ExcludesWords",
"TextLength",
"TriggerWordsPresence",
@@ -55,5 +60,6 @@
"SentenceCount",
"Sentiment",
"RegExp",
+ "JSONSchemaMatch",
"_registry",
]
diff --git a/src/evidently/descriptors/_registry.py b/src/evidently/descriptors/_registry.py
index 0f912a86fe..5ac97efc42 100644
--- a/src/evidently/descriptors/_registry.py
+++ b/src/evidently/descriptors/_registry.py
@@ -15,6 +15,11 @@
"evidently.descriptors.hf_descriptor.HuggingFaceToxicityModel",
"evidently:descriptor:HuggingFaceToxicityModel",
)
+register_type_alias(
+ FeatureDescriptor,
+ "evidently.descriptors.json_schema_match_descriptor.JSONSchemaMatch",
+ "evidently:descriptor:JSONSchemaMatch",
+)
register_type_alias(
FeatureDescriptor, "evidently.descriptors.llm_judges.BiasLLMEval", "evidently:descriptor:BiasLLMEval"
)
@@ -72,6 +77,12 @@
"evidently.descriptors.text_contains_descriptor.DoesNotContain",
"evidently:descriptor:DoesNotContain",
)
+register_type_alias(
+ FeatureDescriptor, "evidently.descriptors.text_contains_descriptor.ItemMatch", "evidently:descriptor:ItemMatch"
+)
+register_type_alias(
+ FeatureDescriptor, "evidently.descriptors.text_contains_descriptor.ItemNoMatch", "evidently:descriptor:ItemNoMatch"
+)
register_type_alias(
FeatureDescriptor, "evidently.descriptors.text_length_descriptor.TextLength", "evidently:descriptor:TextLength"
)
diff --git a/src/evidently/descriptors/json_schema_match_descriptor.py b/src/evidently/descriptors/json_schema_match_descriptor.py
new file mode 100644
index 0000000000..f85818370e
--- /dev/null
+++ b/src/evidently/descriptors/json_schema_match_descriptor.py
@@ -0,0 +1,23 @@
+from typing import Dict
+
+from evidently.features import json_schema_match_feature
+from evidently.features.generated_features import FeatureDescriptor
+from evidently.features.generated_features import GeneratedFeature
+
+
+class JSONSchemaMatch(FeatureDescriptor):
+ class Config:
+ type_alias = "evidently:descriptor:JSONSchemaMatch"
+
+ expected_schema: Dict[str, type]
+ validate_types: bool = False
+ exact_match: bool = False
+
+ def feature(self, column_name: str) -> GeneratedFeature:
+ return json_schema_match_feature.JSONSchemaMatch(
+ column_name=column_name,
+ expected_schema=self.expected_schema,
+ validate_types=self.validate_types,
+ exact_match=self.exact_match,
+ display_name=self.display_name,
+ )
diff --git a/src/evidently/descriptors/text_contains_descriptor.py b/src/evidently/descriptors/text_contains_descriptor.py
index 7e069970d5..4795c4f77f 100644
--- a/src/evidently/descriptors/text_contains_descriptor.py
+++ b/src/evidently/descriptors/text_contains_descriptor.py
@@ -39,3 +39,37 @@ def feature(self, column_name: str) -> GeneratedFeature:
self.mode,
self.display_name,
)
+
+
+class ItemMatch(FeatureDescriptor):
+ class Config:
+ type_alias = "evidently:descriptor:ItemMatch"
+
+ with_column: str
+ mode: str = "any"
+ case_sensitive: bool = True
+
+ def feature(self, column_name: str) -> GeneratedFeature:
+ return text_contains_feature.ItemMatch(
+ columns=[column_name, self.with_column],
+ case_sensitive=self.case_sensitive,
+ mode=self.mode,
+ display_name=self.display_name,
+ )
+
+
+class ItemNoMatch(FeatureDescriptor):
+ class Config:
+ type_alias = "evidently:descriptor:ItemNoMatch"
+
+ with_column: str
+ mode: str = "any"
+ case_sensitive: bool = True
+
+ def feature(self, column_name: str) -> GeneratedFeature:
+ return text_contains_feature.ItemNoMatch(
+ columns=[column_name, self.with_column],
+ case_sensitive=self.case_sensitive,
+ mode=self.mode,
+ display_name=self.display_name,
+ )
diff --git a/src/evidently/experimental/dataset_generators/__init__.py b/src/evidently/experimental/dataset_generators/__init__.py
new file mode 100644
index 0000000000..4bfe1f7c80
--- /dev/null
+++ b/src/evidently/experimental/dataset_generators/__init__.py
@@ -0,0 +1,3 @@
+from . import _registry
+
+__all__ = ["_registry"]
diff --git a/src/evidently/experimental/dataset_generators/_registry.py b/src/evidently/experimental/dataset_generators/_registry.py
new file mode 100644
index 0000000000..74a027ac6a
--- /dev/null
+++ b/src/evidently/experimental/dataset_generators/_registry.py
@@ -0,0 +1,67 @@
+from evidently.experimental.dataset_generators.base import BaseDatasetGenerator
+from evidently.experimental.dataset_generators.llm.index import DataCollectionProvider
+from evidently.experimental.dataset_generators.llm.splitter import Splitter
+from evidently.pydantic_utils import register_type_alias
+from evidently.utils.llm.prompts import PromptTemplate
+
+register_type_alias(
+ BaseDatasetGenerator,
+ "evidently.experimental.dataset_generators.llm.questions.QADatasetFromSeedGenerator",
+ "evidently:dataset_generator:QADatasetFromSeedGenerator",
+)
+register_type_alias(
+ BaseDatasetGenerator,
+ "evidently.experimental.dataset_generators.llm.questions.QADatasetGenerator",
+ "evidently:dataset_generator:QADatasetGenerator",
+)
+register_type_alias(
+ DataCollectionProvider,
+ "evidently.experimental.dataset_generators.llm.index.ChunksDataCollectionProvider",
+ "evidently:data_collection_provider:ChunksDataCollectionProvider",
+)
+register_type_alias(
+ DataCollectionProvider,
+ "evidently.experimental.dataset_generators.llm.index.FileDataCollectionProvider",
+ "evidently:data_collection_provider:FileDataCollectionProvider",
+)
+
+register_type_alias(
+ PromptTemplate,
+ "evidently.experimental.dataset_generators.llm.prompts.BaselineAnswerPromptTemplate",
+ "evidently:prompt_template:BaselineAnswerPromptTemplate",
+)
+register_type_alias(
+ PromptTemplate,
+ "evidently.experimental.dataset_generators.llm.prompts.NaiveQuestionsFromContextPromptTemplate",
+ "evidently:prompt_template:NaiveQuestionsFromContextPromptTemplate",
+)
+register_type_alias(
+ PromptTemplate,
+ "evidently.experimental.dataset_generators.llm.prompts.QuestionsFromContextPromptTemplate",
+ "evidently:prompt_template:QuestionsFromContextPromptTemplate",
+)
+register_type_alias(
+ PromptTemplate,
+ "evidently.experimental.dataset_generators.llm.prompts.QuestionsFromSeedPromptTemplate",
+ "evidently:prompt_template:QuestionsFromSeedPromptTemplate",
+)
+register_type_alias(
+ PromptTemplate,
+ "evidently.experimental.dataset_generators.llm.prompts.ReformulateQuestionPromptTemplate",
+ "evidently:prompt_template:ReformulateQuestionPromptTemplate",
+)
+register_type_alias(
+ PromptTemplate,
+ "evidently.experimental.dataset_generators.llm.prompts.SimpleQuestionPromptTemplate",
+ "evidently:prompt_template:SimpleQuestionPromptTemplate",
+)
+register_type_alias(
+ Splitter,
+ "evidently.experimental.dataset_generators.llm.splitter.LlamaIndexSplitter",
+ "evidently:splitter:LlamaIndexSplitter",
+)
+register_type_alias(
+ Splitter,
+ "evidently.experimental.dataset_generators.llm.splitter.SimpleSplitter",
+ "evidently:splitter:SimpleSplitter",
+)
diff --git a/src/evidently/experimental/dataset_generators/base.py b/src/evidently/experimental/dataset_generators/base.py
new file mode 100644
index 0000000000..0aefc12c8e
--- /dev/null
+++ b/src/evidently/experimental/dataset_generators/base.py
@@ -0,0 +1,21 @@
+from abc import ABC
+from abc import abstractmethod
+
+import pandas as pd
+from typing_extensions import TypeAlias
+
+from evidently.options.base import Options
+from evidently.pydantic_utils import EvidentlyBaseModel
+
+DatasetGeneratorResult: TypeAlias = pd.DataFrame
+
+
+class BaseDatasetGenerator(EvidentlyBaseModel, ABC):
+ class Config:
+ is_base_type = True
+
+ options: Options
+
+ @abstractmethod
+ def generate(self) -> DatasetGeneratorResult:
+ raise NotImplementedError
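
`BaseDatasetGenerator` only fixes the contract: a pydantic model carrying `options` whose `generate()` returns a `pd.DataFrame`. A hypothetical subclass sketch (the class and field names are illustrative, not part of the library):

```python
import pandas as pd

from evidently.experimental.dataset_generators.base import BaseDatasetGenerator, DatasetGeneratorResult
from evidently.options.base import Options


class StaticDatasetGenerator(BaseDatasetGenerator):
    """Returns a fixed list of questions; useful as a smoke test of the generator interface."""

    questions: list

    def generate(self) -> DatasetGeneratorResult:
        return pd.DataFrame({"questions": self.questions})


generator = StaticDatasetGenerator(questions=["What is Evidently?"], options=Options.from_any_options(None))
print(generator.generate())
```
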
diff --git a/src/evidently/experimental/dataset_generators/llm/__init__.py b/src/evidently/experimental/dataset_generators/llm/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/evidently/experimental/dataset_generators/llm/base.py b/src/evidently/experimental/dataset_generators/llm/base.py
new file mode 100644
index 0000000000..9710610657
--- /dev/null
+++ b/src/evidently/experimental/dataset_generators/llm/base.py
@@ -0,0 +1,22 @@
+from typing import Optional
+
+from evidently._pydantic_compat import PrivateAttr
+from evidently.experimental.dataset_generators.base import BaseDatasetGenerator
+from evidently.options.base import Options
+from evidently.utils.llm.wrapper import LLMWrapper
+from evidently.utils.llm.wrapper import get_llm_wrapper
+
+
+class BaseLLMDatasetGenerator(BaseDatasetGenerator):
+ provider: str
+ model: str
+ _llm_wrapper: Optional[LLMWrapper] = PrivateAttr(None)
+
+ def get_llm_wrapper(self, options: Options) -> LLMWrapper:
+ if self._llm_wrapper is None:
+ self._llm_wrapper = get_llm_wrapper(self.provider, self.model, options)
+ return self._llm_wrapper
+
+ @property
+ def wrapper(self):
+ return self.get_llm_wrapper(self.options)
diff --git a/src/evidently/experimental/dataset_generators/llm/index.py b/src/evidently/experimental/dataset_generators/llm/index.py
new file mode 100644
index 0000000000..1b5d2c2bf5
--- /dev/null
+++ b/src/evidently/experimental/dataset_generators/llm/index.py
@@ -0,0 +1,149 @@
+import abc
+import glob
+import os
+from pathlib import Path
+from typing import List
+from typing import Optional
+
+import chromadb
+from chromadb.types import Collection
+from chromadb.utils import embedding_functions
+
+from evidently.experimental.dataset_generators.llm.splitter import AnySplitter
+from evidently.experimental.dataset_generators.llm.splitter import Splitter
+from evidently.pydantic_utils import EvidentlyBaseModel
+
+Chunk = str
+DEFAULT_CHUNK_SIZE = 512
+DEFAULT_CHUNK_OVERLAP = 20
+
+
+def read_text(filename: str) -> str:
+ file_path = Path(filename)
+ if file_path.suffix.lower() == ".pdf":
+ try:
+ from pypdf import PdfReader
+ except ImportError as e:
+ raise ImportError("Please install pypdf to extract context from .pdf files") from e
+ reader = PdfReader(file_path)
+ text = ""
+ for page_num in range(len(reader.pages)):
+ page = reader.pages[page_num]
+ text += page.extract_text()
+ return text
+ else:
+ return Path(filename).read_text()
+
+
+class DataCollectionProvider(EvidentlyBaseModel, abc.ABC):
+ class Config:
+ is_base_type = True
+
+ chunk_size: int = DEFAULT_CHUNK_SIZE
+ chunk_overlap: int = DEFAULT_CHUNK_OVERLAP
+ splitter: AnySplitter = "llama_index"
+
+ @abc.abstractmethod
+ def get_data_collection(self) -> "DataCollection":
+ raise NotImplementedError
+
+ @classmethod
+ def from_files(
+ cls,
+ path: str,
+ chunk_size: int = DEFAULT_CHUNK_SIZE,
+ chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
+ splitter: AnySplitter = "llama_index",
+ ) -> "DataCollectionProvider":
+ return FileDataCollectionProvider(
+ path=path, chunk_size=chunk_size, chunk_overlap=chunk_overlap, splitter=splitter
+ )
+
+ @classmethod
+ def from_chunks(cls, chunks: List[str]):
+ return ChunksDataCollectionProvider(chunks=chunks)
+
+
+class ChunksDataCollectionProvider(DataCollectionProvider):
+ class Config:
+ type_alias = "evidently:data_collection_provider:ChunksDataCollectionProvider"
+
+ chunks: List[Chunk]
+
+ def get_data_collection(self):
+ dc = DataCollection(name="chunks", chunks=self.chunks)
+ dc.init_collection()
+ return dc
+
+
+class FileDataCollectionProvider(DataCollectionProvider):
+ class Config:
+ type_alias = "evidently:data_collection_provider:FileDataCollectionProvider"
+
+ path: str
+
+ def get_data_collection(self):
+ file_path = Path(self.path)
+ paths = [self.path] if file_path.is_file() else glob.glob(os.path.join(self.path, "*"))
+
+ splitter = Splitter.from_any(self.splitter, self.chunk_size, self.chunk_overlap)
+ chunks = list(splitter.split([read_text(p) for p in paths]))
+
+ data_collection = DataCollection(name=file_path.name, chunks=chunks)
+ data_collection.init_collection()
+ return data_collection
+
+
+class DataCollection:
+ name: str
+ chunks: List[Chunk]
+ collection: Optional[Collection] = None
+
+ def __init__(self, name: str, chunks: List[str], collection: Optional["Collection"] = None):
+ self.name = name
+ self.chunks = chunks
+ self.collection = collection
+
+ def init_collection(self):
+ if self.collection is None:
+ # fixme: huggingface/tokenizers warns about clean_up_tokenization_spaces
+ import warnings
+
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ warnings.filterwarnings("ignore", category=FutureWarning)
+
+ default_embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
+ model_name="all-MiniLM-L6-v2",
+ )
+ chroma_client = chromadb.Client()
+ collection = chroma_client.get_or_create_collection(
+ name=self.name,
+ embedding_function=default_embedding_function,
+ )
+ for i, chunk in enumerate(self.chunks):
+ collection.upsert(
+ ids=str(i),
+ documents=chunk,
+ )
+ self.collection = collection
+
+ def find_relevant_chunks(self, question: str, n_results: int = 3) -> List[Chunk]:
+ """
+ Queries the collection with a given question and returns the relevant text chunks.
+
+ Args:
+ question (str): The query or question text to search for.
+ n_results (int): Number of results to retrieve. Default is 3.
+
+ Returns:
+ List[Chunk]: A list of relevant text chunks.
+ """
+ if self.collection is None:
+ raise ValueError("Collection is not initialized")
+ results = self.collection.query(
+ query_texts=question,
+ n_results=min(n_results, len(self.chunks)),
+ )
+
+ relevant_chunks = [chunk for document in results["documents"] for chunk in document]
+ return relevant_chunks
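
A minimal retrieval sketch for the providers above; it assumes `chromadb` and the default sentence-transformers embedding model are available, and the chunk texts are illustrative:

```python
from evidently.experimental.dataset_generators.llm.index import DataCollectionProvider

provider = DataCollectionProvider.from_chunks(
    chunks=["Returns are accepted within 30 days.", "Standard shipping takes 3-5 business days."]
)
collection = provider.get_data_collection()  # embeds the chunks into an in-memory chromadb collection

# n_results is capped by the number of stored chunks inside find_relevant_chunks
print(collection.find_relevant_chunks("How long does delivery take?", n_results=1))
```
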
diff --git a/src/evidently/experimental/dataset_generators/llm/prompts.py b/src/evidently/experimental/dataset_generators/llm/prompts.py
new file mode 100644
index 0000000000..bb38038f57
--- /dev/null
+++ b/src/evidently/experimental/dataset_generators/llm/prompts.py
@@ -0,0 +1,95 @@
+from typing import ClassVar
+from typing import List
+
+from evidently.utils.llm.prompts import BlockPromptTemplate
+from evidently.utils.llm.prompts import PromptBlock
+from evidently.utils.llm.prompts import WithSystemPrompt
+from evidently.utils.llm.prompts import llm_call
+
+
+class SimpleQuestionPromptTemplate(BlockPromptTemplate):
+ class Config:
+ type_alias = "evidently:prompt_template:SimpleQuestionPromptTemplate"
+
+ blocks: ClassVar = [
+ "Please generate a {question_type} question about this:",
+ PromptBlock.input("context").anchored(),
+ PromptBlock.json_output(question="question text", answer="answer text"),
+ ]
+ question_type: str = "simple"
+
+
+class QuestionsFromSeedPromptTemplate(BlockPromptTemplate):
+ class Config:
+ type_alias = "evidently:prompt_template:QuestionsFromSeedPromptTemplate"
+
+ blocks: ClassVar = [
+ """Write for me {number} alternative questions quite similar to the question you got.
+ The question: """,
+ PromptBlock.input("seed_question").anchored(),
+ PromptBlock.string_list_output("questions"),
+ ]
+
+ @llm_call
+ def generate(self, seed_question: str, number: int) -> List[str]: ...
+
+
+class QuestionsFromContextPromptTemplate(WithSystemPrompt, BlockPromptTemplate):
+ class Config:
+ type_alias = "evidently:prompt_template:QuestionsFromContextPromptTemplate"
+
+ system_prompt: str = "You are an assistant who generates questions based on provided context"
+
+ @llm_call
+ def generate_questions(self, context: str, number: int) -> List[str]: ...
+
+
+class NaiveQuestionsFromContextPromptTemplate(QuestionsFromContextPromptTemplate):
+ class Config:
+ type_alias = "evidently:prompt_template:NaiveQuestionsFromContextPromptTemplate"
+
+ blocks: ClassVar = [
+ "Generate {number} conceptual questions based on the provided context that "
+ "can be answered from the information in the provided context.\n"
+ "Here is a context",
+ PromptBlock.input("context").anchored(),
+ "Remain faithful to the above context.\n"
+ "Avoid providing any preamble!\n"
+ "Avoid providing any closing statement!",
+ PromptBlock.string_list_output("questions"),
+ ]
+
+
+class ReformulateQuestionPromptTemplate(QuestionsFromContextPromptTemplate):
+ class Config:
+ type_alias = "evidently:prompt_template:ReformulateQuestionPromptTemplate"
+
+ blocks: ClassVar = [
+ """Write for me {number} alternative questions quite similar to the question you got.
+The question:""",
+ PromptBlock.input("context").anchored(),
+ PromptBlock.string_list_output("questions"),
+ ]
+ number: int
+ system_prompt: str = "You are a smart assistant who helps rephrase questions"
+
+
+class BaselineAnswerPromptTemplate(WithSystemPrompt, BlockPromptTemplate):
+ class Config:
+ type_alias = "evidently:prompt_template:BaselineAnswerPromptTemplate"
+
+ blocks: ClassVar = [
+ "Your task is to answer the following query:",
+ PromptBlock.input("question").anchored(),
+ "You have access to the following documents which are meant to provide context as you answer the query:",
+ PromptBlock.input("context").anchored(),
+ """Please remain faithful to the underlying context,
+and deviate from it only if you haven't found the answer in the provided context.
+Avoid providing any preamble!
+Avoid providing any closing statement!""",
+ PromptBlock.string_output("answer"),
+ ]
+ system_prompt: str = "You are a helpful assistant that answers a given question directly without any preamble"
+
+ @llm_call
+ def generate_answers(self, question: str, context: str): ...
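
The `@llm_call` methods above do not call a model directly: they build an `LLMRequest` that an `LLMWrapper` executes and parses, which is how the generators in `questions.py` use them. A sketch, assuming an API key is configured for the `openai` provider:

```python
from evidently.experimental.dataset_generators.llm.prompts import QuestionsFromSeedPromptTemplate
from evidently.options.base import Options
from evidently.utils.llm.wrapper import get_llm_wrapper

template = QuestionsFromSeedPromptTemplate()
request = template.generate(seed_question="How do I cancel a booking?", number=3)  # an LLMRequest, no call yet

wrapper = get_llm_wrapper("openai", "gpt-4o-mini", Options.from_any_options(None))
questions = wrapper.run_sync(request)  # parsed into List[str] by StringListFormatBlock
print(questions)
```
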
diff --git a/src/evidently/experimental/dataset_generators/llm/questions.py b/src/evidently/experimental/dataset_generators/llm/questions.py
new file mode 100644
index 0000000000..263d7f5fd7
--- /dev/null
+++ b/src/evidently/experimental/dataset_generators/llm/questions.py
@@ -0,0 +1,75 @@
+import random
+from typing import List
+from typing import Sequence
+from typing import Tuple
+
+import pandas as pd
+
+from evidently.experimental.dataset_generators.base import DatasetGeneratorResult
+from evidently.experimental.dataset_generators.llm.base import BaseLLMDatasetGenerator
+from evidently.experimental.dataset_generators.llm.index import Chunk
+from evidently.experimental.dataset_generators.llm.index import DataCollection
+from evidently.experimental.dataset_generators.llm.index import DataCollectionProvider
+from evidently.experimental.dataset_generators.llm.prompts import BaselineAnswerPromptTemplate
+from evidently.experimental.dataset_generators.llm.prompts import NaiveQuestionsFromContextPromptTemplate
+from evidently.experimental.dataset_generators.llm.prompts import QuestionsFromContextPromptTemplate
+from evidently.experimental.dataset_generators.llm.prompts import QuestionsFromSeedPromptTemplate
+
+Question = str
+Answer = str
+GeneratedQuestion = Tuple[Question, Answer, Chunk]
+ChunkSet = List[Chunk]
+
+
+class QADatasetGenerator(BaseLLMDatasetGenerator):
+ class Config:
+ type_alias = "evidently:dataset_generator:QADatasetGenerator"
+
+ data_collection: DataCollectionProvider
+ num_questions: int
+ questions: QuestionsFromContextPromptTemplate = NaiveQuestionsFromContextPromptTemplate()
+ answers: BaselineAnswerPromptTemplate = BaselineAnswerPromptTemplate()
+
+ def generate(self) -> DatasetGeneratorResult:
+ documents = self.data_collection.get_data_collection()
+ chunk_set_count, chunks_in_set_count, questions_per_chunkset = self.get_chunks_and_question_count()
+ chunk_sets = self.generate_chunksets(documents, chunk_set_count, chunks_in_set_count)
+ questions: List[Question] = self.generate_questions(chunk_sets, questions_per_chunkset)
+ relevant_chunks = [documents.find_relevant_chunks(q) for q in questions]
+ answers = self.generate_answers(questions, relevant_chunks)
+ return pd.DataFrame({"questions": questions, "answers": answers, "context": relevant_chunks})
+
+ def get_chunks_and_question_count(self) -> Tuple[int, int, int]:
+ return 1, 1, self.num_questions
+
+ def generate_chunksets(self, documents: DataCollection, count: int, chunks_per_set: int) -> List[ChunkSet]:
+ return [[random.choice(documents.chunks) for _ in range(chunks_per_set)] for _ in range(count)]
+
+ def generate_questions(self, chunk_sets: Sequence[List[Chunk]], questions_per_chunkset: int) -> List[Question]:
+ questions = self.wrapper.run_batch_sync(
+ self.questions.generate_questions(context="\n\n".join(chunks), number=questions_per_chunkset)
+ for chunks in chunk_sets
+ )
+ return [q for qs in questions for q in qs]
+
+ def generate_answers(self, questions: List[Question], relevant_chunks: List[List[Chunk]]) -> List[str]:
+ return self.wrapper.run_batch_sync(
+ self.answers.generate_answers(question=question, context="\n".join(chunks))
+ for question, chunks in zip(questions, relevant_chunks)
+ )
+
+
+class QADatasetFromSeedGenerator(BaseLLMDatasetGenerator):
+ class Config:
+ type_alias = "evidently:dataset_generator:QADatasetFromSeedGenerator"
+
+ seed_question: str
+ num_questions: int
+ prompt: QuestionsFromSeedPromptTemplate = QuestionsFromSeedPromptTemplate()
+
+ def generate(self) -> DatasetGeneratorResult:
+ response = self.wrapper.run_sync(
+ self.prompt.generate(number=self.num_questions, seed_question=self.seed_question)
+ )
+
+ return pd.DataFrame({"questions": response})
diff --git a/src/evidently/experimental/dataset_generators/llm/splitter.py b/src/evidently/experimental/dataset_generators/llm/splitter.py
new file mode 100644
index 0000000000..e4b775eb29
--- /dev/null
+++ b/src/evidently/experimental/dataset_generators/llm/splitter.py
@@ -0,0 +1,130 @@
+import re
+from abc import ABC
+from abc import abstractmethod
+from enum import Enum
+from typing import ClassVar
+from typing import List
+from typing import Optional
+from typing import Sequence
+from typing import Union
+
+from evidently._pydantic_compat import PrivateAttr
+from evidently.pydantic_utils import EvidentlyBaseModel
+
+
+class TextSource:
+ @classmethod
+ def from_any(cls, text_source: "AnyTextSource"):
+ if isinstance(text_source, TextSource):
+ return text_source
+ if isinstance(text_source, str):
+ return StrSource(text_source)
+ raise NotImplementedError(f"Cannot create TextSource from {text_source.__class__.__name__}")
+
+ @abstractmethod
+ def get_text(self) -> str:
+ raise NotImplementedError
+
+
+class StrSource(TextSource):
+ def __init__(self, value: str):
+ self.value = value
+
+ def get_text(self) -> str:
+ return self.value
+
+
+AnyTextSource = Union[str, bytes, TextSource]
+
+Chunk = str
+Split = str
+
+
+class Splitters(str, Enum):
+ Simple = "simple"
+ LlamaIndex = "llama_index"
+
+
+AnySplitter = Union[str, Splitters, "Splitter"]
+
+
+class Splitter(EvidentlyBaseModel, ABC):
+ class Config:
+ is_base_type = True
+
+ chunk_size: int
+ chunk_overlap: int
+
+ def split(self, texts: Union[AnyTextSource, List[AnyTextSource]]) -> Sequence[Chunk]:
+ if not isinstance(texts, list):
+ texts = [texts]
+
+ for text in texts:
+ yield from self.split_text(TextSource.from_any(text))
+
+ @abstractmethod
+ def split_text(self, text: TextSource) -> Sequence[Chunk]:
+ raise NotImplementedError
+
+ @classmethod
+ def from_any(cls, splitter: AnySplitter, chunk_size: int, chunk_overlap: int, **kwargs):
+ if isinstance(splitter, Splitter):
+ return splitter
+ if isinstance(splitter, str):
+ splitter = Splitters(splitter)
+ if isinstance(splitter, Splitters):
+ if splitter == Splitters.Simple:
+ return SimpleSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+ if splitter == Splitters.LlamaIndex:
+ return LlamaIndexSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
+ raise ValueError(f"Unknown splitter {splitter}")
+ raise NotImplementedError(f"Cannot create splitter from {splitter.__class__.__name__}")
+
+
+class SimpleSplitter(Splitter):
+ class Config:
+ type_alias = "evidently:splitter:SimpleSplitter"
+
+ split_re: ClassVar = re.compile(r"([^,.;。?!]+[,.;。?!]?)")
+
+ def split_text(self, text: TextSource) -> Sequence[Chunk]:
+ current_splits: List[str] = []
+ current_size = 0
+ for split in self.split_re.split(text.get_text()):
+ split_size = len(split)
+ if len(current_splits) > 0 and current_size + split_size > self.chunk_size:
+ yield "".join(current_splits)
+ while current_size > self.chunk_overlap and len(current_splits) > 0:
+ last, *current_splits = current_splits
+ last_size = len(last)
+ current_size -= last_size
+ current_size += split_size
+ current_splits.append(split)
+ if current_size > 0:
+ yield "".join(current_splits)
+
+
+class LlamaIndexSplitter(Splitter):
+ class Config:
+ type_alias = "evidently:splitter:LlamaIndexSplitter"
+
+ separator: str = " "
+ paragraph_separator: Optional[str] = None
+ _splitter = PrivateAttr(None)
+
+ @property
+ def splitter(self):
+ if self._splitter is None:
+ from llama_index.core.node_parser import SentenceSplitter
+ from llama_index.core.node_parser.text.sentence import DEFAULT_PARAGRAPH_SEP
+
+ self._splitter = SentenceSplitter(
+ chunk_size=self.chunk_size,
+ chunk_overlap=self.chunk_overlap,
+ separator=self.separator,
+ paragraph_separator=self.paragraph_separator or DEFAULT_PARAGRAPH_SEP,
+ )
+ return self._splitter
+
+ def split_text(self, text: TextSource) -> Sequence[Chunk]:
+ yield from self.splitter.split_text(text.get_text())
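
A small sketch of the `simple` splitter; the chunk size is deliberately tiny to make the splitting visible. The `llama_index` splitter exposes the same interface but needs the llama-index core package installed:

```python
from evidently.experimental.dataset_generators.llm.splitter import SimpleSplitter, Splitter

splitter = SimpleSplitter(chunk_size=40, chunk_overlap=10)
text = "Evidently generates questions. It splits documents into chunks. Chunks are embedded for retrieval."
for chunk in splitter.split(text):
    print(repr(chunk))

# the string form used by DataCollectionProvider resolves through Splitter.from_any
assert isinstance(Splitter.from_any("simple", chunk_size=40, chunk_overlap=10), SimpleSplitter)
```
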
diff --git a/src/evidently/features/_registry.py b/src/evidently/features/_registry.py
index ba2e101f5f..b3f579a981 100644
--- a/src/evidently/features/_registry.py
+++ b/src/evidently/features/_registry.py
@@ -27,6 +27,11 @@
"evidently.features.hf_feature.HuggingFaceToxicityFeature",
"evidently:feature:HuggingFaceToxicityFeature",
)
+register_type_alias(
+ GeneratedFeatures,
+ "evidently.features.json_schema_match_feature.JSONSchemaMatch",
+ "evidently:feature:JSONSchemaMatch",
+)
register_type_alias(GeneratedFeatures, "evidently.features.llm_judge.LLMJudge", "evidently:feature:LLMJudge")
register_type_alias(
GeneratedFeatures,
@@ -52,6 +57,12 @@
register_type_alias(
GeneratedFeatures, "evidently.features.text_contains_feature.DoesNotContain", "evidently:feature:DoesNotContain"
)
+register_type_alias(
+ GeneratedFeatures, "evidently.features.text_contains_feature.ItemMatch", "evidently:feature:ItemMatch"
+)
+register_type_alias(
+ GeneratedFeatures, "evidently.features.text_contains_feature.ItemNoMatch", "evidently:feature:ItemNoMatch"
+)
register_type_alias(
GeneratedFeatures, "evidently.features.text_length_feature.TextLength", "evidently:feature:TextLength"
)
diff --git a/src/evidently/features/json_schema_match_feature.py b/src/evidently/features/json_schema_match_feature.py
new file mode 100644
index 0000000000..e81e8ba01b
--- /dev/null
+++ b/src/evidently/features/json_schema_match_feature.py
@@ -0,0 +1,76 @@
+import json
+from typing import ClassVar
+from typing import Dict
+from typing import Optional
+
+import pandas as pd
+
+from evidently.base_metric import ColumnName
+from evidently.core import ColumnType
+from evidently.features.generated_features import GeneratedFeature
+from evidently.utils.data_preprocessing import DataDefinition
+
+
+class JSONSchemaMatch(GeneratedFeature):
+ class Config:
+ type_alias = "evidently:feature:JSONSchemaMatch"
+
+ __feature_type__: ClassVar = ColumnType.Categorical
+ column_name: str
+ expected_schema: Dict[str, type]
+ validate_types: bool
+ exact_match: bool
+
+ def __init__(
+ self,
+ column_name: str,
+ expected_schema: Dict[str, type],
+ validate_types: bool = False,
+ exact_match: bool = False,
+ display_name: Optional[str] = None,
+ ):
+ self.column_name = column_name
+ self.validate_types = validate_types if not exact_match else True
+ self.expected_schema = expected_schema
+ self.exact_match = exact_match
+ self.display_name = display_name
+ super().__init__()
+
+ def _feature_column_name(self) -> str:
+ match_type = "exact" if self.exact_match else "minimal"
+ return f"{self.column_name}_json_schema_{match_type}_match"
+
+ def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
+ calculated = data.apply(lambda row: self.match_json_schema(row[self.column_name]), axis=1)
+ return pd.DataFrame({self._feature_column_name(): calculated})
+
+ def match_json_schema(self, json_text: str) -> bool:
+ try:
+ json_obj = json.loads(json_text)
+ except json.JSONDecodeError:
+ return False
+
+ if self.exact_match:
+ return self._exact_match(json_obj)
+ else:
+ return self._minimal_match(json_obj)
+
+ def _minimal_match(self, json_obj: Dict) -> bool:
+ for key, expected_type in self.expected_schema.items():
+ if key not in json_obj or json_obj[key] is None:
+ return False
+ if self.validate_types and expected_type and not isinstance(json_obj[key], expected_type):
+ return False
+ return True
+
+ def _exact_match(self, json_obj: Dict) -> bool:
+ if set(json_obj.keys()) != set(self.expected_schema.keys()):
+ return False
+ return self._minimal_match(json_obj)
+
+ def _as_column(self) -> ColumnName:
+ match_type = "exact" if self.exact_match else "minimal"
+ return self._create_column(
+ self._feature_column_name(),
+ default_display_name=f"JSONSchemaMatch {match_type} match",
+ )
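
A quick illustration of the matching rules above: minimal match allows extra keys, `validate_types=True` also checks value types, and invalid JSON always returns False:

```python
from evidently.features.json_schema_match_feature import JSONSchemaMatch

feature = JSONSchemaMatch(
    column_name="response",
    expected_schema={"name": str, "age": int},
    validate_types=True,
    exact_match=False,
)
print(feature.match_json_schema('{"name": "Ann", "age": 34, "city": "Austin"}'))  # True: extra keys are allowed
print(feature.match_json_schema('{"name": "Ann", "age": "34"}'))                  # False: "age" is not an int
print(feature.match_json_schema("not json"))                                      # False: not valid JSON
```
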
diff --git a/src/evidently/features/text_contains_feature.py b/src/evidently/features/text_contains_feature.py
index 31bd3a0975..6b909b95c0 100644
--- a/src/evidently/features/text_contains_feature.py
+++ b/src/evidently/features/text_contains_feature.py
@@ -112,3 +112,109 @@ def comparison(self, item: str, string: str):
if self.case_sensitive:
return item in string
return item.casefold() in string.casefold()
+
+
+class ItemMatch(GeneratedFeature):
+ class Config:
+ type_alias = "evidently:feature:ItemMatch"
+
+ __feature_type__: ClassVar = ColumnType.Categorical
+ columns: List[str]
+ case_sensitive: bool
+ mode: str
+
+ def __init__(
+ self,
+ columns: List[str],
+ case_sensitive: bool = True,
+ mode: str = "any",
+ display_name: Optional[str] = None,
+ ):
+ if len(columns) != 2:
+ raise ValueError("two columns must be provided")
+ self.columns = columns
+ self.display_name = display_name
+ self.case_sensitive = case_sensitive
+ if mode not in ["any", "all"]:
+ raise ValueError("mode must be either 'any' or 'all'")
+ self.mode = mode
+ super().__init__()
+
+ def _feature_column_name(self) -> str:
+ return f"{self.columns[0]}_{self.columns[1]}" + "_item_match_" + str(self.case_sensitive) + "_" + self.mode
+
+ def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
+ if self.mode == "any":
+ calculated = data.apply(
+ lambda row: any(self.comparison(word, row[self.columns[0]]) for word in row[self.columns[1]]),
+ axis=1,
+ )
+ else:
+ calculated = data.apply(
+ lambda row: all(self.comparison(word, row[self.columns[0]]) for word in row[self.columns[1]]),
+ axis=1,
+ )
+ return pd.DataFrame({self._feature_column_name(): calculated})
+
+ def _as_column(self) -> ColumnName:
+ return self._create_column(
+ self._feature_column_name(),
+ default_display_name=f"Text contains {self.mode} of defined items",
+ )
+
+ def comparison(self, item: str, string: str):
+ if self.case_sensitive:
+ return item in string
+ return item.casefold() in string.casefold()
+
+
+class ItemNoMatch(GeneratedFeature):
+ class Config:
+ type_alias = "evidently:feature:ItemNoMatch"
+
+ __feature_type__: ClassVar = ColumnType.Categorical
+ columns: List[str]
+ case_sensitive: bool
+ mode: str
+
+ def __init__(
+ self,
+ columns: List[str],
+ case_sensitive: bool = True,
+ mode: str = "any",
+ display_name: Optional[str] = None,
+ ):
+ self.columns = columns
+ self.display_name = display_name
+ self.case_sensitive = case_sensitive
+ if mode not in ["any", "all"]:
+ raise ValueError("mode must be either 'any' or 'all'")
+ self.mode = mode
+ super().__init__()
+
+ def _feature_column_name(self) -> str:
+ return f"{self.columns[0]}_{self.columns[1]}" + "_item_no_match_" + str(self.case_sensitive) + "_" + self.mode
+
+ def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
+ if self.mode == "any":
+ calculated = data.apply(
+ lambda row: not any(self.comparison(word, row[self.columns[0]]) for word in row[self.columns[1]]),
+ axis=1,
+ )
+ else:
+ calculated = data.apply(
+ lambda row: not all(self.comparison(word, row[self.columns[0]]) for word in row[self.columns[1]]),
+ axis=1,
+ )
+ return pd.DataFrame({self._feature_column_name(): calculated})
+
+ def _as_column(self) -> ColumnName:
+ return self._create_column(
+ self._feature_column_name(),
+ default_display_name=f"Text does not contain {self.mode} of defined items",
+ )
+
+ def comparison(self, item: str, string: str):
+ if self.case_sensitive:
+ return item in string
+ return item.casefold() in string.casefold()
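
A direct illustration of the row-wise semantics above; the second column holds the items to look for in each row, and `data_definition` is passed as `None` for brevity since these features do not use it:

```python
import pandas as pd

from evidently.features.text_contains_feature import ItemMatch, ItemNoMatch

df = pd.DataFrame(
    {
        "response": ["We offer refunds and exchanges", "No returns accepted"],
        "expected": [("refund", "exchange"), ("refund",)],  # per-row items
    }
)

match = ItemMatch(columns=["response", "expected"], mode="all", case_sensitive=False)
print(match.generate_feature(df, None))     # True for the first row, False for the second

no_match = ItemNoMatch(columns=["response", "expected"], mode="any", case_sensitive=False)
print(no_match.generate_feature(df, None))  # False for the first row, True for the second
```
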
diff --git a/src/evidently/metrics/__init__.py b/src/evidently/metrics/__init__.py
index c88a28babf..773b77626c 100644
--- a/src/evidently/metrics/__init__.py
+++ b/src/evidently/metrics/__init__.py
@@ -32,6 +32,7 @@
from .data_integrity.column_summary_metric import ColumnSummaryMetric
from .data_integrity.dataset_missing_values_metric import DatasetMissingValuesMetric
from .data_integrity.dataset_summary_metric import DatasetSummaryMetric
+from .data_integrity.rouge_summary_metric import ROUGESummaryMetric
from .data_quality.column_category_metric import ColumnCategoryMetric
from .data_quality.column_correlations_metric import ColumnCorrelationsMetric
from .data_quality.column_distribution_metric import ColumnDistributionMetric
@@ -99,6 +100,7 @@
"ColumnSummaryMetric",
"DatasetMissingValuesMetric",
"DatasetSummaryMetric",
+ "ROUGESummaryMetric",
"ColumnCategoryMetric",
"ColumnCorrelationsMetric",
"ColumnDistributionMetric",
diff --git a/src/evidently/metrics/_registry.py b/src/evidently/metrics/_registry.py
index 1ed0ce8345..26f6e58a8a 100644
--- a/src/evidently/metrics/_registry.py
+++ b/src/evidently/metrics/_registry.py
@@ -138,6 +138,13 @@
"evidently.metrics.data_integrity.dataset_summary_metric.DatasetSummaryMetric",
"evidently:metric:DatasetSummaryMetric",
)
+
+register_type_alias(
+ Metric,
+ "evidently.metrics.data_integrity.rouge_summary_metric.ROUGESummaryMetric",
+ "evidently:metric:ROUGESummaryMetric",
+)
+
register_type_alias(
Metric,
"evidently.metrics.data_quality.column_category_metric.ColumnCategoryMetric",
@@ -570,6 +577,11 @@
"evidently.metrics.data_integrity.dataset_summary_metric.DatasetSummaryMetricResult",
"evidently:metric_result:DatasetSummaryMetricResult",
)
+register_type_alias(
+ MetricResult,
+ "evidently.metrics.data_integrity.rouge_summary_metric.ROUGESummaryMetricResult",
+ "evidently:metric_result:ROUGESummaryMetricResult",
+)
register_type_alias(
MetricResult,
"evidently.metrics.data_quality.column_category_metric.CategoryStat",
diff --git a/src/evidently/metrics/data_integrity/rouge_summary_metric.py b/src/evidently/metrics/data_integrity/rouge_summary_metric.py
new file mode 100644
index 0000000000..c9c53aeb2b
--- /dev/null
+++ b/src/evidently/metrics/data_integrity/rouge_summary_metric.py
@@ -0,0 +1,103 @@
+from typing import List
+
+import evaluate
+import pandas as pd
+
+from evidently.base_metric import InputData
+from evidently.base_metric import Metric
+from evidently.base_metric import MetricResult
+from evidently.core import IncludeTags
+from evidently.model.widget import BaseWidgetInfo
+from evidently.options.base import AnyOptions
+from evidently.renderers.base_renderer import MetricRenderer
+from evidently.renderers.base_renderer import default_renderer
+from evidently.renderers.html_widgets import header_text
+from evidently.renderers.html_widgets import table_data
+from evidently.renderers.html_widgets import text_widget
+
+
+class ROUGESummaryMetricResult(MetricResult):
+ class Config:
+ type_alias = "evidently:metric_result:ROUGESummaryMetricResult"
+ field_tags = {
+ "current": {IncludeTags.Current},
+ "reference": {IncludeTags.Reference},
+ "rouge_type": {IncludeTags.Parameter},
+ "per_row_scores": {IncludeTags.Parameter},
+ "summary_score": {IncludeTags.Parameter},
+ }
+
+ current: list
+ reference: list
+ rouge_type: str
+ per_row_scores: list
+ summary_score: float
+
+
+class ROUGESummaryMetric(Metric[ROUGESummaryMetricResult]):
+ class Config:
+ type_alias = "evidently:metric:ROUGESummaryMetric"
+ arbitrary_types_allowed = True
+
+ column_name: str
+ rouge_n: int
+
+ def __init__(self, column_name: str, rouge_n: int, options: AnyOptions = None):
+ self.column_name = column_name
+ self.rouge_n = rouge_n
+ super().__init__(options=options)
+
+ def _calculate_summary_rouge(self, current: pd.Series, reference: pd.Series):
+ rouge_evaluator = evaluate.load("rouge")
+
+ current = current.astype(str).tolist()
+ reference = reference.astype(str).tolist()
+
+ rouge_scores = rouge_evaluator.compute(
+ rouge_types=[f"rouge{self.rouge_n}"], predictions=current, references=reference, use_aggregator=False
+ )
+
+ per_row_rouge_scores = rouge_scores[f"rouge{self.rouge_n}"]
+
+ summary_rouge_score = sum(per_row_rouge_scores) / len(per_row_rouge_scores)
+
+ return per_row_rouge_scores, summary_rouge_score, current, reference
+
+ def calculate(self, data: InputData) -> ROUGESummaryMetricResult:
+ if data.current_data is None or data.reference_data is None:
+ raise ValueError("The current data or the reference data is None.")
+ if len(data.current_data[self.column_name]) == 0 or len(data.reference_data[self.column_name]) == 0:
+ raise ValueError("The current data or the reference data is empty.")
+
+ per_row_rouge_scores, summary_rouge_score, current, reference = self._calculate_summary_rouge(
+ data.current_data[self.column_name], data.reference_data[self.column_name]
+ )
+
+ result = ROUGESummaryMetricResult(
+ rouge_type=f"ROUGE-{self.rouge_n}",
+ per_row_scores=per_row_rouge_scores,
+ summary_score=summary_rouge_score,
+ current=current,
+ reference=reference,
+ )
+ return result
+
+
+@default_renderer(wrap_type=ROUGESummaryMetric)
+class ROUGESummaryMetricRenderer(MetricRenderer):
+ @staticmethod
+ def _get_table(metric) -> BaseWidgetInfo:
+ column_names = ["Metric", "current", "reference", "score"]
+ rows = []
+ for i in range(len(metric.current)):
+ rows.append([metric.rouge_type, metric.current[i], metric.reference[i], metric.per_row_scores[i]])
+ return table_data(title="", column_names=column_names, data=rows)
+
+ def render_html(self, obj: ROUGESummaryMetric) -> List[BaseWidgetInfo]:
+ metric = obj.get_result()
+ return [
+ header_text(label="ROUGE Metric"),
+ self._get_table(metric),
+ text_widget(text=f"{metric.summary_score}", title="Overall ROUGE score"),
+ ]
diff --git a/src/evidently/suite/base_suite.py b/src/evidently/suite/base_suite.py
index 3022c3cdac..2e109afeaf 100644
--- a/src/evidently/suite/base_suite.py
+++ b/src/evidently/suite/base_suite.py
@@ -500,6 +500,7 @@ def __iter__(self) -> Iterator[Tuple[str, str, DatasetID]]:
class SnapshotLinks(BaseModel):
datasets: DatasetInputOutputLinks = DatasetInputOutputLinks()
computation_config_id: Optional[ComputationConfigID] = None
+ task_id: Optional[str] = None
class Snapshot(BaseModel):
diff --git a/src/evidently/ui/config.py b/src/evidently/ui/config.py
index 52bdc36adb..53a35f00a0 100644
--- a/src/evidently/ui/config.py
+++ b/src/evidently/ui/config.py
@@ -117,8 +117,13 @@ def load_config(config_type: Type[TConfig], box: dict) -> TConfig:
continue
if section in ("renamed_vars", "dict_itemiterator"):
continue
- if section in config_type.__fields__:
- component = parse_obj_as(config_type.__fields__[section].type_, component_dict)
+ if section == "additional_components":
+ for subsection, component_subdict in component_dict.items():
+ component = parse_obj_as(SECTION_COMPONENT_TYPE_MAPPING.get(subsection, Component), component_subdict)
+ components[subsection] = component
+ elif section in config_type.__fields__:
+ type_ = config_type.__fields__[section].type_
+ component = parse_obj_as(type_, component_dict)
named_components[section] = component
elif section in SECTION_COMPONENT_TYPE_MAPPING:
component = parse_obj_as(SECTION_COMPONENT_TYPE_MAPPING[section], component_dict)
diff --git a/src/evidently/utils/llm/__init__.py b/src/evidently/utils/llm/__init__.py
new file mode 100644
index 0000000000..4bfe1f7c80
--- /dev/null
+++ b/src/evidently/utils/llm/__init__.py
@@ -0,0 +1,3 @@
+from . import _registry
+
+__all__ = ["_registry"]
diff --git a/src/evidently/utils/llm/_registry.py b/src/evidently/utils/llm/_registry.py
new file mode 100644
index 0000000000..63f06a4ade
--- /dev/null
+++ b/src/evidently/utils/llm/_registry.py
@@ -0,0 +1,21 @@
+from evidently.pydantic_utils import register_type_alias
+from evidently.utils.llm.prompts import PromptBlock
+from evidently.utils.llm.prompts import PromptTemplate
+
+register_type_alias(PromptBlock, "evidently.utils.llm.prompts.Anchor", "evidently:prompt_block:Anchor")
+register_type_alias(
+ PromptBlock, "evidently.utils.llm.prompts.JsonOutputFormatBlock", "evidently:prompt_block:JsonOutputFormatBlock"
+)
+register_type_alias(
+ PromptBlock, "evidently.utils.llm.prompts.NoopOutputFormat", "evidently:prompt_block:NoopOutputFormat"
+)
+register_type_alias(PromptBlock, "evidently.utils.llm.prompts.SimpleBlock", "evidently:prompt_block:SimpleBlock")
+register_type_alias(
+ PromptBlock, "evidently.utils.llm.prompts.StringFormatBlock", "evidently:prompt_block:StringFormatBlock"
+)
+register_type_alias(
+ PromptBlock, "evidently.utils.llm.prompts.StringListFormatBlock", "evidently:prompt_block:StringListFormatBlock"
+)
+register_type_alias(
+ PromptTemplate, "evidently.utils.llm.prompts.BlockPromptTemplate", "evidently:prompt_template:BlockPromptTemplate"
+)
diff --git a/src/evidently/utils/llm/base.py b/src/evidently/utils/llm/base.py
new file mode 100644
index 0000000000..2abf77b571
--- /dev/null
+++ b/src/evidently/utils/llm/base.py
@@ -0,0 +1,20 @@
+import dataclasses
+from typing import Any
+from typing import Dict
+
+
+@dataclasses.dataclass
+class LLMMessage:
+ role: str
+ content: str
+
+ @classmethod
+ def user(cls, message: str):
+ return LLMMessage("user", message)
+
+ @classmethod
+ def system(cls, message: str):
+ return LLMMessage("system", message)
+
+
+LLMResponse = Dict[str, Any]
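
For example, the helpers above build the role-tagged messages that the wrapper sends to a provider:

```python
from evidently.utils.llm.base import LLMMessage

messages = [
    LLMMessage.system("You are a concise assistant."),
    LLMMessage.user("List three fruits."),
]
print([(m.role, m.content) for m in messages])
```
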
diff --git a/src/evidently/utils/llm/errors.py b/src/evidently/utils/llm/errors.py
new file mode 100644
index 0000000000..606fb62542
--- /dev/null
+++ b/src/evidently/utils/llm/errors.py
@@ -0,0 +1,13 @@
+from evidently.errors import EvidentlyError
+
+
+class EvidentlyLLMError(EvidentlyError):
+ pass
+
+
+class LLMResponseParseError(EvidentlyLLMError):
+ pass
+
+
+class LLMRequestError(EvidentlyLLMError):
+ pass
diff --git a/src/evidently/utils/llm/prompts.py b/src/evidently/utils/llm/prompts.py
new file mode 100644
index 0000000000..bc0eed4749
--- /dev/null
+++ b/src/evidently/utils/llm/prompts.py
@@ -0,0 +1,275 @@
+import inspect
+import json
+import re
+from abc import ABC
+from abc import abstractmethod
+from functools import wraps
+from typing import Any
+from typing import Callable
+from typing import ClassVar
+from typing import Dict
+from typing import Generic
+from typing import Iterator
+from typing import List
+from typing import Optional
+from typing import Sequence
+from typing import Tuple
+from typing import Type
+from typing import TypeVar
+from typing import Union
+
+import typing_inspect
+
+from evidently.pydantic_utils import EvidentlyBaseModel
+from evidently.utils.llm.base import LLMMessage
+from evidently.utils.llm.errors import LLMResponseParseError
+from evidently.utils.llm.wrapper import LLMRequest
+
+TResult = TypeVar("TResult")
+
+
+class PromptBlock(EvidentlyBaseModel):
+ class Config:
+ is_base_type = True
+
+ def render(self):
+ result = self._render()
+ for field in self.__fields__:
+ placeholder = f"{{{field}}}"
+ if placeholder in result:
+ result = result.replace(placeholder, getattr(self, field))
+ return result
+
+ @abstractmethod
+ def _render(self) -> str:
+ raise NotImplementedError
+
+ @classmethod
+ def simple(cls, value: str):
+ return SimpleBlock(value=value)
+
+ @classmethod
+ def input(cls, placeholder_name: str = "input"):
+ return SimpleBlock(value=f"{{{placeholder_name}}}")
+
+ @classmethod
+ def json_output(cls, **fields: Union[str, Tuple[str, str]]):
+ return JsonOutputFormatBlock(fields=fields)
+
+ @classmethod
+ def string_list_output(cls, of_what: str):
+ return StringListFormatBlock(of_what=of_what)
+
+ @classmethod
+ def string_output(cls, what: str):
+ return StringFormatBlock(what=what)
+
+ def anchored(self, start: str = "__start__", end: str = "__end__"):
+ return Anchor(start=start, block=self, end=end)
+
+
+class Anchor(PromptBlock):
+ class Config:
+ type_alias = "evidently:prompt_block:Anchor"
+
+ start: str
+ block: PromptBlock
+ end: str
+
+ def _render(self) -> str:
+ return f"{self.start}\n{self.block.render()}\n{self.end}"
+
+
+class SimpleBlock(PromptBlock):
+ class Config:
+ type_alias = "evidently:prompt_block:SimpleBlock"
+
+ value: str
+
+ def _render(self) -> str:
+ return self.value
+
+
+class OutputFormatBlock(PromptBlock, ABC, Generic[TResult]):
+ @abstractmethod
+ def parse_response(self, response: str) -> TResult:
+ raise NotImplementedError
+
+
+class NoopOutputFormat(OutputFormatBlock[str]):
+ class Config:
+ type_alias = "evidently:prompt_block:NoopOutputFormat"
+
+ def _render(self) -> str:
+ return ""
+
+ def parse_response(self, response: str) -> str:
+ return response
+
+
+class JsonOutputFormatBlock(OutputFormatBlock[Dict[str, Any]]):
+ class Config:
+ type_alias = "evidently:prompt_block:JsonOutputFormatBlock"
+
+ fields: Dict[str, Union[Tuple[str, str], str]]
+
+ def _render(self) -> str:
+ values = []
+ example_rows = []
+ for field, descr in self.fields.items():
+ if isinstance(descr, tuple):
+ descr, field_key = descr
+ else:
+ field_key = field
+ values.append(field)
+ example_rows.append(f'"{field_key}": "{descr}"')
+
+ example_rows_str = "\n".join(example_rows)
+ return f"Return {', '.join(values)} formatted as json without formatting as follows:\n{{{{\n{example_rows_str}\n}}}}"
+
+ def parse_response(self, response: str) -> Dict[str, Any]:
+ try:
+ return json.loads(response)
+ except json.JSONDecodeError as e:
+ raise LLMResponseParseError(f"Failed to parse response '{response}' as json") from e
+
+
+class StringListFormatBlock(OutputFormatBlock[List[str]]):
+ class Config:
+ type_alias = "evidently:prompt_block:StringListFormatBlock"
+
+ of_what: str
+
+ def _render(self) -> str:
+ return f"""Return a list of {self.of_what}.
+This should be only a list of string {self.of_what}, each one on a new line with no enumeration"""
+
+ def parse_response(self, response: str) -> List[str]:
+ return response.split("\n")
+
+
+class StringFormatBlock(OutputFormatBlock[str]):
+ class Config:
+ type_alias = "evidently:prompt_block:StringFormatBlock"
+
+ what: str
+
+ def _render(self) -> str:
+ return f"""Return {self.what} only."""
+
+ def parse_response(self, response: str) -> str:
+ return response
+
+
+def llm_call(f: Callable) -> Callable[..., LLMRequest]:
+ sig = inspect.getfullargspec(f)
+ response_type = sig.annotations.get("return", str)
+
+ @wraps(f)
+ def inner(self: PromptTemplate, *args, **kwargs):
+        # bind call arguments to parameter names so they can be checked against the prompt placeholders
+        kwargs = inspect.getcallargs(f, self, *args, **kwargs)
+ del kwargs["self"]
+ template = self.get_template()
+ placeholders = self.list_placeholders(template)
+ if set(placeholders) != set(kwargs.keys()):
+ raise TypeError(
+ f"{f} arg signature ({list(kwargs)}) does not correspond to placeholders in prompt ({placeholders})"
+ )
+
+ output_format = self.get_output_format()
+        prompt_response_type = _get_generic_arg(output_format.__class__)
+ if prompt_response_type != response_type:
+ raise TypeError(
+ f"{f} response type ({response_type}) does not correspond to prompt output type {prompt_response_type}"
+ )
+
+ # todo: validate kwargs against sig.annotations
+ # todo: define response parser with validation against response_type
+
+ return LLMRequest(
+ messages=self.get_messages(kwargs, template=template),
+ response_parser=self.parse,
+ response_type=response_type,
+ )
+
+ return inner
+
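+# Usage sketch (illustrative, not part of this diff): decorating a PromptTemplate method
+# with @llm_call turns it into a factory of LLMRequest objects instead of a direct model call:
+#
+#   class SummaryPrompt(BlockPromptTemplate):
+#       blocks = ["Summarize the text.", PromptBlock.input("text"), PromptBlock.string_output("the summary")]
+#
+#       @llm_call
+#       def summarize(self, text: str) -> str: ...
+#
+#   request = SummaryPrompt().summarize(text="...")  # an LLMRequest[str], executed later by an LLMWrapper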
+
+def _get_generic_arg(cls: Type):
+    # return the type argument of the generic base class (e.g. str for OutputFormatBlock[str])
+    return typing_inspect.get_args(next(b for b in cls.__orig_bases__ if typing_inspect.is_generic_type(b)))[0]
+
+
+placeholders_re = re.compile(r"\{([a-zA-Z0-9_]+)}")
+
+
+class PromptTemplate(EvidentlyBaseModel):
+ class Config:
+ is_base_type = True
+
+ # __run_func__ : ClassVar[Callable]
+ @abstractmethod
+ def get_blocks(self) -> Sequence[PromptBlock]:
+ raise NotImplementedError
+
+ def iterate(self, values: Sequence[Dict[str, str]]) -> Iterator[str]:
+ template = self.get_template()
+ for vals in values:
+ yield self.render(vals, template)
+
+ def render(self, values: dict, template: Optional[str] = None):
+ return (template or self.get_template()).format(**values)
+
+ def get_template(self) -> str:
+ return "\n".join(block.render() for block in self.get_blocks())
+
+ def list_placeholders(self, template: Optional[str] = None):
+ template = template or self.get_template()
+ return list(placeholders_re.findall(template))
+
+ def get_output_format(self) -> OutputFormatBlock:
+ output: Optional[OutputFormatBlock] = next(
+ (b for b in self.get_blocks() if isinstance(b, OutputFormatBlock)), None
+ )
+ return output if output is not None else NoopOutputFormat() # type: ignore[return-value]
+
+ def parse(self, response: str, keys: Optional[List[str]] = None) -> Dict[str, Any]:
+ output = self.get_output_format()
+ parsed = output.parse_response(response)
+ if keys is not None and set(keys) != set(parsed.keys()):
+ raise LLMResponseParseError(f"Keys {keys} are required but got {list(parsed.keys())}")
+ return parsed
+
+ def get_messages(self, values, template: Optional[str] = None) -> List[LLMMessage]:
+ return [LLMMessage.user(self.render(values, template))]
+
+
+class WithSystemPrompt(PromptTemplate, ABC):
+ system_prompt: str
+
+ def get_messages(self, values, template: Optional[str] = None) -> List[LLMMessage]:
+ msgs = super().get_messages(values, template)
+ msgs.insert(0, LLMMessage.system(self.system_prompt))
+ return msgs
+
+
+AnyBlock = Union[str, PromptBlock, Callable]
+
+
+class BlockPromptTemplate(PromptTemplate):
+ class Config:
+ type_alias = "evidently:prompt_template:BlockPromptTemplate"
+
+ blocks: ClassVar[List[AnyBlock]]
+
+ def get_blocks(self) -> Sequence[PromptBlock]:
+ return [self._to_block(b) for b in self.blocks]
+
+ def _to_block(self, block: AnyBlock) -> PromptBlock:
+ if isinstance(block, PromptBlock):
+ return block
+ if isinstance(block, str):
+ return PromptBlock.simple(block)
+ # if callable(block): todo
+ # return PromptBlock.func(block)
+        raise NotImplementedError(f"Cannot create prompt block from {block}")
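+
+# Rendering sketch: for a template whose blocks contain a single {input} placeholder,
+#   list(template.iterate([{"input": "first"}, {"input": "second"}]))
+# yields one rendered prompt per row, and template.parse(raw_response) delegates to the
+# template's output-format block (JSON dict, list of strings, or plain string).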
diff --git a/src/evidently/utils/llm/wrapper.py b/src/evidently/utils/llm/wrapper.py
new file mode 100644
index 0000000000..ef26cdb68d
--- /dev/null
+++ b/src/evidently/utils/llm/wrapper.py
@@ -0,0 +1,215 @@
+import asyncio
+import dataclasses
+import datetime
+from abc import ABC
+from abc import abstractmethod
+from asyncio import Lock
+from asyncio import Semaphore
+from asyncio import sleep
+from typing import Callable
+from typing import ClassVar
+from typing import Dict
+from typing import Generic
+from typing import List
+from typing import Optional
+from typing import Sequence
+from typing import Tuple
+from typing import Type
+from typing import TypeVar
+
+from evidently._pydantic_compat import SecretStr
+from evidently.options.base import Options
+from evidently.options.option import Option
+from evidently.ui.base import sync_api
+from evidently.utils.llm.base import LLMMessage
+from evidently.utils.llm.errors import LLMRequestError
+
+TResult = TypeVar("TResult")
+
+
+class RateLimiter:
+ def __init__(self, rate: Optional[int], interval: datetime.timedelta):
+ self.rate = rate
+ self.interval = interval
+ self.enters: List[datetime.datetime] = []
+ self.lock = Lock()
+
+ async def __aenter__(self):
+ if self.rate is None:
+ return
+ while True:
+ async with self.lock:
+ await self._clean()
+ if len(self.enters) < self.rate:
+ self.enters.append(datetime.datetime.now())
+ break
+ await sleep(0.1)
+
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
+ pass
+
+ async def _clean(self):
+ now = datetime.datetime.now()
+ self.enters = [e for e in self.enters if now - e < self.interval]
+
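+# Usage sketch: wrap each model call in the limiter as an async context manager, e.g.
+#   limiter = RateLimiter(rate=500, interval=datetime.timedelta(minutes=1))
+#   async with limiter:
+#       ...  # at most `rate` entries are admitted per `interval`, tracked as a sliding window of timestamps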
+
+@dataclasses.dataclass
+class LLMRequest(Generic[TResult]):
+ messages: List[LLMMessage]
+ response_parser: Callable[[str], TResult]
+ response_type: Type[TResult]
+ retries: int = 1
+
+
+class LLMWrapper(ABC):
+ __used_options__: ClassVar[List[Type[Option]]] = []
+
+ @abstractmethod
+ async def complete(self, messages: List[LLMMessage]) -> str:
+ raise NotImplementedError
+
+ async def complete_batch(
+ self, messages_batch: List[List[LLMMessage]], batch_size: Optional[int] = None, rpm_limit: Optional[int] = None
+ ) -> List[str]:
+ if batch_size is None:
+ batch_size = self.get_batch_size()
+ if rpm_limit is None:
+ rpm_limit = self.get_rpm_limit()
+ rate_limiter = RateLimiter(rate=rpm_limit, interval=datetime.timedelta(minutes=1))
+ semaphore = Semaphore(batch_size)
+
+ async def work(messages: List[LLMMessage]) -> str:
+ async with semaphore, rate_limiter:
+ return await self.complete(messages)
+
+ return await asyncio.gather(*[work(msgs) for msgs in messages_batch])
+
+ async def run(self, request: LLMRequest[TResult]) -> TResult:
+ num_retries = request.retries
+ error = None
+ while num_retries >= 0:
+ num_retries -= 1
+ try:
+ response = await self.complete(request.messages)
+ return request.response_parser(response)
+ except Exception as e:
+ error = e
+ raise error
+
+ async def run_batch(
+ self, requests: Sequence[LLMRequest[TResult]], batch_size: Optional[int] = None, rpm_limit: Optional[int] = None
+ ) -> List[TResult]:
+ if batch_size is None:
+ batch_size = self.get_batch_size()
+ if rpm_limit is None:
+ rpm_limit = self.get_rpm_limit()
+ rate_limiter = RateLimiter(rate=rpm_limit, interval=datetime.timedelta(minutes=1))
+ semaphore = Semaphore(batch_size)
+
+ async def work(request: LLMRequest[TResult]) -> TResult:
+ async with semaphore, rate_limiter:
+ return await self.run(request)
+
+ return await asyncio.gather(*[work(r) for r in requests])
+
+ def get_batch_size(self) -> int:
+ return 100
+
+ def get_rpm_limit(self) -> Optional[int]:
+ return None
+
+ def get_used_options(self) -> List[Type[Option]]:
+ return self.__used_options__
+
+ complete_batch_sync = sync_api(complete_batch)
+ run_sync = sync_api(run)
+ run_batch_sync = sync_api(run_batch)
+
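+# Usage sketch (assuming `wrapper` is a concrete LLMWrapper instance):
+#   request = LLMRequest(
+#       messages=[LLMMessage.user("Say hi")],
+#       response_parser=str.strip,
+#       response_type=str,
+#   )
+#   result = wrapper.run_sync(request)  # or: await wrapper.run(request)
+# run_batch()/complete_batch() issue the same calls concurrently, bounded by get_batch_size()
+# and rate-limited per minute by get_rpm_limit() via RateLimiter.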
+
+LLMProvider = str
+LLMModel = str
+LLMWrapperProvider = Callable[[LLMModel, Options], LLMWrapper]
+_wrappers: Dict[Tuple[LLMProvider, Optional[LLMModel]], LLMWrapperProvider] = {}
+
+
+def llm_provider(name: LLMProvider, model: Optional[LLMModel]):
+ def dec(f: LLMWrapperProvider):
+ _wrappers[(name, model)] = f
+ return f
+
+ return dec
+
+
+def get_llm_wrapper(provider: LLMProvider, model: LLMModel, options: Options) -> LLMWrapper:
+ key: Tuple[str, Optional[str]] = (provider, model)
+ if key in _wrappers:
+ return _wrappers[key](model, options)
+ key = (provider, None)
+ if key in _wrappers:
+ return _wrappers[key](model, options)
+ raise ValueError(f"LLM wrapper for provider {provider} model {model} not found")
+
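+# Registration sketch (hypothetical provider name): any callable taking (model, options) and
+# returning an LLMWrapper can be registered, either for one specific model or as a
+# provider-wide fallback with model=None:
+#
+#   @llm_provider("my_provider", None)
+#   class MyWrapper(LLMWrapper):
+#       def __init__(self, model: str, options: Options): ...
+#       async def complete(self, messages: List[LLMMessage]) -> str: ...
+#
+#   wrapper = get_llm_wrapper("my_provider", "some-model", Options())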
+
+class OpenAIKey(Option):
+ api_key: Optional[SecretStr] = None
+ rpm_limit: int = 500
+
+ def __init__(self, api_key: Optional[str] = None):
+ self.api_key = SecretStr(api_key) if api_key is not None else None
+ super().__init__()
+
+ def get_api_key(self) -> Optional[str]:
+ if self.api_key is None:
+ return None
+ return self.api_key.get_secret_value()
+
+
+@llm_provider("openai", None)
+class OpenAIWrapper(LLMWrapper):
+ __used_options__: ClassVar = [OpenAIKey]
+
+ def __init__(self, model: str, options: Options):
+ import openai
+
+ self.model = model
+ self.options = options.get(OpenAIKey)
+ self._clients: Dict[int, openai.AsyncOpenAI] = {}
+
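+    # One AsyncOpenAI client is cached per running event loop (presumably because the async client should not be shared across loops).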
+ @property
+ def client(self):
+ import openai
+
+ try:
+ loop = asyncio.get_running_loop()
+ except RuntimeError as e:
+ raise RuntimeError("Cannot access OpenAIWrapper client without loop") from e
+ loop_id = id(loop)
+ if loop_id not in self._clients:
+ self._clients[loop_id] = openai.AsyncOpenAI(api_key=self.options.get_api_key())
+ return self._clients[loop_id]
+
+ async def complete(self, messages: List[LLMMessage]) -> str:
+ import openai
+
+ messages = [{"role": msg.role, "content": msg.content} for msg in messages]
+ try:
+ response = await self.client.chat.completions.create(model=self.model, messages=messages) # type: ignore[arg-type]
+ except openai.OpenAIError as e:
+ raise LLMRequestError("Failed to call OpenAI complete API") from e
+ content = response.choices[0].message.content
+ assert content is not None # todo: better error
+ return content
+
+ def get_rpm_limit(self) -> Optional[int]:
+ return self.options.rpm_limit
+
+
+@llm_provider("litellm", None)
+class LiteLLMWrapper(LLMWrapper):
+ def __init__(self, model: str):
+ self.model = model
+
+ async def complete(self, messages: List[LLMMessage]) -> str:
+ from litellm import completion
+
+ return completion(model=self.model, messages=messages).choices[0].message.content
diff --git a/tests/dataset_generator/__init__.py b/tests/dataset_generator/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/features/test_json_schema_match_feature.py b/tests/features/test_json_schema_match_feature.py
new file mode 100644
index 0000000000..42b2e704f7
--- /dev/null
+++ b/tests/features/test_json_schema_match_feature.py
@@ -0,0 +1,147 @@
+from typing import Any
+from typing import Dict
+
+import pandas as pd
+import pytest
+
+from evidently.features.json_schema_match_feature import JSONSchemaMatch
+
+
+@pytest.mark.parametrize(
+ ("column_value, expected_schema, validate_types, exact_match, expected_output"),
+ [
+ # Invalid JSON
+ ('{"name": "Invalid json"]', {"name": str, "age": int}, False, False, False),
+ # Exact Match
+ ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, True, True, True),
+ ('{"name": "Jane", "age": 25}', {"name": str, "age": int, "city": str}, True, True, False),
+ ('{"name": "Jane", "age": 25, "city": "New York"}', {"name": str, "age": int}, True, True, False),
+ ('{"name": "Jane", "age": 25}', {"name": int, "age": int}, True, True, False),
+ # Minimal Match without type validation
+ ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, False, False, True),
+ ('{"name": "Jane", "age": 25, "city": "New York"}', {"name": str, "age": int}, False, False, True),
+ ('{"name": "Jane", "age": null, "city": "New York"}', {"name": str, "age": int}, False, False, False),
+ # Minimal Match with type validation
+ ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, True, False, True),
+ (
+ '{"name": "Jane", "age": "25"}',
+ {"name": str, "age": int},
+ True,
+ False,
+ False,
+ ), # Fail due to type mismatch (age as string)
+ ],
+)
+def test_match_json_schema(
+ column_value: str, expected_schema: Dict[str, type], validate_types: bool, exact_match: bool, expected_output: bool
+):
+ schema_match = JSONSchemaMatch(
+ expected_schema=expected_schema,
+ validate_types=validate_types,
+ exact_match=exact_match,
+ column_name="TestColumnName",
+ )
+ result = schema_match.match_json_schema(json_text=column_value)
+ assert result == expected_output
+
+
+@pytest.mark.parametrize(
+ ("json_obj, expected_schema, validate_types, expected_output"),
+ [
+ # Minimal Match with type validation
+ ({"name": "Jane", "age": 25}, {"name": str, "age": int}, True, True),
+ ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, True, False),
+ ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int}, True, True),
+ ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int, "region": str}, True, False),
+ ({"name": "Jane", "age": None, "city": "New York"}, {"name": str, "age": int}, True, False),
+ # Minimal Match without type validation
+ ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, False, True),
+ ({"name": "Jane", "age": None, "city": "New York"}, {"name": str, "age": int}, False, False),
+ ],
+)
+def test_minimal_match(
+ json_obj: Dict[str, Any], expected_schema: Dict[str, type], validate_types: bool, expected_output: bool
+):
+ schema_match = JSONSchemaMatch(
+ expected_schema=expected_schema, validate_types=validate_types, exact_match=False, column_name="TestColumnName"
+ )
+ result = schema_match._minimal_match(json_obj)
+ assert result == expected_output
+
+
+@pytest.mark.parametrize(
+ ("json_obj, expected_schema, validate_types, expected_output"),
+ [
+ # Exact Match
+ ({"name": "Jane", "age": 25}, {"name": str, "age": int}, True, True),
+ ({"name": "Jane", "age": 25}, {"name": str, "age": int}, False, True),
+ ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, True, False),
+ ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int}, True, False),
+ ({"name": "Jane", "age": 25}, {"name": str, "age": int, "city": str}, True, False),
+ (
+ {"name": "Jane", "age": 25, "city": ["New York", "California"]},
+ {"name": str, "age": int, "city": list},
+ True,
+ True,
+ ),
+ (
+ {"name": "Jane", "age": 25, "city": ["New York", "California"]},
+ {"name": str, "age": int, "city": dict},
+ True,
+ False,
+ ),
+ ],
+)
+def test_exact_match(
+ json_obj: Dict[str, Any], expected_schema: Dict[str, type], validate_types: bool, expected_output: bool
+):
+ schema_match = JSONSchemaMatch(
+ expected_schema=expected_schema, validate_types=validate_types, exact_match=False, column_name="TestColumnName"
+ )
+ result = schema_match._exact_match(json_obj)
+ assert result == expected_output
+
+
+test_data = pd.DataFrame(
+ {
+ "TestColumnName": [
+ '{"name": "John", "age": 30, "city": "New York"}',
+ '{"name": "Jane", "age": null, "city": "London"}',
+ '{"name": "Mike", "age": 25, "city": "San Francisco"}',
+ '{"name": "Invalid json"]',
+ '{"name": "Anna", "age": "22", "country": "Canada"}',
+ ]
+ }
+)
+
+
+@pytest.mark.parametrize(
+ ("expected_schema, validate_types, exact_match, expected_output"),
+ [
+ # Minimal Match without type validation
+ ({"name": str, "age": int}, False, False, [True, False, True, False, True]),
+ # Minimal Match with type validation
+ ({"name": str, "age": int}, True, False, [True, False, True, False, False]),
+ # Exact Match
+ ({"name": str, "age": int, "city": str}, True, True, [True, False, True, False, False]),
+ ],
+)
+def test_generate_feature(
+ expected_schema: Dict[str, type], validate_types: bool, exact_match: bool, expected_output: list
+):
+ schema_match = JSONSchemaMatch(
+ expected_schema=expected_schema,
+ validate_types=validate_types,
+ exact_match=exact_match,
+ column_name="TestColumnName",
+ )
+ result = schema_match.generate_feature(test_data, None)
+ assert result[schema_match._feature_column_name()].tolist() == expected_output
+
+
+def test_generate_feature_column_name_dne():
+ schema_match = JSONSchemaMatch(
+ expected_schema={"test": str}, validate_types=False, exact_match=False, column_name="DNEColumn"
+ )
+ with pytest.raises(KeyError):
+ schema_match.generate_feature(test_data, None)
diff --git a/tests/features/test_text_contains_feature.py b/tests/features/test_text_contains_feature.py
index 51dceeb680..3b590c9de4 100644
--- a/tests/features/test_text_contains_feature.py
+++ b/tests/features/test_text_contains_feature.py
@@ -5,6 +5,8 @@
from evidently.features.text_contains_feature import Contains
from evidently.features.text_contains_feature import DoesNotContain
+from evidently.features.text_contains_feature import ItemMatch
+from evidently.features.text_contains_feature import ItemNoMatch
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.utils.data_preprocessing import create_data_definition
@@ -61,3 +63,83 @@ def test_text_not_contains_feature(items: List[str], case: bool, mode: str, expe
column_expected = feature_generator._feature_column_name()
expected_df = pd.DataFrame({column_expected: expected})
assert result.equals(expected_df)
+
+
+@pytest.mark.parametrize(
+ ("case", "mode", "expected"),
+ [
+ (True, "any", [False, True, False, True, False]),
+ (True, "all", [False, True, False, False, False]),
+ (False, "any", [True, True, True, True, False]),
+ (False, "all", [False, True, True, False, False]),
+ ],
+)
+def test_item_match(case: bool, mode: str, expected: List[bool]):
+ data = {
+ "generated": [
+ "You should consider purchasing Nike or Adidas shoes.",
+ "I eat apples, grapes, and oranges",
+ "grapes, oranges, apples.",
+ "Oranges are more sour than grapes.",
+ "This test doesn't have the words.",
+ ],
+ "expected": [
+ ["nike", "adidas", "puma"],
+ ["grapes", "apples", "oranges"],
+ ["Apples", "Oranges", "Grapes"],
+ ["orange", "sweet", "grape"],
+ ["none", "of", "these"],
+ ],
+ }
+ df = pd.DataFrame(data)
+ df["expected"] = df["expected"].apply(tuple)
+ feature_generator = ItemMatch(columns=["generated", "expected"], case_sensitive=case, mode=mode)
+ result = feature_generator.generate_feature(
+ data=df,
+ data_definition=create_data_definition(None, df, ColumnMapping()),
+ )
+ column_expected = feature_generator._feature_column_name()
+ column_name_obj = feature_generator._as_column()
+ expected_df = pd.DataFrame({column_expected: expected})
+ assert result.equals(expected_df)
+ assert column_name_obj.display_name == f"Text contains {mode} of defined items"
+
+
+@pytest.mark.parametrize(
+ ("case", "mode", "expected"),
+ [
+ (True, "any", [True, False, True, False, True]),
+ (True, "all", [True, False, True, True, True]),
+ (False, "any", [False, False, False, False, True]),
+ (False, "all", [True, False, False, True, True]),
+ ],
+)
+def test_item_no_match(case: bool, mode: str, expected: List[bool]):
+ data = {
+ "generated": [
+ "You should consider purchasing Nike or Adidas shoes.",
+ "I eat apples, grapes, and oranges",
+ "grapes, oranges, apples.",
+ "Oranges are more sour than grapes.",
+ "This test doesn't have the words.",
+ ],
+ "forbidden": [
+ ["nike", "adidas", "puma"],
+ ["grapes", "apples", "oranges"],
+ ["Apples", "Oranges", "Grapes"],
+ ["orange", "sweet", "grape"],
+ ["none", "of", "these"],
+ ],
+ }
+ feature_generator = ItemNoMatch(columns=["generated", "forbidden"], case_sensitive=case, mode=mode)
+ df = pd.DataFrame(data)
+ df["forbidden"] = df["forbidden"].apply(tuple)
+ result = feature_generator.generate_feature(
+ data=df,
+ data_definition=create_data_definition(None, df, ColumnMapping()),
+ )
+ column_expected = feature_generator._feature_column_name()
+ column_name_obj = feature_generator._as_column()
+ expected_df = pd.DataFrame({column_expected: expected})
+ assert result.equals(expected_df)
+ assert column_name_obj.display_name == f"Text does not contain {mode} of defined items"
diff --git a/tests/metrics/data_integrity/test_dataset_rouge_summary_metric.py b/tests/metrics/data_integrity/test_dataset_rouge_summary_metric.py
new file mode 100644
index 0000000000..814bf39ec2
--- /dev/null
+++ b/tests/metrics/data_integrity/test_dataset_rouge_summary_metric.py
@@ -0,0 +1,45 @@
+import json
+
+import pandas as pd
+import pytest
+
+from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric
+from evidently.report.report import Report
+
+
+@pytest.mark.parametrize(
+ "current_df, reference_df, metric, expected_json",
+ (
+ (
+ pd.DataFrame(
+ {
+ "summary": ["hello there", "general kenobi"],
+ }
+ ),
+ pd.DataFrame({"summary": ["hello there", "no de"]}),
+ ROUGESummaryMetric(column_name="summary", rouge_n=1),
+ {
+ "current": ["hello there", "general kenobi"],
+ "reference": ["hello there", "no de"],
+ "rouge_type": "ROUGE-1",
+ "per_row_scores": [1.0, 0.0],
+ "summary_score": 0.5,
+ },
+ ),
+ ),
+)
+def test_rouge_summary_metric_with_report(
+ current_df: pd.DataFrame,
+ reference_df: pd.DataFrame,
+ metric,
+ expected_json: dict,
+) -> None:
+ report = Report(metrics=[metric])
+
+ report.run(current_data=current_df, reference_data=reference_df)
+
+ assert report.show()
+ json_result = report.json()
+ assert len(json_result) > 0
+ result = json.loads(json_result)
+ assert result["metrics"][0]["result"] == expected_json
diff --git a/tests/multitest/metrics/data_integrity.py b/tests/multitest/metrics/data_integrity.py
index d52ae6526a..7973928f44 100644
--- a/tests/multitest/metrics/data_integrity.py
+++ b/tests/multitest/metrics/data_integrity.py
@@ -16,6 +16,7 @@
from evidently.metrics.data_integrity.column_summary_metric import NumericCharacteristics
from evidently.metrics.data_integrity.dataset_missing_values_metric import DatasetMissingValuesMetric
from evidently.metrics.data_integrity.dataset_summary_metric import DatasetSummaryMetric
+from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric
from tests.multitest.conftest import AssertExpectedResult
from tests.multitest.conftest import Error
from tests.multitest.conftest import NoopOutcome
@@ -206,6 +207,27 @@ def dataset_summary_metric():
)
+@metric
+def rouge_summary_metric():
+ return TestMetric(
+ name="rouge_summary_metric",
+ metric=ROUGESummaryMetric(column_name="summary", rouge_n=1),
+ fingerprint="bfc616f760b973d2cbfbf0540c7b2c71",
+ outcomes=NoopOutcome(),
+ datasets=[
+ TestDataset(
+ "rouge_summary_metric_data",
+ current=pd.DataFrame(
+ {
+ "summary": ["hello there", "general kenobi"],
+ }
+ ),
+ reference=pd.DataFrame({"summary": ["hello there", "no de"]}),
+ ),
+ ],
+ )
+
+
@metric
def column_reg_exp_metric():
return TestMetric(
diff --git a/tests/test_pydantic_aliases.py b/tests/test_pydantic_aliases.py
index 488322edd3..0cd96d923c 100644
--- a/tests/test_pydantic_aliases.py
+++ b/tests/test_pydantic_aliases.py
@@ -16,6 +16,9 @@
from evidently.base_metric import MetricResult
from evidently.collector.config import CollectorTrigger
from evidently.collector.storage import CollectorStorage
+from evidently.experimental.dataset_generators.base import BaseDatasetGenerator
+from evidently.experimental.dataset_generators.llm.index import DataCollectionProvider
+from evidently.experimental.dataset_generators.llm.splitter import Splitter
from evidently.features.generated_features import BaseDescriptor
from evidently.features.generated_features import GeneratedFeatures
from evidently.features.llm_judge import BaseLLMPromptTemplate
@@ -32,6 +35,8 @@
from evidently.tests.base_test import TestParameters
from evidently.ui.components.base import Component
from evidently.ui.dashboards.base import DashboardPanel
+from evidently.utils.llm.prompts import PromptBlock
+from evidently.utils.llm.prompts import PromptTemplate
T = TypeVar("T")
@@ -105,6 +110,11 @@ def test_all_aliases_correct():
CollectorStorage: "collector_storage",
BaseLLMPromptTemplate: "prompt_template",
DashboardPanel: "dashboard_panel",
+ BaseDatasetGenerator: "dataset_generator",
+ Splitter: "splitter",
+ DataCollectionProvider: "data_collecton_provider",
+ PromptBlock: "prompt_block",
+ PromptTemplate: "prompt_template",
}
skip = [Component]
skip_literal = [EvidentlyBaseModel, WithTestAndMetricDependencies, BasePreset]