diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e92b54ddfe..1093d9bbaf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -115,9 +115,7 @@ jobs: - name: Install minimal dependencies run: pip install -r requirements.min.txt - name: Install package - run: pip install -e .[dev,spark,fsspec] - - name: Run pip-audit - run: pip-audit --ignore-vuln PYSEC-2024-48 --ignore-vuln GHSA-jw8x-6495-233v --ignore-vuln GHSA-4hq2-rpgc-r8r7 + run: pip install -e .[dev,spark,fsspec,llm] - name: Run Tests run: python -m pytest --durations=50 test: @@ -155,7 +153,7 @@ jobs: uses: ./.github/share-actions/get-bikes-dataset-cached - name: Install package - run: pip install -e .[dev,spark,fsspec] + run: pip install -e .[dev,spark,fsspec,llm] - name: Run Tests run: python -m pytest --durations=50 @@ -173,7 +171,7 @@ jobs: cache: "pip" cache-dependency-path: setup.py - name: Install dependencies - run: pip install -e ".[dev]" + run: pip install -e . - name: Install wheel run: pip install wheel - name: Build package diff --git a/.github/workflows/ui.yml b/.github/workflows/ui.yml index 9bff9b1ae3..89dfb3bbb1 100644 --- a/.github/workflows/ui.yml +++ b/.github/workflows/ui.yml @@ -151,8 +151,8 @@ jobs: uses: ./.github/share-actions/ui-node-pnpm-install - name: Install Playwright Browsers - working-directory: ui - run: pnpm dlx playwright@1.43.0 install --with-deps + working-directory: ui/service + run: pnpm exec playwright install --with-deps chromium - name: 🔍 Get bikes dataset cached uses: ./.github/share-actions/get-bikes-dataset-cached @@ -162,7 +162,7 @@ jobs: - name: Wait UI to be ready to test working-directory: ui/service - run: pnpm wait-on tcp:127.0.0.1:8000 -t 200000 + run: pnpm wait-on tcp:127.0.0.1:8000 -t 4m - name: Run Service Playwright tests working-directory: ui/service diff --git a/docs/book/reference/all-metrics.md b/docs/book/reference/all-metrics.md index d697ba09d1..484440cd06 100644 --- a/docs/book/reference/all-metrics.md +++ b/docs/book/reference/all-metrics.md @@ -272,6 +272,9 @@ Check for regular expression matches. | **DoesNotContain()** Example use:
`DoesNotContain(items=["as a large language model"])` | **Required:**<br>
`items: List[str]`

**Optional:** | | **IncludesWords()** Example use:
`IncludesWords(words_list=['booking', 'hotel', 'flight'])` | **Required:**<br>
`words_list: List[str]`

**Optional:** | | **ExcludesWords()** Example use:
`ExcludesWords(words_list=['buy', 'sell', 'bet'])`| **Required:**<br>
`words_list: List[str]`

**Optional:** | +| **ItemMatch()** Example use:
`ItemMatch(with_column="expected")`| **Required:**
`with_column: str`

**Optional:** | +| **ItemNoMatch()** Example use:
`ItemNoMatch(with_column="forbidden")`| **Required:**<br>
`with_column: str`

**Optional:** | +| **JSONSchemaMatch()** Example use:
`JSONSchemaMatch(expected_schema={"name": str, "age": int}, exact_match=False, validate_types=True)`| **Required:**
`expected_schema: Dict[str, type]`

**Optional:** | ## Descriptors: Text stats diff --git a/examples/data_generators.py b/examples/data_generators.py new file mode 100644 index 0000000000..1cd8ef87e1 --- /dev/null +++ b/examples/data_generators.py @@ -0,0 +1,66 @@ +from evidently.experimental.dataset_generators.llm.questions import QADatasetFromSeedGenerator, QADatasetGenerator +from evidently.experimental.dataset_generators.llm.index import DataCollectionProvider +from evidently.options.base import Options + + +def generate_from_file(): + file_path = "../cloud_quickstart_tracing.pdf" + data = DataCollectionProvider.from_files(file_path, chunk_size=50, chunk_overlap=20, splitter="simple") + + generator = QADatasetGenerator( + data_collection=data, + provider="openai", + model="gpt-4o-mini", + num_questions=5, + options=Options.from_any_options(None) + ) + generated = generator.generate() + for _, a in generated.iterrows(): + print("Q", a["questions"]) + if "answers" in a: + print("A", a["answers"]) + if "context" in a: + print("C", a["context"]) + print() + + +def main(): + data = DataCollectionProvider.from_chunks(chunks=["I am a banana", "My spoon is too big"]) + generator = QADatasetGenerator( + data_collection=data, + provider="openai", + model="gpt-4o-mini", + num_questions=5, + options=Options.from_any_options(None) + ) + + generated = generator.generate() + for _, a in generated.iterrows(): + print("Q", a["questions"]) + if "answers" in a: + print("A", a["answers"]) + if "context" in a: + print("C", a["context"]) + print() + + generator = QADatasetFromSeedGenerator( + seed_question="What is 'kek'?", + num_questions=5, + provider="openai", + model="gpt-4o-mini", + options=Options.from_any_options(None) + ) + + generated = generator.generate() + for _, a in generated.iterrows(): + print("Q", a["questions"]) + if "answers" in a: + print("A", a["answers"]) + if "context" in a: + print("C", a["context"]) + print() + + +if __name__ == '__main__': + main() + # generate_from_file() diff --git a/examples/how_to_questions/metrics/data_integrity/dataset_rouge_summary_metric.ipynb b/examples/how_to_questions/metrics/data_integrity/dataset_rouge_summary_metric.ipynb new file mode 100644 index 0000000000..f0abc6298c --- /dev/null +++ b/examples/how_to_questions/metrics/data_integrity/dataset_rouge_summary_metric.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evidently Dataset ROUGE Summary Metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from evidently.report import Report\n", + "from evidently.metrics import ROUGESummaryMetric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "current_data = {\n", + " \"summary\": [\"hello there\", \"general kenobi\"],\n", + "}\n", + "\n", + "current_df = pd.DataFrame(current_data)\n", + "\n", + "reference_data = {\n", + " \"summary\": [\"hello there\", \"no de\"]\n", + "}\n", + "\n", + "current_df = pd.DataFrame(current_data)\n", + "reference_df = pd.DataFrame(reference_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "report = Report(metrics=[\n", + " ROUGESummaryMetric(column_name=\"summary\", rouge_n=2)\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "report.run(current_data=current_df, reference_data=reference_df)\n" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "report.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "report.as_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "report.as_dataframe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/how_to_questions/metrics/data_integrity/dataset_summary_metric.ipynb b/examples/how_to_questions/metrics/data_integrity/dataset_summary_metric.ipynb index 78a65bbee8..9b8be1b638 100644 --- a/examples/how_to_questions/metrics/data_integrity/dataset_summary_metric.ipynb +++ b/examples/how_to_questions/metrics/data_integrity/dataset_summary_metric.ipynb @@ -116,7 +116,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.13" + "version": "3.8.19" } }, "nbformat": 4, diff --git a/requirements.dev.txt b/requirements.dev.txt index 7f8701e3ca..606cca9714 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -16,6 +16,7 @@ pip-audit pyspark ruff==0.3.7 pre-commit==3.5.0 +evaluate==0.4.1 # service dependencies litestar>=2.7.1 diff --git a/requirements.min.txt b/requirements.min.txt index f858d8e507..cd8ee86f57 100644 --- a/requirements.min.txt +++ b/requirements.min.txt @@ -31,3 +31,5 @@ openai==1.16.2 evaluate==0.4.1 transformers[torch]==4.39.3 sentence-transformers==2.7.0 +rouge-score==0.1.2 +chromadb==0.4.0 diff --git a/setup.cfg b/setup.cfg index 7f9f43d785..231d1f6f6c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -106,6 +106,15 @@ ignore_missing_imports = True [mypy-litellm.*] ignore_missing_imports = True +[mypy-chromadb.*] +ignore_missing_imports = True + +[mypy-llama_index.*] +ignore_missing_imports = True + +[mypy-pypdf.*] +ignore_missing_imports = True + [tool:pytest] testpaths=tests python_classes=*Test diff --git a/setup.py b/setup.py index 46e9cd43aa..329d5869fe 100644 --- a/setup.py +++ b/setup.py @@ -76,6 +76,7 @@ "deprecation>=2.1.0", "uuid6>=2024.7.10", "cryptography>=43.0.1", + "evaluate>=0.4.1", ], extras_require={ "dev": [ @@ -96,12 +97,15 @@ "ruff==0.3.7", "pre-commit==3.5.0", "pytest-asyncio==0.23.7", + "evaluate>=0.4.1", ], "llm": [ "openai>=1.16.2", "evaluate>=0.4.1", "transformers[torch]>=4.39.3", "sentence-transformers>=2.7.0", + "rouge-score>=0.1.2", + "chromadb>=0.4.0", ], "spark": ["pyspark>=3.4.0"], "fsspec": [ diff --git a/src/evidently/descriptors/__init__.py b/src/evidently/descriptors/__init__.py index 2520abdbe3..173a1be486 100644 --- a/src/evidently/descriptors/__init__.py +++ b/src/evidently/descriptors/__init__.py @@ -3,6 +3,7 @@ from .custom_descriptor import CustomPairColumnEval from .hf_descriptor import HuggingFaceModel from .hf_descriptor import HuggingFaceToxicityModel +from .json_schema_match_descriptor import JSONSchemaMatch from .llm_judges import BiasLLMEval from .llm_judges import ContextQualityLLMEval from .llm_judges import DeclineLLMEval @@ -19,6 
+20,8 @@ from .sentiment_descriptor import Sentiment from .text_contains_descriptor import Contains from .text_contains_descriptor import DoesNotContain +from .text_contains_descriptor import ItemMatch +from .text_contains_descriptor import ItemNoMatch from .text_length_descriptor import TextLength from .text_part_descriptor import BeginsWith from .text_part_descriptor import EndsWith @@ -47,6 +50,8 @@ "EndsWith", "DoesNotContain", "IncludesWords", + "ItemMatch", + "ItemNoMatch", "ExcludesWords", "TextLength", "TriggerWordsPresence", @@ -55,5 +60,6 @@ "SentenceCount", "Sentiment", "RegExp", + "JSONSchemaMatch", "_registry", ] diff --git a/src/evidently/descriptors/_registry.py b/src/evidently/descriptors/_registry.py index 0f912a86fe..5ac97efc42 100644 --- a/src/evidently/descriptors/_registry.py +++ b/src/evidently/descriptors/_registry.py @@ -15,6 +15,11 @@ "evidently.descriptors.hf_descriptor.HuggingFaceToxicityModel", "evidently:descriptor:HuggingFaceToxicityModel", ) +register_type_alias( + FeatureDescriptor, + "evidently.descriptors.json_schema_match_descriptor.JSONSchemaMatch", + "evidently:descriptor:JSONSchemaMatch", +) register_type_alias( FeatureDescriptor, "evidently.descriptors.llm_judges.BiasLLMEval", "evidently:descriptor:BiasLLMEval" ) @@ -72,6 +77,12 @@ "evidently.descriptors.text_contains_descriptor.DoesNotContain", "evidently:descriptor:DoesNotContain", ) +register_type_alias( + FeatureDescriptor, "evidently.descriptors.text_contains_descriptor.ItemMatch", "evidently:descriptor:ItemMatch" +) +register_type_alias( + FeatureDescriptor, "evidently.descriptors.text_contains_descriptor.ItemNoMatch", "evidently:descriptor:ItemNoMatch" +) register_type_alias( FeatureDescriptor, "evidently.descriptors.text_length_descriptor.TextLength", "evidently:descriptor:TextLength" ) diff --git a/src/evidently/descriptors/json_schema_match_descriptor.py b/src/evidently/descriptors/json_schema_match_descriptor.py new file mode 100644 index 0000000000..f85818370e --- /dev/null +++ b/src/evidently/descriptors/json_schema_match_descriptor.py @@ -0,0 +1,23 @@ +from typing import Dict + +from evidently.features import json_schema_match_feature +from evidently.features.generated_features import FeatureDescriptor +from evidently.features.generated_features import GeneratedFeature + + +class JSONSchemaMatch(FeatureDescriptor): + class Config: + type_alias = "evidently:descriptor:JSONSchemaMatch" + + expected_schema: Dict[str, type] + validate_types: bool = False + exact_match: bool = False + + def feature(self, column_name: str) -> GeneratedFeature: + return json_schema_match_feature.JSONSchemaMatch( + column_name=column_name, + expected_schema=self.expected_schema, + validate_types=self.validate_types, + exact_match=self.exact_match, + display_name=self.display_name, + ) diff --git a/src/evidently/descriptors/text_contains_descriptor.py b/src/evidently/descriptors/text_contains_descriptor.py index 7e069970d5..4795c4f77f 100644 --- a/src/evidently/descriptors/text_contains_descriptor.py +++ b/src/evidently/descriptors/text_contains_descriptor.py @@ -39,3 +39,37 @@ def feature(self, column_name: str) -> GeneratedFeature: self.mode, self.display_name, ) + + +class ItemMatch(FeatureDescriptor): + class Config: + type_alias = "evidently:descriptor:ItemMatch" + + with_column: str + mode: str = "any" + case_sensitive: bool = True + + def feature(self, column_name: str) -> GeneratedFeature: + return text_contains_feature.ItemMatch( + columns=[column_name, self.with_column], + 
case_sensitive=self.case_sensitive, + mode=self.mode, + display_name=self.display_name, + ) + + +class ItemNoMatch(FeatureDescriptor): + class Config: + type_alias = "evidently:descriptor:ItemNoMatch" + + with_column: str + mode: str = "any" + case_sensitive: bool = True + + def feature(self, column_name: str) -> GeneratedFeature: + return text_contains_feature.ItemNoMatch( + columns=[column_name, self.with_column], + case_sensitive=self.case_sensitive, + mode=self.mode, + display_name=self.display_name, + ) diff --git a/src/evidently/experimental/dataset_generators/__init__.py b/src/evidently/experimental/dataset_generators/__init__.py new file mode 100644 index 0000000000..4bfe1f7c80 --- /dev/null +++ b/src/evidently/experimental/dataset_generators/__init__.py @@ -0,0 +1,3 @@ +from . import _registry + +__all__ = ["_registry"] diff --git a/src/evidently/experimental/dataset_generators/_registry.py b/src/evidently/experimental/dataset_generators/_registry.py new file mode 100644 index 0000000000..74a027ac6a --- /dev/null +++ b/src/evidently/experimental/dataset_generators/_registry.py @@ -0,0 +1,67 @@ +from evidently.experimental.dataset_generators.base import BaseDatasetGenerator +from evidently.experimental.dataset_generators.llm.index import DataCollectionProvider +from evidently.experimental.dataset_generators.llm.splitter import Splitter +from evidently.pydantic_utils import register_type_alias +from evidently.utils.llm.prompts import PromptTemplate + +register_type_alias( + BaseDatasetGenerator, + "evidently.experimental.dataset_generators.llm.questions.QADatasetFromSeedGenerator", + "evidently:dataset_generator:QADatasetFromSeedGenerator", +) +register_type_alias( + BaseDatasetGenerator, + "evidently.experimental.dataset_generators.llm.questions.QADatasetGenerator", + "evidently:dataset_generator:QADatasetGenerator", +) +register_type_alias( + DataCollectionProvider, + "evidently.experimental.dataset_generators.llm.index.ChunksDataCollectionProvider", + "evidently:data_collecton_provider:ChunksDataCollectionProvider", +) +register_type_alias( + DataCollectionProvider, + "evidently.experimental.dataset_generators.llm.index.FileDataCollectionProvider", + "evidently:data_collecton_provider:FileDataCollectionProvider", +) + +register_type_alias( + PromptTemplate, + "evidently.experimental.dataset_generators.llm.prompts.BaselineAnswerPromptTemplate", + "evidently:prompt_template:BaselineAnswerPromptTemplate", +) +register_type_alias( + PromptTemplate, + "evidently.experimental.dataset_generators.llm.prompts.NaiveQuestionsFromContextPromptTemplate", + "evidently:prompt_template:NaiveQuestionsFromContextPromptTemplate", +) +register_type_alias( + PromptTemplate, + "evidently.experimental.dataset_generators.llm.prompts.QuestionsFromContextPromptTemplate", + "evidently:prompt_template:QuestionsFromContextPromptTemplate", +) +register_type_alias( + PromptTemplate, + "evidently.experimental.dataset_generators.llm.prompts.QuestionsFromSeedPromptTemplate", + "evidently:prompt_template:QuestionsFromSeedPromptTemplate", +) +register_type_alias( + PromptTemplate, + "evidently.experimental.dataset_generators.llm.prompts.ReformulateQuestionPromptTemplate", + "evidently:prompt_template:ReformulateQuestionPromptTemplate", +) +register_type_alias( + PromptTemplate, + "evidently.experimental.dataset_generators.llm.prompts.SimpleQuestionPromptTemplate", + "evidently:prompt_template:SimpleQuestionPromptTemplate", +) +register_type_alias( + Splitter, + 
"evidently.experimental.dataset_generators.llm.splitter.LlamaIndexSplitter", + "evidently:splitter:LlamaIndexSplitter", +) +register_type_alias( + Splitter, + "evidently.experimental.dataset_generators.llm.splitter.SimpleSplitter", + "evidently:splitter:SimpleSplitter", +) diff --git a/src/evidently/experimental/dataset_generators/base.py b/src/evidently/experimental/dataset_generators/base.py new file mode 100644 index 0000000000..0aefc12c8e --- /dev/null +++ b/src/evidently/experimental/dataset_generators/base.py @@ -0,0 +1,21 @@ +from abc import ABC +from abc import abstractmethod + +import pandas as pd +from typing_extensions import TypeAlias + +from evidently.options.base import Options +from evidently.pydantic_utils import EvidentlyBaseModel + +DatasetGeneratorResult: TypeAlias = pd.DataFrame + + +class BaseDatasetGenerator(EvidentlyBaseModel, ABC): + class Config: + is_base_type = True + + options: Options + + @abstractmethod + def generate(self) -> DatasetGeneratorResult: + raise NotImplementedError diff --git a/src/evidently/experimental/dataset_generators/llm/__init__.py b/src/evidently/experimental/dataset_generators/llm/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/evidently/experimental/dataset_generators/llm/base.py b/src/evidently/experimental/dataset_generators/llm/base.py new file mode 100644 index 0000000000..9710610657 --- /dev/null +++ b/src/evidently/experimental/dataset_generators/llm/base.py @@ -0,0 +1,22 @@ +from typing import Optional + +from evidently._pydantic_compat import PrivateAttr +from evidently.experimental.dataset_generators.base import BaseDatasetGenerator +from evidently.options.base import Options +from evidently.utils.llm.wrapper import LLMWrapper +from evidently.utils.llm.wrapper import get_llm_wrapper + + +class BaseLLMDatasetGenerator(BaseDatasetGenerator): + provider: str + model: str + _llm_wrapper: Optional[LLMWrapper] = PrivateAttr(None) + + def get_llm_wrapper(self, options: Options) -> LLMWrapper: + if self._llm_wrapper is None: + self._llm_wrapper = get_llm_wrapper(self.provider, self.model, options) + return self._llm_wrapper + + @property + def wrapper(self): + return self.get_llm_wrapper(self.options) diff --git a/src/evidently/experimental/dataset_generators/llm/index.py b/src/evidently/experimental/dataset_generators/llm/index.py new file mode 100644 index 0000000000..1b5d2c2bf5 --- /dev/null +++ b/src/evidently/experimental/dataset_generators/llm/index.py @@ -0,0 +1,149 @@ +import abc +import glob +import os +from pathlib import Path +from typing import List +from typing import Optional + +import chromadb +from chromadb.types import Collection +from chromadb.utils import embedding_functions + +from evidently.experimental.dataset_generators.llm.splitter import AnySplitter +from evidently.experimental.dataset_generators.llm.splitter import Splitter +from evidently.pydantic_utils import EvidentlyBaseModel + +Chunk = str +DEFAULT_CHUNK_SIZE = 512 +DEFAULT_CHUNK_OVERLAP = 20 + + +def read_text(filename: str) -> str: + file_path = Path(filename) + if file_path.suffix.lower() == ".pdf": + try: + from pypdf import PdfReader + except ImportError as e: + raise ImportError("Please install pypdf to extract context from .pdf files") from e + reader = PdfReader(file_path) + text = "" + for page_num in range(len(reader.pages)): + page = reader.pages[page_num] + text += page.extract_text() + return text + else: + return Path(filename).read_text() + + +class DataCollectionProvider(EvidentlyBaseModel, abc.ABC): + class 
Config: + is_base_type = True + + chunk_size: int = DEFAULT_CHUNK_SIZE + chunk_overlap: int = DEFAULT_CHUNK_OVERLAP + splitter: AnySplitter = "llama_index" + + @abc.abstractmethod + def get_data_collection(self) -> "DataCollection": + raise NotImplementedError + + @classmethod + def from_files( + cls, + path: str, + chunk_size: int = DEFAULT_CHUNK_SIZE, + chunk_overlap: int = DEFAULT_CHUNK_OVERLAP, + splitter: AnySplitter = "llama_index", + ) -> "DataCollectionProvider": + return FileDataCollectionProvider( + path=path, chunk_size=chunk_size, chunk_overlap=chunk_overlap, splitter=splitter + ) + + @classmethod + def from_chunks(cls, chunks: List[str]): + return ChunksDataCollectionProvider(chunks=chunks) + + +class ChunksDataCollectionProvider(DataCollectionProvider): + class Config: + type_alias = "evidently:data_collecton_provider:ChunksDataCollectionProvider" + + chunks: List[Chunk] + + def get_data_collection(self): + dc = DataCollection(name="chunks", chunks=self.chunks) + dc.init_collection() + return dc + + +class FileDataCollectionProvider(DataCollectionProvider): + class Config: + type_alias = "evidently:data_collecton_provider:FileDataCollectionProvider" + + path: str + + def get_data_collection(self): + file_path = Path(self.path) + paths = [self.path] if file_path.is_file() else glob.glob(os.path.join(self.path, "*")) + + splitter = Splitter.from_any(self.splitter, self.chunk_size, self.chunk_overlap) + chunks = list(splitter.split([read_text(p) for p in paths])) + + data_collection = DataCollection(name=file_path.name, chunks=chunks) + data_collection.init_collection() + return data_collection + + +class DataCollection: + name: str + chunks: List[Chunk] + collection: Optional[Collection] = None + + def __init__(self, name: str, chunks: List[str], collection: Optional["Collection"] = None): + self.name = name + self.chunks = chunks + self.collection = collection + + def init_collection(self): + if self.collection is None: + # fixme: huggingface/tokenizers warns about clean_up_tokenization_spaces + import warnings + + os.environ["TOKENIZERS_PARALLELISM"] = "false" + warnings.filterwarnings("ignore", category=FutureWarning) + + default_embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction( + model_name="all-MiniLM-L6-v2", + ) + chroma_client = chromadb.Client() + collection = chroma_client.get_or_create_collection( + name=self.name, + embedding_function=default_embedding_function, + ) + for i, chunk in enumerate(self.chunks): + collection.upsert( + ids=str(i), + documents=chunk, + ) + self.collection = collection + + def find_relevant_chunks(self, question: str, n_results: int = 3) -> List[Chunk]: + """ + Queries the collection with a given question and returns the relevant text chunks. + + Args: + question (str): The query or question text to search for. + n_results (int): Number of results to retrieve. Default is 3. + + Returns: + List[Chunk]: A list of relevant text chunks. 
+ """ + if self.collection is None: + raise ValueError("Collection is not initialized") + results = self.collection.query( + query_texts=question, + n_results=min(n_results, len(self.chunks)), + ) + + relevant_chunks = [chunk for document in results["documents"] for chunk in document] + return relevant_chunks diff --git a/src/evidently/experimental/dataset_generators/llm/prompts.py b/src/evidently/experimental/dataset_generators/llm/prompts.py new file mode 100644 index 0000000000..bb38038f57 --- /dev/null +++ b/src/evidently/experimental/dataset_generators/llm/prompts.py @@ -0,0 +1,95 @@ +from typing import ClassVar +from typing import List + +from evidently.utils.llm.prompts import BlockPromptTemplate +from evidently.utils.llm.prompts import PromptBlock +from evidently.utils.llm.prompts import WithSystemPrompt +from evidently.utils.llm.prompts import llm_call + + +class SimpleQuestionPromptTemplate(BlockPromptTemplate): + class Config: + type_alias = "evidently:prompt_template:SimpleQuestionPromptTemplate" + + blocks: ClassVar = [ + "Please generate a {question_type} question about this:", + PromptBlock.input("context").anchored(), + PromptBlock.json_output(question="question text", answer="answer text"), + ] + question_type: str = "simple" + + +class QuestionsFromSeedPromptTemplate(BlockPromptTemplate): + class Config: + type_alias = "evidently:prompt_template:QuestionsFromSeedPromptTemplate" + + blocks: ClassVar = [ + """Write for me {number} alternative questions quite similar to the question you got. + The question: """, + PromptBlock.input("seed_question").anchored(), + PromptBlock.string_list_output("questions"), + ] + + @llm_call + def generate(self, seed_question: str, number: int) -> List[str]: ... + + +class QuestionsFromContextPromptTemplate(WithSystemPrompt, BlockPromptTemplate): + class Config: + type_alias = "evidently:prompt_template:QuestionsFromContextPromptTemplate" + + system_prompt: str = "You are an assistant who generates questions based on provided context" + + @llm_call + def generate_questions(self, context: str, number: int) -> List[str]: ... + + +class NaiveQuestionsFromContextPromptTemplate(QuestionsFromContextPromptTemplate): + class Config: + type_alias = "evidently:prompt_template:NaiveQuestionsFromContextPromptTemplate" + + blocks: ClassVar = [ + "Generate {number} conceptual questions based on the provided context and " + "can be answered from the information in the provided context.\n" + "Here is a context", + PromptBlock.input("context").anchored(), + "Remain faithful to the above context.\n" + "Avoid providing any preamble!\n" + "Avoid providing any closing statement!", + PromptBlock.string_list_output("questions"), + ] + + +class ReformulateQuestionPromptTemplate(QuestionsFromContextPromptTemplate): + class Config: + type_alias = "evidently:prompt_template:ReformulateQuestionPromptTemplate" + + blocks: ClassVar = [ + """Write for me {number} alternative questions quite similar to the question you got. 
+The question:""", + PromptBlock.input("context").anchored(), + PromptBlock.string_list_output("questions"), + ] + number: int + system_prompt: str = "You are a smart assistant who helps repharase questions" + + +class BaselineAnswerPromptTemplate(WithSystemPrompt, BlockPromptTemplate): + class Config: + type_alias = "evidently:prompt_template:BaselineAnswerPromptTemplate" + + blocks: ClassVar = [ + "Your task is to answer the following query:", + PromptBlock.input("question").anchored(), + "You have access to the following documents which are meant to provide context as you answer the query:", + PromptBlock.input("context").anchored(), + """Please remain faithful to the underlying context, +and deviate from it only if you haven't found the answer in the provided context. +Avoid providing any preamble! +Avoid providing any closing statement!""", + PromptBlock.string_output("answer"), + ] + system_prompt: str = "You are a helpful assistant that answer a given question directly without any preamble" + + @llm_call + def generate_answers(self, question: str, context: str): ... diff --git a/src/evidently/experimental/dataset_generators/llm/questions.py b/src/evidently/experimental/dataset_generators/llm/questions.py new file mode 100644 index 0000000000..263d7f5fd7 --- /dev/null +++ b/src/evidently/experimental/dataset_generators/llm/questions.py @@ -0,0 +1,75 @@ +import random +from typing import List +from typing import Sequence +from typing import Tuple + +import pandas as pd + +from evidently.experimental.dataset_generators.base import DatasetGeneratorResult +from evidently.experimental.dataset_generators.llm.base import BaseLLMDatasetGenerator +from evidently.experimental.dataset_generators.llm.index import Chunk +from evidently.experimental.dataset_generators.llm.index import DataCollection +from evidently.experimental.dataset_generators.llm.index import DataCollectionProvider +from evidently.experimental.dataset_generators.llm.prompts import BaselineAnswerPromptTemplate +from evidently.experimental.dataset_generators.llm.prompts import NaiveQuestionsFromContextPromptTemplate +from evidently.experimental.dataset_generators.llm.prompts import QuestionsFromContextPromptTemplate +from evidently.experimental.dataset_generators.llm.prompts import QuestionsFromSeedPromptTemplate + +Question = str +Answer = str +GeneratedQuestion = Tuple[Question, Answer, Chunk] +ChunkSet = List[Chunk] + + +class QADatasetGenerator(BaseLLMDatasetGenerator): + class Config: + type_alias = "evidently:dataset_generator:QADatasetGenerator" + + data_collection: DataCollectionProvider + num_questions: int + questions: QuestionsFromContextPromptTemplate = NaiveQuestionsFromContextPromptTemplate() + answers: BaselineAnswerPromptTemplate = BaselineAnswerPromptTemplate() + + def generate(self) -> DatasetGeneratorResult: + documents = self.data_collection.get_data_collection() + chunk_set_count, chunks_in_set_count, questions_per_chunkset = self.get_chunks_and_question_count() + chunk_sets = self.generate_chunksets(documents, chunk_set_count, chunks_in_set_count) + questions: List[Question] = self.generate_questions(chunk_sets, questions_per_chunkset) + relevant_chunks = [documents.find_relevant_chunks(q) for q in questions] + answers = self.generate_answers(questions, relevant_chunks) + return pd.DataFrame({"questions": questions, "answers": answers, "context": relevant_chunks}) + + def get_chunks_and_question_count(self) -> Tuple[int, int, int]: + return 1, 1, self.num_questions + + def generate_chunksets(self, 
documents: DataCollection, count: int, chunks_per_set: int) -> List[ChunkSet]: + return [[random.choice(documents.chunks) for _ in range(chunks_per_set)] for _ in range(count)] + + def generate_questions(self, chunk_sets: Sequence[List[Chunk]], questions_per_chunkset: int) -> List[Question]: + questions = self.wrapper.run_batch_sync( + self.questions.generate_questions(context="\n\n".join(chunks), number=questions_per_chunkset) + for chunks in chunk_sets + ) + return [q for qs in questions for q in qs] + + def generate_answers(self, questions: List[Question], relevant_chunks: List[List[Chunk]]) -> List[str]: + return self.wrapper.run_batch_sync( + self.answers.generate_answers(question=question, context="\n".join(chunks)) + for question, chunks in zip(questions, relevant_chunks) + ) + + +class QADatasetFromSeedGenerator(BaseLLMDatasetGenerator): + class Config: + type_alias = "evidently:dataset_generator:QADatasetFromSeedGenerator" + + seed_question: str + num_questions: int + prompt: QuestionsFromSeedPromptTemplate = QuestionsFromSeedPromptTemplate() + + def generate(self) -> DatasetGeneratorResult: + response = self.wrapper.run_sync( + self.prompt.generate(number=self.num_questions, seed_question=self.seed_question) + ) + + return pd.DataFrame({"questions": response}) diff --git a/src/evidently/experimental/dataset_generators/llm/splitter.py b/src/evidently/experimental/dataset_generators/llm/splitter.py new file mode 100644 index 0000000000..e4b775eb29 --- /dev/null +++ b/src/evidently/experimental/dataset_generators/llm/splitter.py @@ -0,0 +1,130 @@ +import re +from abc import ABC +from abc import abstractmethod +from enum import Enum +from typing import ClassVar +from typing import List +from typing import Optional +from typing import Sequence +from typing import Union + +from evidently._pydantic_compat import PrivateAttr +from evidently.pydantic_utils import EvidentlyBaseModel + + +class TextSource: + @classmethod + def from_any(cls, text_source: "AnyTextSource"): + if isinstance(text_source, TextSource): + return text_source + if isinstance(text_source, str): + return StrSource(text_source) + raise NotImplementedError(f"Cannot create TextSource from {text_source.__class__.__name__}") + + @abstractmethod + def get_text(self) -> str: + raise NotImplementedError + + +class StrSource(TextSource): + def __init__(self, value: str): + self.value = value + + def get_text(self) -> str: + return self.value + + +AnyTextSource = Union[str, bytes, TextSource] + +Chunk = str +Split = str + + +class Splitters(str, Enum): + Simple = "simple" + LlamaIndex = "llama_index" + + +AnySplitter = Union[str, Splitters, "Splitter"] + + +class Splitter(EvidentlyBaseModel, ABC): + class Config: + is_base_type = True + + chunk_size: int + chunk_overlap: int + + def split(self, texts: Union[AnyTextSource, List[AnyTextSource]]) -> Sequence[Chunk]: + if not isinstance(texts, list): + texts = [texts] + + for text in texts: + yield from self.split_text(TextSource.from_any(text)) + + @abstractmethod + def split_text(self, text: TextSource) -> Sequence[Chunk]: + raise NotImplementedError + + @classmethod + def from_any(cls, splitter: AnySplitter, chunk_size: int, chunk_overlap: int, **kwargs): + if isinstance(splitter, Splitter): + return splitter + if isinstance(splitter, str): + splitter = Splitters(splitter) + if isinstance(splitter, Splitters): + if splitter == Splitters.Simple: + return SimpleSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) + if splitter == Splitters.LlamaIndex: + return 
LlamaIndexSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs) + raise ValueError(f"Unknown splitter {splitter}") + raise NotImplementedError(f"Cannot create splitter from {splitter.__class__.__name__}") + + +class SimpleSplitter(Splitter): + class Config: + type_alias = "evidently:splitter:SimpleSplitter" + + split_re: ClassVar = re.compile(r"([^,.;。?!]+[,.;。?!]?)") + + def split_text(self, text: TextSource) -> Sequence[Chunk]: + current_splits: List[str] = [] + current_size = 0 + for split in self.split_re.split(text.get_text()): + split_size = len(split) + if len(current_splits) > 0 and current_size + split_size > self.chunk_size: + yield "".join(current_splits) + while current_size > self.chunk_overlap and len(current_splits) > 0: + last, *current_splits = current_splits + last_size = len(last) + current_size -= last_size + current_size += split_size + current_splits.append(split) + if current_size > 0: + yield "".join(current_splits) + + +class LlamaIndexSplitter(Splitter): + class Config: + type_alias = "evidently:splitter:LlamaIndexSplitter" + + separator: str = " " + paragraph_separator: Optional[str] = None + _splitter = PrivateAttr(None) + + @property + def splitter(self): + if self._splitter is None: + from llama_index.core.node_parser import SentenceSplitter + from llama_index.core.node_parser.text.sentence import DEFAULT_PARAGRAPH_SEP + + self._splitter = SentenceSplitter( + chunk_size=self.chunk_size, + chunk_overlap=self.chunk_overlap, + separator=self.separator, + paragraph_separator=self.paragraph_separator or DEFAULT_PARAGRAPH_SEP, + ) + return self._splitter + + def split_text(self, text: TextSource) -> Sequence[Chunk]: + yield from self.splitter.split_text(text.get_text()) diff --git a/src/evidently/features/_registry.py b/src/evidently/features/_registry.py index ba2e101f5f..b3f579a981 100644 --- a/src/evidently/features/_registry.py +++ b/src/evidently/features/_registry.py @@ -27,6 +27,11 @@ "evidently.features.hf_feature.HuggingFaceToxicityFeature", "evidently:feature:HuggingFaceToxicityFeature", ) +register_type_alias( + GeneratedFeatures, + "evidently.features.json_schema_match_feature.JSONSchemaMatch", + "evidently:feature:JSONSchemaMatch", +) register_type_alias(GeneratedFeatures, "evidently.features.llm_judge.LLMJudge", "evidently:feature:LLMJudge") register_type_alias( GeneratedFeatures, @@ -52,6 +57,12 @@ register_type_alias( GeneratedFeatures, "evidently.features.text_contains_feature.DoesNotContain", "evidently:feature:DoesNotContain" ) +register_type_alias( + GeneratedFeatures, "evidently.features.text_contains_feature.ItemMatch", "evidently:feature:ItemMatch" +) +register_type_alias( + GeneratedFeatures, "evidently.features.text_contains_feature.ItemNoMatch", "evidently:feature:ItemNoMatch" +) register_type_alias( GeneratedFeatures, "evidently.features.text_length_feature.TextLength", "evidently:feature:TextLength" ) diff --git a/src/evidently/features/json_schema_match_feature.py b/src/evidently/features/json_schema_match_feature.py new file mode 100644 index 0000000000..e81e8ba01b --- /dev/null +++ b/src/evidently/features/json_schema_match_feature.py @@ -0,0 +1,76 @@ +import json +from typing import ClassVar +from typing import Dict +from typing import Optional + +import pandas as pd + +from evidently.base_metric import ColumnName +from evidently.core import ColumnType +from evidently.features.generated_features import GeneratedFeature +from evidently.utils.data_preprocessing import DataDefinition + + +class 
JSONSchemaMatch(GeneratedFeature): + class Config: + type_alias = "evidently:feature:JSONSchemaMatch" + + __feature_type__: ClassVar = ColumnType.Categorical + column_name: str + expected_schema: Dict[str, type] + validate_types: bool + exact_match: bool + + def __init__( + self, + column_name: str, + expected_schema: Dict[str, type], + validate_types: bool = False, + exact_match: bool = False, + display_name: Optional[str] = None, + ): + self.column_name = column_name + self.validate_types = validate_types if not exact_match else True + self.expected_schema = expected_schema + self.exact_match = exact_match + self.display_name = display_name + super().__init__() + + def _feature_column_name(self) -> str: + match_type = "exact" if self.exact_match else "minimal" + return f"{self.column_name}_json_schema_{match_type}_match" + + def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame: + calculated = data.apply(lambda row: self.match_json_schema(row[self.column_name]), axis=1) + return pd.DataFrame({self._feature_column_name(): calculated}) + + def match_json_schema(self, json_text: str) -> bool: + try: + json_obj = json.loads(json_text) + except json.JSONDecodeError: + return False + + if self.exact_match: + return self._exact_match(json_obj) + else: + return self._minimal_match(json_obj) + + def _minimal_match(self, json_obj: Dict) -> bool: + for key, expected_type in self.expected_schema.items(): + if key not in json_obj or json_obj[key] is None: + return False + if self.validate_types and expected_type and not isinstance(json_obj[key], expected_type): + return False + return True + + def _exact_match(self, json_obj: Dict) -> bool: + if set(json_obj.keys()) != set(self.expected_schema.keys()): + return False + return self._minimal_match(json_obj) + + def _as_column(self) -> ColumnName: + match_type = "exact" if self.exact_match else "minimal" + return self._create_column( + self._feature_column_name(), + default_display_name=f"JSONSchemaMatch {match_type} match", + ) diff --git a/src/evidently/features/text_contains_feature.py b/src/evidently/features/text_contains_feature.py index 31bd3a0975..6b909b95c0 100644 --- a/src/evidently/features/text_contains_feature.py +++ b/src/evidently/features/text_contains_feature.py @@ -112,3 +112,109 @@ def comparison(self, item: str, string: str): if self.case_sensitive: return item in string return item.casefold() in string.casefold() + + +class ItemMatch(GeneratedFeature): + class Config: + type_alias = "evidently:feature:ItemMatch" + + __feature_type__: ClassVar = ColumnType.Categorical + columns: List[str] + case_sensitive: bool + mode: str + + def __init__( + self, + columns: List[str], + case_sensitive: bool = True, + mode: str = "any", + display_name: Optional[str] = None, + ): + if len(columns) != 2: + raise ValueError("two columns must be provided") + self.columns = columns + self.display_name = display_name + self.case_sensitive = case_sensitive + if mode not in ["any", "all"]: + raise ValueError("mode must be either 'any' or 'all'") + self.mode = mode + super().__init__() + + def _feature_column_name(self) -> str: + return f"{self.columns[0]}_{self.columns[1]}" + "_item_match_" + str(self.case_sensitive) + "_" + self.mode + + def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame: + if self.mode == "any": + calculated = data.apply( + lambda row: any(self.comparison(word, row[self.columns[0]]) for word in row[self.columns[1]]), + axis=1, + ) + else: + calculated = 
data.apply( + lambda row: all(self.comparison(word, row[self.columns[0]]) for word in row[self.columns[1]]), + axis=1, + ) + return pd.DataFrame({self._feature_column_name(): calculated}) + + def _as_column(self) -> ColumnName: + return self._create_column( + self._feature_column_name(), + default_display_name=f"Text contains {self.mode} of defined items", + ) + + def comparison(self, item: str, string: str): + if self.case_sensitive: + return item in string + return item.casefold() in string.casefold() + + +class ItemNoMatch(GeneratedFeature): + class Config: + type_alias = "evidently:feature:ItemNoMatch" + + __feature_type__: ClassVar = ColumnType.Categorical + columns: List[str] + case_sensitive: bool + mode: str + + def __init__( + self, + columns: List[str], + case_sensitive: bool = True, + mode: str = "any", + display_name: Optional[str] = None, + ): + self.columns = columns + self.display_name = display_name + self.case_sensitive = case_sensitive + if mode not in ["any", "all"]: + raise ValueError("mode must be either 'any' or 'all'") + self.mode = mode + super().__init__() + + def _feature_column_name(self) -> str: + return f"{self.columns[0]}_{self.columns[1]}" + "_item_no_match_" + str(self.case_sensitive) + "_" + self.mode + + def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame: + if self.mode == "any": + calculated = data.apply( + lambda row: not any(self.comparison(word, row[self.columns[0]]) for word in row[self.columns[1]]), + axis=1, + ) + else: + calculated = data.apply( + lambda row: not all(self.comparison(word, row[self.columns[0]]) for word in row[self.columns[1]]), + axis=1, + ) + return pd.DataFrame({self._feature_column_name(): calculated}) + + def _as_column(self) -> ColumnName: + return self._create_column( + self._feature_column_name(), + default_display_name=f"Text does not contain {self.mode} of defined items", + ) + + def comparison(self, item: str, string: str): + if self.case_sensitive: + return item in string + return item.casefold() in string.casefold() diff --git a/src/evidently/metrics/__init__.py b/src/evidently/metrics/__init__.py index c88a28babf..773b77626c 100644 --- a/src/evidently/metrics/__init__.py +++ b/src/evidently/metrics/__init__.py @@ -32,6 +32,7 @@ from .data_integrity.column_summary_metric import ColumnSummaryMetric from .data_integrity.dataset_missing_values_metric import DatasetMissingValuesMetric from .data_integrity.dataset_summary_metric import DatasetSummaryMetric +from .data_integrity.rouge_summary_metric import ROUGESummaryMetric from .data_quality.column_category_metric import ColumnCategoryMetric from .data_quality.column_correlations_metric import ColumnCorrelationsMetric from .data_quality.column_distribution_metric import ColumnDistributionMetric @@ -99,6 +100,7 @@ "ColumnSummaryMetric", "DatasetMissingValuesMetric", "DatasetSummaryMetric", + "ROUGESummaryMetric", "ColumnCategoryMetric", "ColumnCorrelationsMetric", "ColumnDistributionMetric", diff --git a/src/evidently/metrics/_registry.py b/src/evidently/metrics/_registry.py index 1ed0ce8345..26f6e58a8a 100644 --- a/src/evidently/metrics/_registry.py +++ b/src/evidently/metrics/_registry.py @@ -138,6 +138,13 @@ "evidently.metrics.data_integrity.dataset_summary_metric.DatasetSummaryMetric", "evidently:metric:DatasetSummaryMetric", ) + +register_type_alias( + Metric, + "evidently.metrics.data_integrity.rouge_summary_metric.ROUGESummaryMetric", + "evidently:metric:ROUGESummaryMetric", +) + register_type_alias( Metric, 
"evidently.metrics.data_quality.column_category_metric.ColumnCategoryMetric", @@ -570,6 +577,11 @@ "evidently.metrics.data_integrity.dataset_summary_metric.DatasetSummaryMetricResult", "evidently:metric_result:DatasetSummaryMetricResult", ) +register_type_alias( + MetricResult, + "evidently.metrics.data_integrity.rouge_summary_metric.ROUGESummaryMetricResult", + "evidently:metric_result:ROUGESummaryMetricResult", +) register_type_alias( MetricResult, "evidently.metrics.data_quality.column_category_metric.CategoryStat", diff --git a/src/evidently/metrics/data_integrity/rouge_summary_metric.py b/src/evidently/metrics/data_integrity/rouge_summary_metric.py new file mode 100644 index 0000000000..c9c53aeb2b --- /dev/null +++ b/src/evidently/metrics/data_integrity/rouge_summary_metric.py @@ -0,0 +1,103 @@ +from typing import List + +import evaluate +import pandas as pd + +from evidently.base_metric import InputData +from evidently.base_metric import Metric +from evidently.base_metric import MetricResult +from evidently.core import IncludeTags +from evidently.model.widget import BaseWidgetInfo +from evidently.options.base import AnyOptions +from evidently.renderers.base_renderer import MetricRenderer +from evidently.renderers.base_renderer import default_renderer +from evidently.renderers.html_widgets import header_text +from evidently.renderers.html_widgets import table_data +from evidently.renderers.html_widgets import text_widget + + +class ROUGESummaryMetricResult(MetricResult): + class Config: + type_alias = "evidently:metric_result:ROUGESummaryMetricResult" + field_tags = { + "current": {IncludeTags.Current}, + "reference": {IncludeTags.Reference}, + "rouge_type": {IncludeTags.Parameter}, + "per_row_scores": {IncludeTags.Parameter}, + "summary_score": {IncludeTags.Parameter}, + } + + current: list + reference: list + rouge_type: str + per_row_scores: list + summary_score: float + + +class ROUGESummaryMetric(Metric[ROUGESummaryMetricResult]): + class Config: + type_alias = "evidently:metric:ROUGESummaryMetric" + arbitrary_types_allowed = True + + column_name: str + rouge_n: int + + def __init__(self, column_name: str, rouge_n: int, options: AnyOptions = None): + self.column_name = column_name + self.rouge_n = rouge_n + super().__init__(options=options) + + def _calculate_summary_rouge(self, current: pd.Series, reference: pd.Series): + rouge_evaluator = evaluate.load("rouge") + + current = current.astype(str).tolist() + reference = reference.astype(str).tolist() + + rouge_scores = rouge_evaluator.compute( + rouge_types=[f"rouge{self.rouge_n}"], predictions=current, references=reference, use_aggregator=False + ) + + per_row_rouge_scores = rouge_scores[f"rouge{self.rouge_n}"] + + summary_rouge_score = sum(per_row_rouge_scores) / len(per_row_rouge_scores) + + return per_row_rouge_scores, summary_rouge_score, current, reference + + def calculate(self, data: InputData) -> ROUGESummaryMetricResult: + if data.current_data is None or data.reference_data is None: + raise ValueError("The current data or the reference data is None.") + if len(data.current_data[self.column_name]) == 0 or len(data.reference_data[self.column_name]) == 0: + raise ValueError("The current data or the reference data is empty.") + + per_row_rouge_scores, summary_rouge_score, current, reference = self._calculate_summary_rouge( + data.current_data[self.column_name], data.reference_data[self.column_name] + ) + + result = ROUGESummaryMetricResult( + rouge_type=f"ROUGE-{self.rouge_n}", + per_row_scores=per_row_rouge_scores, + 
summary_score=summary_rouge_score, + current=current, + reference=reference, + ) + return result + + +@default_renderer(wrap_type=ROUGESummaryMetric) +class ROUGESummaryMetricRenderer(MetricRenderer): + @staticmethod + def _get_table(metric) -> BaseWidgetInfo: + column_names = ["Metric", "current", "reference", "score"] + rows = [] + for i in range(len(metric.current)): + rows.append([metric.rouge_type, metric.current[i], metric.reference[i], metric.per_row_scores[i]]) + # rows.append(["metric.rouge_type", 1, "metric.current[i]", "metric.reference[i]", 2.4]) + return table_data(title="", column_names=column_names, data=rows) + + def render_html(self, obj: ROUGESummaryMetric) -> List[BaseWidgetInfo]: + metric = obj.get_result() + return [ + header_text(label="ROUGE Metric"), + self._get_table(metric), + text_widget(text=f"{metric.summary_score}", title="Overall ROUGE score"), + ] diff --git a/src/evidently/suite/base_suite.py b/src/evidently/suite/base_suite.py index 3022c3cdac..2e109afeaf 100644 --- a/src/evidently/suite/base_suite.py +++ b/src/evidently/suite/base_suite.py @@ -500,6 +500,7 @@ def __iter__(self) -> Iterator[Tuple[str, str, DatasetID]]: class SnapshotLinks(BaseModel): datasets: DatasetInputOutputLinks = DatasetInputOutputLinks() computation_config_id: Optional[ComputationConfigID] = None + task_id: Optional[str] = None class Snapshot(BaseModel): diff --git a/src/evidently/ui/config.py b/src/evidently/ui/config.py index 52bdc36adb..53a35f00a0 100644 --- a/src/evidently/ui/config.py +++ b/src/evidently/ui/config.py @@ -117,8 +117,13 @@ def load_config(config_type: Type[TConfig], box: dict) -> TConfig: continue if section in ("renamed_vars", "dict_itemiterator"): continue - if section in config_type.__fields__: - component = parse_obj_as(config_type.__fields__[section].type_, component_dict) + if section == "additional_components": + for subsection, compoennt_subdict in component_dict.items(): + component = parse_obj_as(SECTION_COMPONENT_TYPE_MAPPING.get(subsection, Component), compoennt_subdict) + components[subsection] = component + elif section in config_type.__fields__: + type_ = config_type.__fields__[section].type_ + component = parse_obj_as(type_, component_dict) named_components[section] = component elif section in SECTION_COMPONENT_TYPE_MAPPING: component = parse_obj_as(SECTION_COMPONENT_TYPE_MAPPING[section], component_dict) diff --git a/src/evidently/utils/llm/__init__.py b/src/evidently/utils/llm/__init__.py new file mode 100644 index 0000000000..4bfe1f7c80 --- /dev/null +++ b/src/evidently/utils/llm/__init__.py @@ -0,0 +1,3 @@ +from . 
import _registry + +__all__ = ["_registry"] diff --git a/src/evidently/utils/llm/_registry.py b/src/evidently/utils/llm/_registry.py new file mode 100644 index 0000000000..63f06a4ade --- /dev/null +++ b/src/evidently/utils/llm/_registry.py @@ -0,0 +1,21 @@ +from evidently.pydantic_utils import register_type_alias +from evidently.utils.llm.prompts import PromptBlock +from evidently.utils.llm.prompts import PromptTemplate + +register_type_alias(PromptBlock, "evidently.utils.llm.prompts.Anchor", "evidently:prompt_block:Anchor") +register_type_alias( + PromptBlock, "evidently.utils.llm.prompts.JsonOutputFormatBlock", "evidently:prompt_block:JsonOutputFormatBlock" +) +register_type_alias( + PromptBlock, "evidently.utils.llm.prompts.NoopOutputFormat", "evidently:prompt_block:NoopOutputFormat" +) +register_type_alias(PromptBlock, "evidently.utils.llm.prompts.SimpleBlock", "evidently:prompt_block:SimpleBlock") +register_type_alias( + PromptBlock, "evidently.utils.llm.prompts.StringFormatBlock", "evidently:prompt_block:StringFormatBlock" +) +register_type_alias( + PromptBlock, "evidently.utils.llm.prompts.StringListFormatBlock", "evidently:prompt_block:StringListFormatBlock" +) +register_type_alias( + PromptTemplate, "evidently.utils.llm.prompts.BlockPromptTemplate", "evidently:prompt_template:BlockPromptTemplate" +) diff --git a/src/evidently/utils/llm/base.py b/src/evidently/utils/llm/base.py new file mode 100644 index 0000000000..2abf77b571 --- /dev/null +++ b/src/evidently/utils/llm/base.py @@ -0,0 +1,20 @@ +import dataclasses +from typing import Any +from typing import Dict + + +@dataclasses.dataclass +class LLMMessage: + role: str + content: str + + @classmethod + def user(cls, message: str): + return LLMMessage("user", message) + + @classmethod + def system(cls, message: str): + return LLMMessage("system", message) + + +LLMResponse = Dict[str, Any] diff --git a/src/evidently/utils/llm/errors.py b/src/evidently/utils/llm/errors.py new file mode 100644 index 0000000000..606fb62542 --- /dev/null +++ b/src/evidently/utils/llm/errors.py @@ -0,0 +1,13 @@ +from evidently.errors import EvidentlyError + + +class EvidentlyLLMError(EvidentlyError): + pass + + +class LLMResponseParseError(EvidentlyLLMError): + pass + + +class LLMRequestError(EvidentlyLLMError): + pass diff --git a/src/evidently/utils/llm/prompts.py b/src/evidently/utils/llm/prompts.py new file mode 100644 index 0000000000..bc0eed4749 --- /dev/null +++ b/src/evidently/utils/llm/prompts.py @@ -0,0 +1,275 @@ +import inspect +import json +import re +from abc import ABC +from abc import abstractmethod +from functools import wraps +from typing import Any +from typing import Callable +from typing import ClassVar +from typing import Dict +from typing import Generic +from typing import Iterator +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple +from typing import Type +from typing import TypeVar +from typing import Union + +import typing_inspect + +from evidently.pydantic_utils import EvidentlyBaseModel +from evidently.utils.llm.base import LLMMessage +from evidently.utils.llm.errors import LLMResponseParseError +from evidently.utils.llm.wrapper import LLMRequest + +TResult = TypeVar("TResult") + + +class PromptBlock(EvidentlyBaseModel): + class Config: + is_base_type = True + + def render(self): + # ))) + result = self._render() + for field in self.__fields__: + placeholder = f"{{{field}}}" + if placeholder in result: + result = result.replace(placeholder, getattr(self, field)) + return 
result + + @abstractmethod + def _render(self) -> str: + raise NotImplementedError + + @classmethod + def simple(cls, value: str): + return SimpleBlock(value=value) + + @classmethod + def input(cls, placeholder_name: str = "input"): + return SimpleBlock(value=f"{{{placeholder_name}}}") + + @classmethod + def json_output(cls, **fields: Union[str, Tuple[str, str]]): + return JsonOutputFormatBlock(fields=fields) + + @classmethod + def string_list_output(cls, of_what: str): + return StringListFormatBlock(of_what=of_what) + + @classmethod + def string_output(cls, what: str): + return StringFormatBlock(what=what) + + def anchored(self, start: str = "__start__", end: str = "__end__"): + return Anchor(start=start, block=self, end=end) + + +class Anchor(PromptBlock): + class Config: + type_alias = "evidently:prompt_block:Anchor" + + start: str + block: PromptBlock + end: str + + def _render(self) -> str: + return f"{self.start}\n{self.block.render()}\n{self.end}" + + +class SimpleBlock(PromptBlock): + class Config: + type_alias = "evidently:prompt_block:SimpleBlock" + + value: str + + def _render(self) -> str: + return self.value + + +class OutputFormatBlock(PromptBlock, ABC, Generic[TResult]): + @abstractmethod + def parse_response(self, response: str) -> TResult: + raise NotImplementedError + + +class NoopOutputFormat(OutputFormatBlock[str]): + class Config: + type_alias = "evidently:prompt_block:NoopOutputFormat" + + def _render(self) -> str: + return "" + + def parse_response(self, response: str) -> str: + return response + + +class JsonOutputFormatBlock(OutputFormatBlock[Dict[str, Any]]): + class Config: + type_alias = "evidently:prompt_block:JsonOutputFormatBlock" + + fields: Dict[str, Union[Tuple[str, str], str]] + + def _render(self) -> str: + values = [] + example_rows = [] + for field, descr in self.fields.items(): + if isinstance(descr, tuple): + descr, field_key = descr + else: + field_key = field + values.append(field) + example_rows.append(f'"{field_key}": "{descr}"') + + example_rows_str = "\n".join(example_rows) + return f"Return {', '.join(values)} formatted as json without formatting as follows:\n{{{{\n{example_rows_str}\n}}}}" + + def parse_response(self, response: str) -> Dict[str, Any]: + try: + return json.loads(response) + except json.JSONDecodeError as e: + raise LLMResponseParseError(f"Failed to parse response '{response}' as json") from e + + +class StringListFormatBlock(OutputFormatBlock[List[str]]): + class Config: + type_alias = "evidently:prompt_block:StringListFormatBlock" + + of_what: str + + def _render(self) -> str: + return f"""Return a list of {self.of_what}. 
+This should be only a list of string {self.of_what}, each one on a new line with no enumeration""" + + def parse_response(self, response: str) -> List[str]: + return response.split("\n") + + +class StringFormatBlock(OutputFormatBlock[str]): + class Config: + type_alias = "evidently:prompt_block:StringFormatBlock" + + what: str + + def _render(self) -> str: + return f"""Return {self.what} only.""" + + def parse_response(self, response: str) -> str: + return response + + +def llm_call(f: Callable) -> Callable[..., LLMRequest]: + sig = inspect.getfullargspec(f) + response_type = sig.annotations.get("return", str) + + @wraps(f) + def inner(self: PromptTemplate, *args, **kwargs): + kwargs = inspect.getcallargs(f, *args, **kwargs, self=self) + del kwargs["self"] + template = self.get_template() + placeholders = self.list_placeholders(template) + if set(placeholders) != set(kwargs.keys()): + raise TypeError( + f"{f} arg signature ({list(kwargs)}) does not correspond to placeholders in prompt ({placeholders})" + ) + + output_format = self.get_output_format() + prompt_response_type = _get_genric_arg(output_format.__class__) + if prompt_response_type != response_type: + raise TypeError( + f"{f} response type ({response_type}) does not correspond to prompt output type {prompt_response_type}" + ) + + # todo: validate kwargs against sig.annotations + # todo: define response parser with validation against response_type + + return LLMRequest( + messages=self.get_messages(kwargs, template=template), + response_parser=self.parse, + response_type=response_type, + ) + + return inner + + +def _get_genric_arg(cls: Type): + return typing_inspect.get_args(next(b for b in cls.__orig_bases__ if typing_inspect.is_generic_type(b)))[0] + + +placeholders_re = re.compile(r"\{([a-zA-Z0-9_]+)}") + + +class PromptTemplate(EvidentlyBaseModel): + class Config: + is_base_type = True + + # __run_func__ : ClassVar[Callable] + @abstractmethod + def get_blocks(self) -> Sequence[PromptBlock]: + raise NotImplementedError + + def iterate(self, values: Sequence[Dict[str, str]]) -> Iterator[str]: + template = self.get_template() + for vals in values: + yield self.render(vals, template) + + def render(self, values: dict, template: Optional[str] = None): + return (template or self.get_template()).format(**values) + + def get_template(self) -> str: + return "\n".join(block.render() for block in self.get_blocks()) + + def list_placeholders(self, template: Optional[str] = None): + template = template or self.get_template() + return list(placeholders_re.findall(template)) + + def get_output_format(self) -> OutputFormatBlock: + output: Optional[OutputFormatBlock] = next( + (b for b in self.get_blocks() if isinstance(b, OutputFormatBlock)), None + ) + return output if output is not None else NoopOutputFormat() # type: ignore[return-value] + + def parse(self, response: str, keys: Optional[List[str]] = None) -> Dict[str, Any]: + output = self.get_output_format() + parsed = output.parse_response(response) + if keys is not None and set(keys) != set(parsed.keys()): + raise LLMResponseParseError(f"Keys {keys} are required but got {list(parsed.keys())}") + return parsed + + def get_messages(self, values, template: Optional[str] = None) -> List[LLMMessage]: + return [LLMMessage.user(self.render(values, template))] + + +class WithSystemPrompt(PromptTemplate, ABC): + system_prompt: str + + def get_messages(self, values, template: Optional[str] = None) -> List[LLMMessage]: + msgs = super().get_messages(values, template) + msgs.insert(0, 
LLMMessage.system(self.system_prompt)) + return msgs + + +AnyBlock = Union[str, PromptBlock, Callable] + + +class BlockPromptTemplate(PromptTemplate): + class Config: + type_alias = "evidently:prompt_template:BlockPromptTemplate" + + blocks: ClassVar[List[AnyBlock]] + + def get_blocks(self) -> Sequence[PromptBlock]: + return [self._to_block(b) for b in self.blocks] + + def _to_block(self, block: AnyBlock) -> PromptBlock: + if isinstance(block, PromptBlock): + return block + if isinstance(block, str): + return PromptBlock.simple(block) + # if callable(block): todo + # return PromptBlock.func(block) + raise NotImplementedError(f"Cannot create promt block from {block}") diff --git a/src/evidently/utils/llm/wrapper.py b/src/evidently/utils/llm/wrapper.py new file mode 100644 index 0000000000..ef26cdb68d --- /dev/null +++ b/src/evidently/utils/llm/wrapper.py @@ -0,0 +1,215 @@ +import asyncio +import dataclasses +import datetime +from abc import ABC +from abc import abstractmethod +from asyncio import Lock +from asyncio import Semaphore +from asyncio import sleep +from typing import Callable +from typing import ClassVar +from typing import Dict +from typing import Generic +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple +from typing import Type +from typing import TypeVar + +from evidently._pydantic_compat import SecretStr +from evidently.options.base import Options +from evidently.options.option import Option +from evidently.ui.base import sync_api +from evidently.utils.llm.base import LLMMessage +from evidently.utils.llm.errors import LLMRequestError + +TResult = TypeVar("TResult") + + +class RateLimiter: + def __init__(self, rate: Optional[int], interval: datetime.timedelta): + self.rate = rate + self.interval = interval + self.enters: List[datetime.datetime] = [] + self.lock = Lock() + + async def __aenter__(self): + if self.rate is None: + return + while True: + async with self.lock: + await self._clean() + if len(self.enters) < self.rate: + self.enters.append(datetime.datetime.now()) + break + await sleep(0.1) + + async def __aexit__(self, exc_type, exc_val, exc_tb): + pass + + async def _clean(self): + now = datetime.datetime.now() + self.enters = [e for e in self.enters if now - e < self.interval] + + +@dataclasses.dataclass +class LLMRequest(Generic[TResult]): + messages: List[LLMMessage] + response_parser: Callable[[str], TResult] + response_type: Type[TResult] + retries: int = 1 + + +class LLMWrapper(ABC): + __used_options__: ClassVar[List[Type[Option]]] = [] + + @abstractmethod + async def complete(self, messages: List[LLMMessage]) -> str: + raise NotImplementedError + + async def complete_batch( + self, messages_batch: List[List[LLMMessage]], batch_size: Optional[int] = None, rpm_limit: Optional[int] = None + ) -> List[str]: + if batch_size is None: + batch_size = self.get_batch_size() + if rpm_limit is None: + rpm_limit = self.get_rpm_limit() + rate_limiter = RateLimiter(rate=rpm_limit, interval=datetime.timedelta(minutes=1)) + semaphore = Semaphore(batch_size) + + async def work(messages: List[LLMMessage]) -> str: + async with semaphore, rate_limiter: + return await self.complete(messages) + + return await asyncio.gather(*[work(msgs) for msgs in messages_batch]) + + async def run(self, request: LLMRequest[TResult]) -> TResult: + num_retries = request.retries + error = None + while num_retries >= 0: + num_retries -= 1 + try: + response = await self.complete(request.messages) + return request.response_parser(response) + 
except Exception as e: + error = e + raise error + + async def run_batch( + self, requests: Sequence[LLMRequest[TResult]], batch_size: Optional[int] = None, rpm_limit: Optional[int] = None + ) -> List[TResult]: + if batch_size is None: + batch_size = self.get_batch_size() + if rpm_limit is None: + rpm_limit = self.get_rpm_limit() + rate_limiter = RateLimiter(rate=rpm_limit, interval=datetime.timedelta(minutes=1)) + semaphore = Semaphore(batch_size) + + async def work(request: LLMRequest[TResult]) -> TResult: + async with semaphore, rate_limiter: + return await self.run(request) + + return await asyncio.gather(*[work(r) for r in requests]) + + def get_batch_size(self) -> int: + return 100 + + def get_rpm_limit(self) -> Optional[int]: + return None + + def get_used_options(self) -> List[Type[Option]]: + return self.__used_options__ + + complete_batch_sync = sync_api(complete_batch) + run_sync = sync_api(run) + run_batch_sync = sync_api(run_batch) + + +LLMProvider = str +LLMModel = str +LLMWrapperProvider = Callable[[LLMModel, Options], LLMWrapper] +_wrappers: Dict[Tuple[LLMProvider, Optional[LLMModel]], LLMWrapperProvider] = {} + + +def llm_provider(name: LLMProvider, model: Optional[LLMModel]): + def dec(f: LLMWrapperProvider): + _wrappers[(name, model)] = f + return f + + return dec + + +def get_llm_wrapper(provider: LLMProvider, model: LLMModel, options: Options) -> LLMWrapper: + key: Tuple[str, Optional[str]] = (provider, model) + if key in _wrappers: + return _wrappers[key](model, options) + key = (provider, None) + if key in _wrappers: + return _wrappers[key](model, options) + raise ValueError(f"LLM wrapper for provider {provider} model {model} not found") + + +class OpenAIKey(Option): + api_key: Optional[SecretStr] = None + rpm_limit: int = 500 + + def __init__(self, api_key: Optional[str] = None): + self.api_key = SecretStr(api_key) if api_key is not None else None + super().__init__() + + def get_api_key(self) -> Optional[str]: + if self.api_key is None: + return None + return self.api_key.get_secret_value() + + +@llm_provider("openai", None) +class OpenAIWrapper(LLMWrapper): + __used_options__: ClassVar = [OpenAIKey] + + def __init__(self, model: str, options: Options): + import openai + + self.model = model + self.options = options.get(OpenAIKey) + self._clients: Dict[int, openai.AsyncOpenAI] = {} + + @property + def client(self): + import openai + + try: + loop = asyncio.get_running_loop() + except RuntimeError as e: + raise RuntimeError("Cannot access OpenAIWrapper client without loop") from e + loop_id = id(loop) + if loop_id not in self._clients: + self._clients[loop_id] = openai.AsyncOpenAI(api_key=self.options.get_api_key()) + return self._clients[loop_id] + + async def complete(self, messages: List[LLMMessage]) -> str: + import openai + + messages = [{"role": msg.role, "content": msg.content} for msg in messages] + try: + response = await self.client.chat.completions.create(model=self.model, messages=messages) # type: ignore[arg-type] + except openai.OpenAIError as e: + raise LLMRequestError("Failed to call OpenAI complete API") from e + content = response.choices[0].message.content + assert content is not None # todo: better error + return content + + def get_rpm_limit(self) -> Optional[int]: + return self.options.rpm_limit + + +@llm_provider("litellm", None) +class LiteLLMWrapper(LLMWrapper): + def __init__(self, model: str): + self.model = model + + async def complete(self, messages: List[LLMMessage]) -> str: + from litellm import completion + + return 
completion(model=self.model, messages=messages).choices[0].message.content diff --git a/tests/dataset_generator/__init__.py b/tests/dataset_generator/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/features/test_json_schema_match_feature.py b/tests/features/test_json_schema_match_feature.py new file mode 100644 index 0000000000..42b2e704f7 --- /dev/null +++ b/tests/features/test_json_schema_match_feature.py @@ -0,0 +1,147 @@ +from typing import Any +from typing import Dict + +import pandas as pd +import pytest + +from evidently.features.json_schema_match_feature import JSONSchemaMatch + + +@pytest.mark.parametrize( + ("column_value, expected_schema, validate_types, exact_match, expected_output"), + [ + # Invalid JSON + ('{"name": "Invalid json"]', {"name": str, "age": int}, False, False, False), + # Exact Match + ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, True, True, True), + ('{"name": "Jane", "age": 25}', {"name": str, "age": int, "city": str}, True, True, False), + ('{"name": "Jane", "age": 25, "city": "New York"}', {"name": str, "age": int}, True, True, False), + ('{"name": "Jane", "age": 25}', {"name": int, "age": int}, True, True, False), + # Minimal Match without type validation + ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, False, False, True), + ('{"name": "Jane", "age": 25, "city": "New York"}', {"name": str, "age": int}, False, False, True), + ('{"name": "Jane", "age": null, "city": "New York"}', {"name": str, "age": int}, False, False, False), + # Minimal Match with type validation + ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, True, False, True), + ( + '{"name": "Jane", "age": "25"}', + {"name": str, "age": int}, + True, + False, + False, + ), # Fail due to type mismatch (age as string) + ], +) +def test_match_json_schema( + column_value: str, expected_schema: Dict[str, type], validate_types: bool, exact_match: bool, expected_output: bool +): + schema_match = JSONSchemaMatch( + expected_schema=expected_schema, + validate_types=validate_types, + exact_match=exact_match, + column_name="TestColumnName", + ) + result = schema_match.match_json_schema(json_text=column_value) + assert result == expected_output + + +@pytest.mark.parametrize( + ("json_obj, expected_schema, validate_types, expected_output"), + [ + # Minimal Match with type validation + ({"name": "Jane", "age": 25}, {"name": str, "age": int}, True, True), + ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, True, False), + ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int}, True, True), + ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int, "region": str}, True, False), + ({"name": "Jane", "age": None, "city": "New York"}, {"name": str, "age": int}, True, False), + # Minimal Match without type validation + ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, False, True), + ({"name": "Jane", "age": None, "city": "New York"}, {"name": str, "age": int}, False, False), + ], +) +def test_minimal_match( + json_obj: Dict[str, Any], expected_schema: Dict[str, type], validate_types: bool, expected_output: bool +): + schema_match = JSONSchemaMatch( + expected_schema=expected_schema, validate_types=validate_types, exact_match=False, column_name="TestColumnName" + ) + result = schema_match._minimal_match(json_obj) + assert result == expected_output + + +@pytest.mark.parametrize( + ("json_obj, expected_schema, validate_types, expected_output"), + [ + # Exact Match + ({"name": "Jane", "age": 25}, 
{"name": str, "age": int}, True, True), + ({"name": "Jane", "age": 25}, {"name": str, "age": int}, False, True), + ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, True, False), + ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int}, True, False), + ({"name": "Jane", "age": 25}, {"name": str, "age": int, "city": str}, True, False), + ( + {"name": "Jane", "age": 25, "city": ["New York", "California"]}, + {"name": str, "age": int, "city": list}, + True, + True, + ), + ( + {"name": "Jane", "age": 25, "city": ["New York", "California"]}, + {"name": str, "age": int, "city": dict}, + True, + False, + ), + ], +) +def test_exact_match( + json_obj: Dict[str, Any], expected_schema: Dict[str, type], validate_types: bool, expected_output: bool +): + schema_match = JSONSchemaMatch( + expected_schema=expected_schema, validate_types=validate_types, exact_match=False, column_name="TestColumnName" + ) + result = schema_match._exact_match(json_obj) + assert result == expected_output + + +test_data = pd.DataFrame( + { + "TestColumnName": [ + '{"name": "John", "age": 30, "city": "New York"}', + '{"name": "Jane", "age": null, "city": "London"}', + '{"name": "Mike", "age": 25, "city": "San Francisco"}', + '{"name": "Invalid json"]', + '{"name": "Anna", "age": "22", "country": "Canada"}', + ] + } +) + + +@pytest.mark.parametrize( + ("expected_schema, validate_types, exact_match, expected_output"), + [ + # Minimal Match without type validation + ({"name": str, "age": int}, False, False, [True, False, True, False, True]), + # Minimal Match with type validation + ({"name": str, "age": int}, True, False, [True, False, True, False, False]), + # Exact Match + ({"name": str, "age": int, "city": str}, True, True, [True, False, True, False, False]), + ], +) +def test_generate_feature( + expected_schema: Dict[str, type], validate_types: bool, exact_match: bool, expected_output: list +): + schema_match = JSONSchemaMatch( + expected_schema=expected_schema, + validate_types=validate_types, + exact_match=exact_match, + column_name="TestColumnName", + ) + result = schema_match.generate_feature(test_data, None) + assert result[schema_match._feature_column_name()].tolist() == expected_output + + +def test_generate_feature_column_name_dne(): + schema_match = JSONSchemaMatch( + expected_schema={"test": str}, validate_types=False, exact_match=False, column_name="DNEColumn" + ) + with pytest.raises(KeyError): + schema_match.generate_feature(test_data, None) diff --git a/tests/features/test_text_contains_feature.py b/tests/features/test_text_contains_feature.py index 51dceeb680..3b590c9de4 100644 --- a/tests/features/test_text_contains_feature.py +++ b/tests/features/test_text_contains_feature.py @@ -5,6 +5,8 @@ from evidently.features.text_contains_feature import Contains from evidently.features.text_contains_feature import DoesNotContain +from evidently.features.text_contains_feature import ItemMatch +from evidently.features.text_contains_feature import ItemNoMatch from evidently.pipeline.column_mapping import ColumnMapping from evidently.utils.data_preprocessing import create_data_definition @@ -61,3 +63,83 @@ def test_text_not_contains_feature(items: List[str], case: bool, mode: str, expe column_expected = feature_generator._feature_column_name() expected_df = pd.DataFrame({column_expected: expected}) assert result.equals(expected_df) + + +@pytest.mark.parametrize( + ("case", "mode", "expected"), + [ + (True, "any", [False, True, False, True, False]), + (True, "all", [False, True, False, False, 
False]), + (False, "any", [True, True, True, True, False]), + (False, "all", [False, True, True, False, False]), + ], +) +def test_item_match(case: bool, mode: str, expected: List[bool]): + data = { + "generated": [ + "You should consider purchasing Nike or Adidas shoes.", + "I eat apples, grapes, and oranges", + "grapes, oranges, apples.", + "Oranges are more sour than grapes.", + "This test doesn't have the words.", + ], + "expected": [ + ["nike", "adidas", "puma"], + ["grapes", "apples", "oranges"], + ["Apples", "Oranges", "Grapes"], + ["orange", "sweet", "grape"], + ["none", "of", "these"], + ], + } + df = pd.DataFrame(data) + df["expected"] = df["expected"].apply(tuple) + feature_generator = ItemMatch(columns=["generated", "expected"], case_sensitive=case, mode=mode) + result = feature_generator.generate_feature( + data=df, + data_definition=create_data_definition(None, df, ColumnMapping()), + ) + column_expected = feature_generator._feature_column_name() + column_name_obj = feature_generator._as_column() + expected_df = pd.DataFrame({column_expected: expected}) + assert result.equals(expected_df) + assert column_name_obj.display_name == f"Text contains {mode} of defined items" + + +@pytest.mark.parametrize( + ("case", "mode", "expected"), + [ + (True, "any", [True, False, True, False, True]), + (True, "all", [True, False, True, True, True]), + (False, "any", [False, False, False, False, True]), + (False, "all", [True, False, False, True, True]), + ], +) +def test_item_no_match(case: bool, mode: str, expected: List[bool]): + data = { + "generated": [ + "You should consider purchasing Nike or Adidas shoes.", + "I eat apples, grapes, and oranges", + "grapes, oranges, apples.", + "Oranges are more sour than grapes.", + "This test doesn't have the words.", + ], + "forbidden": [ + ["nike", "adidas", "puma"], + ["grapes", "apples", "oranges"], + ["Apples", "Oranges", "Grapes"], + ["orange", "sweet", "grape"], + ["none", "of", "these"], + ], + } + feature_generator = ItemNoMatch(columns=["generated", "forbidden"], case_sensitive=case, mode=mode) + df = pd.DataFrame(data) + df["forbidden"] = df["forbidden"].apply(tuple) + result = feature_generator.generate_feature( + data=df, + data_definition=create_data_definition(None, df, ColumnMapping()), + ) + column_expected = feature_generator._feature_column_name() + column_name_obj = feature_generator._as_column() + expected_df = pd.DataFrame({column_expected: expected}) + assert result.equals(expected_df) + assert column_name_obj.display_name == f"Text does not contain {mode} of defined items" diff --git a/tests/metrics/data_interity/test_dataset_rouge_summary_metric.py b/tests/metrics/data_interity/test_dataset_rouge_summary_metric.py new file mode 100644 index 0000000000..814bf39ec2 --- /dev/null +++ b/tests/metrics/data_interity/test_dataset_rouge_summary_metric.py @@ -0,0 +1,45 @@ +import json + +import pandas as pd +import pytest + +from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric +from evidently.report.report import Report + + +@pytest.mark.parametrize( + "current_df, reference_df, metric, expected_json", + ( + ( + pd.DataFrame( + { + "summary": ["hello there", "general kenobi"], + } + ), + pd.DataFrame({"summary": ["hello there", "no de"]}), + ROUGESummaryMetric(column_name="summary", rouge_n=1), + { + "current": ["hello there", "general kenobi"], + "reference": ["hello there", "no de"], + "rouge_type": "ROUGE-1", + "per_row_scores": [1.0, 0.0], + "summary_score": 0.5, + }, + ), + ), +) +def 
test_rouge_summary_metric_with_report( + current_df: pd.DataFrame, + reference_df: pd.DataFrame, + metric, + expected_json: dict, +) -> None: + report = Report(metrics=[metric]) + + report.run(current_data=current_df, reference_data=reference_df) + + assert report.show() + json_result = report.json() + assert len(json_result) > 0 + result = json.loads(json_result) + assert result["metrics"][0]["result"] == expected_json diff --git a/tests/multitest/metrics/data_integrity.py b/tests/multitest/metrics/data_integrity.py index d52ae6526a..7973928f44 100644 --- a/tests/multitest/metrics/data_integrity.py +++ b/tests/multitest/metrics/data_integrity.py @@ -16,6 +16,7 @@ from evidently.metrics.data_integrity.column_summary_metric import NumericCharacteristics from evidently.metrics.data_integrity.dataset_missing_values_metric import DatasetMissingValuesMetric from evidently.metrics.data_integrity.dataset_summary_metric import DatasetSummaryMetric +from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric from tests.multitest.conftest import AssertExpectedResult from tests.multitest.conftest import Error from tests.multitest.conftest import NoopOutcome @@ -206,6 +207,27 @@ def dataset_summary_metric(): ) +@metric +def rouge_summary_metric(): + return TestMetric( + name="rouge_summary_metric", + metric=ROUGESummaryMetric(column_name="summary", rouge_n=1), + fingerprint="bfc616f760b973d2cbfbf0540c7b2c71", + outcomes=NoopOutcome(), + datasets=[ + TestDataset( + "rouge_summary_metric_data", + current=pd.DataFrame( + { + "summary": ["hello there", "general kenobi"], + } + ), + reference=pd.DataFrame({"summary": ["hello there", "no de"]}), + ), + ], + ) + + @metric def column_reg_exp_metric(): return TestMetric( diff --git a/tests/test_pydantic_aliases.py b/tests/test_pydantic_aliases.py index 488322edd3..0cd96d923c 100644 --- a/tests/test_pydantic_aliases.py +++ b/tests/test_pydantic_aliases.py @@ -16,6 +16,9 @@ from evidently.base_metric import MetricResult from evidently.collector.config import CollectorTrigger from evidently.collector.storage import CollectorStorage +from evidently.experimental.dataset_generators.base import BaseDatasetGenerator +from evidently.experimental.dataset_generators.llm.index import DataCollectionProvider +from evidently.experimental.dataset_generators.llm.splitter import Splitter from evidently.features.generated_features import BaseDescriptor from evidently.features.generated_features import GeneratedFeatures from evidently.features.llm_judge import BaseLLMPromptTemplate @@ -32,6 +35,8 @@ from evidently.tests.base_test import TestParameters from evidently.ui.components.base import Component from evidently.ui.dashboards.base import DashboardPanel +from evidently.utils.llm.prompts import PromptBlock +from evidently.utils.llm.prompts import PromptTemplate T = TypeVar("T") @@ -105,6 +110,11 @@ def test_all_aliases_correct(): CollectorStorage: "collector_storage", BaseLLMPromptTemplate: "prompt_template", DashboardPanel: "dashboard_panel", + BaseDatasetGenerator: "dataset_generator", + Splitter: "splitter", + DataCollectionProvider: "data_collecton_provider", + PromptBlock: "prompt_block", + PromptTemplate: "prompt_template", } skip = [Component] skip_literal = [EvidentlyBaseModel, WithTestAndMetricDependencies, BasePreset]
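
The prompts module above composes a prompt from PromptBlock instances and binds it to a typed request through the @llm_call decorator: the decorated method's arguments must match the placeholders in the rendered template, and its return annotation must match the output-format block. A minimal sketch under those rules follows; SynonymsPrompt, its block contents and the "word" placeholder are invented for illustration, only the imported names come from this diff.

from typing import ClassVar, List

from evidently.utils.llm.prompts import BlockPromptTemplate, PromptBlock, llm_call


class SynonymsPrompt(BlockPromptTemplate):
    blocks: ClassVar = [
        "You are a helpful assistant.",              # plain strings become SimpleBlock
        PromptBlock.input("word"),                   # renders as the {word} placeholder
        PromptBlock.string_list_output("synonyms"),  # responses are parsed into List[str]
    ]

    @llm_call
    def generate(self, word: str) -> List[str]: ...  # body unused; signature must match the placeholders


prompt = SynonymsPrompt()
print(prompt.get_template())       # the joined blocks, {word} still unsubstituted
print(prompt.list_placeholders())  # ['word']
request = prompt.generate(word="fast")  # an LLMRequest; nothing is sent anywhere yet

The resulting LLMRequest is then executed by a wrapper, e.g. get_llm_wrapper("openai", "gpt-4o-mini", options).run_sync(request) for some Options instance, which handles retries and parses the response with the template's output block.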
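
The wrapper module registers providers with the llm_provider decorator and resolves them via get_llm_wrapper; complete_batch and run_batch fan requests out under a Semaphore and the per-minute RateLimiter. A sketch with a stand-in "echo" provider, so nothing below needs an API key or network access; the provider name and EchoWrapper are hypothetical, the rest is the API added here.

from typing import List

from evidently.options.base import Options
from evidently.utils.llm.base import LLMMessage
from evidently.utils.llm.wrapper import LLMWrapper, get_llm_wrapper, llm_provider


@llm_provider("echo", None)  # register a factory for provider "echo", any model
class EchoWrapper(LLMWrapper):
    def __init__(self, model: str, options: Options):
        self.model = model

    async def complete(self, messages: List[LLMMessage]) -> str:
        return messages[-1].content  # no real LLM: just echo the last message back

    def get_rpm_limit(self):
        return 60  # complete_batch throttles through RateLimiter at 60 requests per minute


wrapper = get_llm_wrapper("echo", "any-model", Options())
batch = [[LLMMessage.user(f"question {i}")] for i in range(3)]
print(wrapper.complete_batch_sync(batch))  # ['question 0', 'question 1', 'question 2']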
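
JSONSchemaMatch, exercised by tests/features/test_json_schema_match_feature.py above, checks whether each value of a text column is valid JSON whose keys (and, optionally, value types) match an expected schema, either minimally or exactly. A small sketch mirroring the test setup; the column name and rows are made up.

import pandas as pd

from evidently.features.json_schema_match_feature import JSONSchemaMatch

df = pd.DataFrame({"answer": [
    '{"name": "Jane", "age": 25}',
    '{"name": "Jane", "age": "25"}',  # wrong type for "age"
    '{"name": "Invalid json"]',       # not valid JSON at all
]})

feature = JSONSchemaMatch(
    column_name="answer",
    expected_schema={"name": str, "age": int},
    validate_types=True,
    exact_match=False,  # minimal match: extra keys are allowed
)
result = feature.generate_feature(df, None)  # the tests above also pass None for data_definition
print(result[feature._feature_column_name()].tolist())  # [True, False, False]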
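
ItemMatch, covered by the new cases in tests/features/test_text_contains_feature.py, takes a pair of columns and reports per row whether the text in the first contains any/all of the items listed in the second. A sketch following the same setup as those tests (column names and rows are illustrative; items are passed as tuples, as the tests do).

import pandas as pd

from evidently.features.text_contains_feature import ItemMatch
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.utils.data_preprocessing import create_data_definition

df = pd.DataFrame({
    "generated": [
        "I eat apples, grapes, and oranges",
        "Oranges are more sour than grapes.",
    ],
    "expected": [
        ("grapes", "apples", "oranges"),
        ("orange", "sweet", "grape"),
    ],
})

feature = ItemMatch(columns=["generated", "expected"], case_sensitive=False, mode="all")
result = feature.generate_feature(df, create_data_definition(None, df, ColumnMapping()))
print(result[feature._feature_column_name()].tolist())  # [True, False]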