feat(llm-observability): spans #27933

Merged · 25 commits · Jan 28, 2025
25 changes: 20 additions & 5 deletions frontend/src/lib/taxonomy.tsx
@@ -184,6 +184,11 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = {
description:
'A generative AI trace. Usually a trace tracks a single user interaction and contains one or more AI generation calls',
},
$ai_span: {
label: 'AI Span',
description:
'A generative AI span. Usually a span tracks a unit of work for a trace of generative AI models (LLMs)',
},
$ai_metric: {
label: 'AI Metric',
description: 'An evaluation metric for a trace of generative AI models (LLMs)',
@@ -1454,11 +1459,6 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = {
label: 'AI Output State (LLM)',
description: 'Output state of the LLM agent',
},
$ai_trace_name: {
label: 'AI Trace Name (LLM)',
description: 'The name given to this trace of LLM API calls',
examples: ['LangGraph'],
},
$ai_provider: {
label: 'AI Provider (LLM)',
description: 'The provider of the AI model used to generate the output from the LLM API',
@@ -1490,6 +1490,21 @@
description: 'The text provided by the user for feedback on the LLM trace',
examples: ['"The response was helpful, but it did not use the provided context."'],
},
$ai_parent_id: {
label: 'AI Parent ID (LLM)',
description: 'The parent span ID of a span or generation, used to group a trace into a tree view',
examples: ['bdf42359-9364-4db7-8958-c001f28c9255'],
},
$ai_span_id: {
label: 'AI Span ID (LLM)',
description: 'The unique identifier for a LLM trace, generation, or span.',
examples: ['bdf42359-9364-4db7-8958-c001f28c9255'],
},
$ai_span_name: {
label: 'AI Span Name (LLM)',
description: 'The name given to this LLM trace, generation, or span.',
examples: ['summarize_text'],
},
},
numerical_event_properties: {}, // Same as event properties, see assignment below
person_properties: {}, // Currently person properties are the same as event properties, see assignment below
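
For context, a minimal sketch of how an instrumented app might emit the new $ai_span event with these properties, assuming the posthog-python module-level capture(distinct_id, event, properties) API; the distinct ID, span name, and UUIDs below are illustrative, not part of this PR:

import uuid

import posthog  # assumed: posthog-python client, configured elsewhere with a project API key

trace_id = str(uuid.uuid4())
span_id = str(uuid.uuid4())

# A top-level span: its parent is the trace itself, so $ai_parent_id == $ai_trace_id.
posthog.capture(
    "user_distinct_id",  # illustrative distinct ID
    "$ai_span",
    {
        "$ai_trace_id": trace_id,
        "$ai_span_id": span_id,
        "$ai_parent_id": trace_id,
        "$ai_span_name": "summarize_text",
    },
)
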
2 changes: 1 addition & 1 deletion jest.config.ts
@@ -11,7 +11,7 @@ process.env.TZ = process.env.TZ || 'UTC'
const esmModules = ['query-selector-shadow-dom', 'react-syntax-highlighter', '@react-hook', '@medv', 'monaco-editor']
const eeFolderExists = fs.existsSync('ee/frontend/exports.ts')
function rootDirectories() {
const rootDirectories = ['<rootDir>/frontend/src']
const rootDirectories = ['<rootDir>/frontend/src', '<rootDir>/products']
if (eeFolderExists) {
rootDirectories.push('<rootDir>/ee/frontend')
}
9 changes: 4 additions & 5 deletions posthog/demo/matrix/models.py
@@ -1,15 +1,14 @@
from contextlib import contextmanager
import datetime as dt
from abc import ABC, abstractmethod
from collections import defaultdict
from collections.abc import Callable, Generator, Iterable
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import dataclass
from enum import Enum, auto
from itertools import chain
from typing import TYPE_CHECKING, Any, Generic, Literal, Optional, TypeVar
from collections.abc import Generator
from collections.abc import Callable, Iterable
from urllib.parse import urlparse, parse_qs
from urllib.parse import parse_qs, urlparse
from uuid import UUID, uuid4

import tiktoken
@@ -228,7 +227,7 @@ def set_trace_output(output: Any):
{
"$ai_input_state": input_state,
"$ai_output_state": output_state,
"$ai_trace_name": "SpikeChain",
"$ai_span_name": "SpikeChain",
"$ai_trace_id": trace_id,
},
distinct_id=distinct_id,

Large diffs are not rendered by default.

118 changes: 112 additions & 6 deletions posthog/hogql_queries/ai/test/test_traces_query_runner.py
@@ -95,7 +95,7 @@ def _create_ai_generation_event(
def _create_ai_trace_event(
*,
trace_id: str,
trace_name: str,
trace_name: str | None,
input_state: Any,
output_state: Any,
team: Team | None = None,
Expand All @@ -106,7 +106,7 @@ def _create_ai_trace_event(
):
props = {
"$ai_trace_id": trace_id,
"$ai_trace_name": trace_name,
"$ai_span_name": trace_name,
"$ai_input_state": input_state,
"$ai_output_state": output_state,
}
@@ -123,6 +123,41 @@ def _create_ai_trace_event(
)


def _create_ai_span_event(
*,
trace_id: str,
input_state: Any,
output_state: Any,
span_id: str | None = None,
parent_id: str | None = None,
span_name: str | None = None,
team: Team | None = None,
distinct_id: str | None = None,
properties: dict[str, Any] | None = None,
timestamp: datetime | None = None,
event_uuid: str | UUID | None = None,
):
props = {
"$ai_trace_id": trace_id,
"$ai_span_name": span_name,
"$ai_input_state": input_state,
"$ai_output_state": output_state,
"$ai_span_id": span_id or str(uuid.uuid4()),
"$ai_parent_id": parent_id or trace_id,
}
if properties:
props.update(properties)

_create_event(
event="$ai_span",
distinct_id=distinct_id,
properties=props,
team=team,
timestamp=timestamp,
event_uuid=str(event_uuid) if event_uuid else None,
)


class TestTracesQueryRunner(ClickhouseTestMixin, BaseTest):
def setUp(self):
super().setUp()
@@ -556,11 +591,19 @@ def test_model_parameters(self):

def test_full_trace(self):
_create_person(distinct_ids=["person1"], team=self.team, properties={"foo": "bar"})
_create_ai_span_event(
trace_id="trace1",
span_name="runnable",
input_state={"messages": [{"role": "user", "content": "Foo"}]},
output_state={"messages": [{"role": "user", "content": "Foo"}, {"role": "assistant", "content": "Bar"}]},
team=self.team,
timestamp=datetime(2024, 12, 1, 0, 9),
)
_create_ai_generation_event(
distinct_id="person1",
trace_id="trace1",
team=self.team,
timestamp=datetime(2024, 12, 1, 0, 0),
timestamp=datetime(2024, 12, 1, 0, 9, 30),
)
_create_ai_generation_event(
distinct_id="person1",
Expand Down Expand Up @@ -591,10 +634,18 @@ def test_full_trace(self):
response.results[0].outputState,
{"messages": [{"role": "user", "content": "Foo"}, {"role": "assistant", "content": "Bar"}]},
)
self.assertEqual(len(response.results[0].events), 2)
self.assertEqual(len(response.results[0].events), 3)

self.assertEqual(response.results[0].events[0].event, "$ai_span")
self.assertEqual(response.results[0].events[0].properties["$ai_trace_id"], "trace1")
self.assertEqual(response.results[0].events[0].properties["$ai_span_name"], "runnable")

self.assertEqual(response.results[0].events[1].event, "$ai_generation")
self.assertEqual(response.results[0].events[1].properties["$ai_trace_id"], "trace1")

self.assertEqual(response.results[0].events[2].event, "$ai_generation")
self.assertEqual(response.results[0].events[2].properties["$ai_trace_id"], "trace1")

@snapshot_clickhouse_queries
def test_properties_filter_with_multiple_events_in_group(self):
_create_person(distinct_ids=["person1"], team=self.team)
@@ -671,7 +722,7 @@ def test_trace_property_filter_for_event_group(self):
team=self.team,
query=TracesQuery(
properties=[
EventPropertyFilter(key="$ai_trace_name", value="runnable", operator=PropertyOperator.EXACT)
EventPropertyFilter(key="$ai_span_name", value="runnable", operator=PropertyOperator.EXACT)
],
dateRange=DateRange(date_from="2024-12-01T00:00:00Z", date_to="2024-12-01T00:10:00Z"),
),
@@ -694,10 +745,65 @@
team=self.team,
query=TracesQuery(
properties=[
EventPropertyFilter(key="$ai_trace_name", value="runnable", operator=PropertyOperator.EXACT),
EventPropertyFilter(key="$ai_span_name", value="runnable", operator=PropertyOperator.EXACT),
EventPropertyFilter(key="foo", value="bar", operator=PropertyOperator.EXACT),
],
dateRange=DateRange(date_from="2024-12-01T00:00:00Z", date_to="2024-12-01T00:10:00Z"),
),
).calculate()
self.assertEqual(len(response.results), 0)

def test_trace_name_fallback(self):
"""
$ai_trace_name is a deprecated property, but we still want to support it for backwards compatibility.
"""
_create_person(distinct_ids=["person1"], team=self.team)
_create_ai_generation_event(
distinct_id="person1",
trace_id="trace1",
team=self.team,
timestamp=datetime(2024, 12, 1, 0, 0),
)
_create_ai_trace_event(
trace_id="trace1",
trace_name="runnable",
input_state={"messages": [{"role": "user", "content": "Foo"}]},
output_state={"messages": [{"role": "user", "content": "Foo"}, {"role": "assistant", "content": "Bar"}]},
team=self.team,
timestamp=datetime(2024, 12, 1, 0, 5),
)

response = TracesQueryRunner(
team=self.team,
query=TracesQuery(
traceId="trace1",
dateRange=DateRange(date_from="2024-12-01T00:00:00Z", date_to="2024-12-01T00:10:00Z"),
),
).calculate()
self.assertEqual(len(response.results), 1)
self.assertEqual(response.results[0].traceName, "runnable")

_create_ai_generation_event(
distinct_id="person1",
trace_id="trace2",
team=self.team,
timestamp=datetime(2024, 12, 1, 0, 0),
)
_create_ai_trace_event(
trace_id="trace2",
trace_name=None,
input_state={"messages": [{"role": "user", "content": "Foo"}]},
output_state={"messages": [{"role": "user", "content": "Foo"}, {"role": "assistant", "content": "Bar"}]},
team=self.team,
timestamp=datetime(2024, 12, 1, 0, 5),
properties={"$ai_trace_name": "bar"},
)
response = TracesQueryRunner(
team=self.team,
query=TracesQuery(
traceId="trace2",
dateRange=DateRange(date_from="2024-12-01T00:00:00Z", date_to="2024-12-01T00:10:00Z"),
),
).calculate()
self.assertEqual(len(response.results), 1)
self.assertEqual(response.results[0].traceName, "bar")
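
The fallback exercised by test_trace_name_fallback boils down to preferring $ai_span_name and only reading the deprecated $ai_trace_name when the new property is absent. Roughly, in plain Python (a sketch of the semantics, not the runner's code):

def resolve_trace_name(properties: dict) -> str | None:
    # Prefer the new property; fall back to the deprecated $ai_trace_name for older SDKs.
    return properties.get("$ai_span_name") or properties.get("$ai_trace_name")

resolve_trace_name({"$ai_span_name": "runnable"})  # -> "runnable"
resolve_trace_name({"$ai_trace_name": "bar"})      # -> "bar"
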
4 changes: 2 additions & 2 deletions posthog/hogql_queries/ai/traces_query_runner.py
@@ -222,15 +222,15 @@ def _get_event_query(self) -> ast.SelectQuery:
arraySort(x -> x.3, groupArray(tuple(uuid, event, timestamp, properties))) as events,
{filter_conditions}
FROM events
WHERE event IN ('$ai_generation', '$ai_metric', '$ai_feedback') AND {common_conditions}
WHERE event IN ('$ai_span', '$ai_generation', '$ai_metric', '$ai_feedback') AND {common_conditions}
GROUP BY id
) AS generations
LEFT JOIN (
SELECT
properties.$ai_trace_id as id,
argMin(properties.$ai_input_state, timestamp) as input_state,
argMin(properties.$ai_output_state, timestamp) as output_state,
argMin(properties.$ai_trace_name, timestamp) as trace_name,
argMin(ifNull(properties.$ai_span_name, properties.$ai_trace_name), timestamp) as trace_name,
{filter_conditions}
FROM events
WHERE event = '$ai_trace' AND {common_conditions}
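
Since the runner now returns $ai_span events alongside generations in one timestamp-sorted array, a consumer can rebuild the tree view client-side from $ai_span_id/$ai_parent_id. A minimal sketch under that assumption (the helper is illustrative, not part of this PR):

from collections import defaultdict
from typing import Any

def group_events_by_parent(trace_id: str, events: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
    # Bucket each span/generation under its parent; events without an explicit
    # $ai_parent_id attach directly to the trace root.
    children: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for event in events:
        parent_id = event["properties"].get("$ai_parent_id") or trace_id
        children[parent_id].append(event)
    return children

# children[trace_id] then holds the top-level spans and generations, and
# children[some_span_id] holds whatever was nested under that span.
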
18 changes: 18 additions & 0 deletions posthog/taxonomy/taxonomy.py
@@ -196,6 +196,14 @@ class CoreFilterDefinition(TypedDict):
"label": "AI Feedback (LLM)",
"description": "User-provided feedback for a trace of a generative AI model (LLM).",
},
"$ai_trace": {
"label": "AI Trace (LLM)",
"description": "A generative AI trace. Usually a trace tracks a single user interaction and contains one or more AI generation calls",
},
"$ai_span": {
"label": "AI Span (LLM)",
"description": "A generative AI span. Usually a span tracks a unit of work for a trace of generative AI models (LLMs)",
},
"Application Opened": {
"label": "Application Opened",
"description": "When a user opens the mobile app either for the first time or from the foreground.",
@@ -1377,6 +1385,16 @@ class CoreFilterDefinition(TypedDict):
"description": "The text provided by the user for feedback on the LLM trace",
"examples": ['"The response was helpful, but it did not use the provided context."'],
},
"$ai_parent_id": {
"label": "AI Parent ID (LLM)",
"description": "The parent span ID of a span or generation, used to group a trace into a tree view",
"examples": ["bdf42359-9364-4db7-8958-c001f28c9255"],
},
"$ai_span_id": {
"label": "AI Span ID (LLM)",
"description": "The unique identifier for a LLM trace, generation, or span.",
"examples": ["bdf42359-9364-4db7-8958-c001f28c9255"],
},
},
"numerical_event_properties": {},
"person_properties": {},
@@ -17,8 +17,9 @@ export function ConversationDisplay({ eventProperties }: { eventProperties: Even
</header>
<ConversationMessagesDisplay
input={eventProperties.$ai_input}
output={eventProperties.$ai_output_choices}
output={eventProperties.$ai_output_choices ?? eventProperties.$ai_output ?? eventProperties.$ai_error}
httpStatus={eventProperties.$ai_http_status}
raisedError={eventProperties.$ai_is_error}
bordered
/>
</>