Skip to content

Commit

Permalink
feat(llm-observability): spans (#27933)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Michael Matloka <[email protected]>
  • Loading branch information
3 people authored Jan 28, 2025
1 parent 5b4d185 commit d7758ee
Show file tree
Hide file tree
Showing 14 changed files with 622 additions and 169 deletions.
25 changes: 20 additions & 5 deletions frontend/src/lib/taxonomy.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,11 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = {
description:
'A generative AI trace. Usually a trace tracks a single user interaction and contains one or more AI generation calls',
},
$ai_span: {
label: 'AI Span',
description:
'A generative AI span. Usually a span tracks a unit of work for a trace of generative AI models (LLMs)',
},
$ai_metric: {
label: 'AI Metric',
description: 'An evaluation metric for a trace of generative AI models (LLMs)',
Expand Down Expand Up @@ -1454,11 +1459,6 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = {
label: 'AI Output State (LLM)',
description: 'Output state of the LLM agent',
},
$ai_trace_name: {
label: 'AI Trace Name (LLM)',
description: 'The name given to this trace of LLM API calls',
examples: ['LangGraph'],
},
$ai_provider: {
label: 'AI Provider (LLM)',
description: 'The provider of the AI model used to generate the output from the LLM API',
Expand Down Expand Up @@ -1490,6 +1490,21 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = {
description: 'The text provided by the user for feedback on the LLM trace',
examples: ['"The response was helpful, but it did not use the provided context."'],
},
$ai_parent_id: {
label: 'AI Parent ID (LLM)',
description: 'The parent span ID of a span or generation, used to group a trace into a tree view',
examples: ['bdf42359-9364-4db7-8958-c001f28c9255'],
},
$ai_span_id: {
label: 'AI Span ID (LLM)',
description: 'The unique identifier for a LLM trace, generation, or span.',
examples: ['bdf42359-9364-4db7-8958-c001f28c9255'],
},
$ai_span_name: {
label: 'AI Span Name (LLM)',
description: 'The name given to this LLM trace, generation, or span.',
examples: ['summarize_text'],
},
},
numerical_event_properties: {}, // Same as event properties, see assignment below
person_properties: {}, // Currently person properties are the same as event properties, see assignment below
Expand Down
2 changes: 1 addition & 1 deletion jest.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ process.env.TZ = process.env.TZ || 'UTC'
const esmModules = ['query-selector-shadow-dom', 'react-syntax-highlighter', '@react-hook', '@medv', 'monaco-editor']
const eeFolderExists = fs.existsSync('ee/frontend/exports.ts')
function rootDirectories() {
const rootDirectories = ['<rootDir>/frontend/src']
const rootDirectories = ['<rootDir>/frontend/src', '<rootDir>/products']
if (eeFolderExists) {
rootDirectories.push('<rootDir>/ee/frontend')
}
Expand Down
9 changes: 4 additions & 5 deletions posthog/demo/matrix/models.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from contextlib import contextmanager
import datetime as dt
from abc import ABC, abstractmethod
from collections import defaultdict
from collections.abc import Callable, Generator, Iterable
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import dataclass
from enum import Enum, auto
from itertools import chain
from typing import TYPE_CHECKING, Any, Generic, Literal, Optional, TypeVar
from collections.abc import Generator
from collections.abc import Callable, Iterable
from urllib.parse import urlparse, parse_qs
from urllib.parse import parse_qs, urlparse
from uuid import UUID, uuid4

import tiktoken
Expand Down Expand Up @@ -228,7 +227,7 @@ def set_trace_output(output: Any):
{
"$ai_input_state": input_state,
"$ai_output_state": output_state,
"$ai_trace_name": "SpikeChain",
"$ai_span_name": "SpikeChain",
"$ai_trace_id": trace_id,
},
distinct_id=distinct_id,
Expand Down

Large diffs are not rendered by default.

118 changes: 112 additions & 6 deletions posthog/hogql_queries/ai/test/test_traces_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _create_ai_generation_event(
def _create_ai_trace_event(
*,
trace_id: str,
trace_name: str,
trace_name: str | None,
input_state: Any,
output_state: Any,
team: Team | None = None,
Expand All @@ -106,7 +106,7 @@ def _create_ai_trace_event(
):
props = {
"$ai_trace_id": trace_id,
"$ai_trace_name": trace_name,
"$ai_span_name": trace_name,
"$ai_input_state": input_state,
"$ai_output_state": output_state,
}
Expand All @@ -123,6 +123,41 @@ def _create_ai_trace_event(
)


def _create_ai_span_event(
    *,
    trace_id: str,
    input_state: Any,
    output_state: Any,
    span_id: str | None = None,
    parent_id: str | None = None,
    span_name: str | None = None,
    team: Team | None = None,
    distinct_id: str | None = None,
    properties: dict[str, Any] | None = None,
    timestamp: datetime | None = None,
    event_uuid: str | UUID | None = None,
):
    """Create an ``$ai_span`` test event belonging to the trace ``trace_id``.

    Defaults applied when the corresponding argument is falsy:

    * ``span_id`` -> a freshly generated ``uuid4`` string.
    * ``parent_id`` -> ``trace_id``.

    Entries in ``properties`` are merged last, so they override any of the
    standard ``$ai_*`` keys built here.
    """
    # Resolve the identifiers first so the property dict below stays declarative.
    resolved_span_id = span_id if span_id else str(uuid.uuid4())
    resolved_parent_id = parent_id if parent_id else trace_id

    span_properties = {
        "$ai_trace_id": trace_id,
        "$ai_span_name": span_name,
        "$ai_input_state": input_state,
        "$ai_output_state": output_state,
        "$ai_span_id": resolved_span_id,
        "$ai_parent_id": resolved_parent_id,
    }
    # Caller-supplied properties win over the defaults above.
    span_properties.update(properties or {})

    # _create_event receives either a string UUID or None, never a UUID object.
    uuid_arg = None
    if event_uuid:
        uuid_arg = str(event_uuid)

    _create_event(
        event="$ai_span",
        distinct_id=distinct_id,
        properties=span_properties,
        team=team,
        timestamp=timestamp,
        event_uuid=uuid_arg,
    )


class TestTracesQueryRunner(ClickhouseTestMixin, BaseTest):
def setUp(self):
super().setUp()
Expand Down Expand Up @@ -556,11 +591,19 @@ def test_model_parameters(self):

def test_full_trace(self):
_create_person(distinct_ids=["person1"], team=self.team, properties={"foo": "bar"})
_create_ai_span_event(
trace_id="trace1",
span_name="runnable",
input_state={"messages": [{"role": "user", "content": "Foo"}]},
output_state={"messages": [{"role": "user", "content": "Foo"}, {"role": "assistant", "content": "Bar"}]},
team=self.team,
timestamp=datetime(2024, 12, 1, 0, 9),
)
_create_ai_generation_event(
distinct_id="person1",
trace_id="trace1",
team=self.team,
timestamp=datetime(2024, 12, 1, 0, 0),
timestamp=datetime(2024, 12, 1, 0, 9, 30),
)
_create_ai_generation_event(
distinct_id="person1",
Expand Down Expand Up @@ -591,10 +634,18 @@ def test_full_trace(self):
response.results[0].outputState,
{"messages": [{"role": "user", "content": "Foo"}, {"role": "assistant", "content": "Bar"}]},
)
self.assertEqual(len(response.results[0].events), 2)
self.assertEqual(len(response.results[0].events), 3)

self.assertEqual(response.results[0].events[0].event, "$ai_span")
self.assertEqual(response.results[0].events[0].properties["$ai_trace_id"], "trace1")
self.assertEqual(response.results[0].events[0].properties["$ai_span_name"], "runnable")

self.assertEqual(response.results[0].events[1].event, "$ai_generation")
self.assertEqual(response.results[0].events[1].properties["$ai_trace_id"], "trace1")

self.assertEqual(response.results[0].events[2].event, "$ai_generation")
self.assertEqual(response.results[0].events[2].properties["$ai_trace_id"], "trace1")

@snapshot_clickhouse_queries
def test_properties_filter_with_multiple_events_in_group(self):
_create_person(distinct_ids=["person1"], team=self.team)
Expand Down Expand Up @@ -671,7 +722,7 @@ def test_trace_property_filter_for_event_group(self):
team=self.team,
query=TracesQuery(
properties=[
EventPropertyFilter(key="$ai_trace_name", value="runnable", operator=PropertyOperator.EXACT)
EventPropertyFilter(key="$ai_span_name", value="runnable", operator=PropertyOperator.EXACT)
],
dateRange=DateRange(date_from="2024-12-01T00:00:00Z", date_to="2024-12-01T00:10:00Z"),
),
Expand All @@ -694,10 +745,65 @@ def test_trace_property_filter_for_event_group(self):
team=self.team,
query=TracesQuery(
properties=[
EventPropertyFilter(key="$ai_trace_name", value="runnable", operator=PropertyOperator.EXACT),
EventPropertyFilter(key="$ai_span_name", value="runnable", operator=PropertyOperator.EXACT),
EventPropertyFilter(key="foo", value="bar", operator=PropertyOperator.EXACT),
],
dateRange=DateRange(date_from="2024-12-01T00:00:00Z", date_to="2024-12-01T00:10:00Z"),
),
).calculate()
self.assertEqual(len(response.results), 0)

def test_trace_name_fallback(self):
    """
    The trace name is read from $ai_span_name; the deprecated $ai_trace_name
    property is still honoured as a fallback for backwards compatibility.
    """

    def seed_trace(trace_id, **trace_event_kwargs):
        # One generation plus one $ai_trace event, both attached to `trace_id`.
        _create_ai_generation_event(
            distinct_id="person1",
            trace_id=trace_id,
            team=self.team,
            timestamp=datetime(2024, 12, 1, 0, 0),
        )
        _create_ai_trace_event(
            trace_id=trace_id,
            input_state={"messages": [{"role": "user", "content": "Foo"}]},
            output_state={"messages": [{"role": "user", "content": "Foo"}, {"role": "assistant", "content": "Bar"}]},
            team=self.team,
            timestamp=datetime(2024, 12, 1, 0, 5),
            **trace_event_kwargs,
        )

    def fetch_traces(trace_id):
        # Run the traces query for a single trace inside the fixed test window.
        runner = TracesQueryRunner(
            team=self.team,
            query=TracesQuery(
                traceId=trace_id,
                dateRange=DateRange(date_from="2024-12-01T00:00:00Z", date_to="2024-12-01T00:10:00Z"),
            ),
        )
        return runner.calculate()

    _create_person(distinct_ids=["person1"], team=self.team)

    # Case 1: the name is supplied through the current $ai_span_name property.
    seed_trace("trace1", trace_name="runnable")
    named_response = fetch_traces("trace1")
    self.assertEqual(len(named_response.results), 1)
    self.assertEqual(named_response.results[0].traceName, "runnable")

    # Case 2: $ai_span_name is null, so the deprecated $ai_trace_name is used.
    seed_trace("trace2", trace_name=None, properties={"$ai_trace_name": "bar"})
    fallback_response = fetch_traces("trace2")
    self.assertEqual(len(fallback_response.results), 1)
    self.assertEqual(fallback_response.results[0].traceName, "bar")
4 changes: 2 additions & 2 deletions posthog/hogql_queries/ai/traces_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,15 +222,15 @@ def _get_event_query(self) -> ast.SelectQuery:
arraySort(x -> x.3, groupArray(tuple(uuid, event, timestamp, properties))) as events,
{filter_conditions}
FROM events
WHERE event IN ('$ai_generation', '$ai_metric', '$ai_feedback') AND {common_conditions}
WHERE event IN ('$ai_span', '$ai_generation', '$ai_metric', '$ai_feedback') AND {common_conditions}
GROUP BY id
) AS generations
LEFT JOIN (
SELECT
properties.$ai_trace_id as id,
argMin(properties.$ai_input_state, timestamp) as input_state,
argMin(properties.$ai_output_state, timestamp) as output_state,
argMin(properties.$ai_trace_name, timestamp) as trace_name,
argMin(ifNull(properties.$ai_span_name, properties.$ai_trace_name), timestamp) as trace_name,
{filter_conditions}
FROM events
WHERE event = '$ai_trace' AND {common_conditions}
Expand Down
18 changes: 18 additions & 0 deletions posthog/taxonomy/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,14 @@ class CoreFilterDefinition(TypedDict):
"label": "AI Feedback (LLM)",
"description": "User-provided feedback for a trace of a generative AI model (LLM).",
},
"$ai_trace": {
"label": "AI Trace (LLM)",
"description": "A generative AI trace. Usually a trace tracks a single user interaction and contains one or more AI generation calls",
},
"$ai_span": {
"label": "AI Span (LLM)",
"description": "A generative AI span. Usually a span tracks a unit of work for a trace of generative AI models (LLMs)",
},
"Application Opened": {
"label": "Application Opened",
"description": "When a user opens the mobile app either for the first time or from the foreground.",
Expand Down Expand Up @@ -1377,6 +1385,16 @@ class CoreFilterDefinition(TypedDict):
"description": "The text provided by the user for feedback on the LLM trace",
"examples": ['"The response was helpful, but it did not use the provided context."'],
},
"$ai_parent_id": {
"label": "AI Parent ID (LLM)",
"description": "The parent span ID of a span or generation, used to group a trace into a tree view",
"examples": ["bdf42359-9364-4db7-8958-c001f28c9255"],
},
"$ai_span_id": {
"label": "AI Span ID (LLM)",
"description": "The unique identifier for a LLM trace, generation, or span.",
"examples": ["bdf42359-9364-4db7-8958-c001f28c9255"],
},
},
"numerical_event_properties": {},
"person_properties": {},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ export function ConversationDisplay({ eventProperties }: { eventProperties: Even
</header>
<ConversationMessagesDisplay
input={eventProperties.$ai_input}
output={eventProperties.$ai_output_choices}
output={eventProperties.$ai_output_choices ?? eventProperties.$ai_output ?? eventProperties.$ai_error}
httpStatus={eventProperties.$ai_http_status}
raisedError={eventProperties.$ai_is_error}
bordered
/>
</>
Expand Down
Loading

0 comments on commit d7758ee

Please sign in to comment.