Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for submitting telemetry data via HTTP request headers #30

Merged
merged 14 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,31 @@ on:

jobs:
build:

runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ["3.8", "3.12"]
os: ["macos-latest", "ubuntu-latest", "windows-latest"]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Add conda to system path
run: |
# $CONDA is an environment variable pointing to the root of the miniconda directory
echo $CONDA/bin >> $GITHUB_PATH
- uses: conda-incubator/setup-miniconda@v3
name: Setup Miniconda
with:
auto-update-conda: true
python-version: ${{ matrix.python-version }}
- name: Install dependencies (conda)
run: |
conda env update --file environment.yml --name base
conda install --file requirements.dev.txt
- name: Install conda-basic-auth
conda env update --file environment.yaml --name test
conda install --name test --file requirements.dev.txt
- name: Conda info
travishathaway marked this conversation as resolved.
Show resolved Hide resolved
run: |
pip install -e .
conda info
conda list --name test
- name: Test with pytest
run: |
pytest --doctest-modules
conda run --name test pytest --doctest-modules
263 changes: 212 additions & 51 deletions anaconda_conda_telemetry/hooks.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,53 @@
from conda import __version__ as CONDA_VERSION
from __future__ import annotations

import functools
import logging
import sys
import time
import typing

from conda.base.context import context
from conda.cli.main_list import list_packages
from conda.common.url import mask_anaconda_token
from conda.models.channel import all_channel_urls
from conda.plugins import hookimpl, CondaPostCommand
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
# ConsoleSpanExporter,
)
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from conda.plugins import hookimpl, CondaRequestHeader

try:
from conda_build import __version__ as CONDA_BUILD_VERSION
except ImportError:
CONDA_BUILD_VERSION = "n/a"

# Tracer
provider = TracerProvider()
exporter = OTLPSpanExporter(
endpoint="http://localhost:4317/",
insecure=True,
)
processor = BatchSpanProcessor(exporter)
provider.add_span_processor(processor)
if typing.TYPE_CHECKING:
from collections.abc import Iterator

logger = logging.getLogger(__name__)

# Enable when debugging output is desired
# processor_console = BatchSpanProcessor(ConsoleSpanExporter())
# provider.add_span_processor(processor_console)
#: Field separator for request header
FIELD_SEPARATOR = ";"

# Sets the global default tracer provider
trace.set_tracer_provider(provider)
#: Size limit in bytes for the payload in the request header
SIZE_LIMIT = 7_000

# Creates a tracer from the global tracer provider
tracer = trace.get_tracer("anaconda-conda-telemetry")
#: Prefix for all custom headers submitted via this plugin
HEADER_PREFIX = "Anaconda-Telemetry"

#: Hosts we want to submit request headers to
REQUEST_HEADER_HOSTS = {"repo.anaconda.com", "conda.anaconda.org"}


def timer(func):
    """
    Decorator that logs how long each call to ``func`` takes

    The duration is measured with ``time.perf_counter`` and written to the
    module logger at ``INFO`` level after the call returns. The wrapped
    function's return value is passed through unchanged.
    """

    @functools.wraps(func)
    def wrapper_timer(*args, **kwargs):
        started = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed_time = time.perf_counter() - started
        logger.info(
            f"function: {func.__name__}; duration (seconds): {elapsed_time:0.4f}"
        )
        return result

    return wrapper_timer


def get_virtual_packages() -> tuple[str, ...]:
Expand All @@ -54,35 +68,182 @@ def get_channel_urls() -> tuple[str, ...]:
return tuple(mask_anaconda_token(c) for c in channels)


def get_conda_command() -> str | None:
    """
    Use ``sys.argv`` to determine the conda command that is currently being run

    Returns the first command line argument (e.g. ``install`` for
    ``conda install numpy``) or ``None`` when no argument was passed at all.
    """
    # ``sys.argv[0]`` is the executable, so the sub-command (when present) is
    # the next element. Comparing against ``1`` rather than ``2`` also covers
    # invocations made up of the sub-command alone, e.g. ``conda info``.
    if len(sys.argv) > 1:
        return sys.argv[1]

    return None

travishathaway marked this conversation as resolved.
Show resolved Hide resolved

@hookimpl
def conda_post_commands():
yield CondaPostCommand(
name="post-command-submit-telemetry-data",
action=submit_telemetry_data,
run_for=["install", "remove", "update", "create"],
def get_package_list() -> tuple[str, ...]:
    """
    Return the packages found under ``context.active_prefix``

    Delegates to ``list_packages`` with the ``canonical`` format. The first
    element of the returned pair is discarded and only the package listing
    is handed back.
    """
    _exit_code, installed = list_packages(context.active_prefix, format="canonical")

    return installed


def get_search_term() -> str:
    """
    Return the search term used when the search command is run

    The value is read from ``match_spec`` on the parsed command line
    arguments stored on ``context``.
    """
    parsed_args = context._argparse_args

    return parsed_args.match_spec


def get_install_arguments() -> tuple[str, ...]:
    """
    Return the positional arguments that were specified via the ``install``
    or ``create`` commands

    The value is read from ``packages`` on the parsed command line arguments
    stored on ``context``.
    """
    parsed_args = context._argparse_args

    return parsed_args.packages


@timer
@functools.lru_cache(None)
def get_sys_info_header_value() -> str:
    """
    Build the ``FIELD_SEPARATOR`` delimited string of extra system information

    Every field is rendered as ``key:value``. The result is memoized with
    ``functools.lru_cache``, so the value is computed on the first call only.
    """
    fields = (
        ("conda_build_version", CONDA_BUILD_VERSION),
        ("conda_command", get_conda_command()),
    )

    return FIELD_SEPARATOR.join(f"{name}:{data}" for name, data in fields)


@timer
@functools.lru_cache(None)
def get_channel_urls_header_value() -> str:
    """
    Join the channel URLs into a single ``FIELD_SEPARATOR`` delimited string

    The result is memoized with ``functools.lru_cache``.
    """
    channel_urls = get_channel_urls()

    return FIELD_SEPARATOR.join(channel_urls)


@timer
@functools.lru_cache(None)
def get_virtual_packages_header_value() -> str:
    """
    Join the virtual packages into a single ``FIELD_SEPARATOR`` delimited string

    The result is memoized with ``functools.lru_cache``.
    """
    virtual_packages = get_virtual_packages()

    return FIELD_SEPARATOR.join(virtual_packages)


@timer
@functools.lru_cache(None)
def get_install_arguments_header_value() -> str:
    """
    Join the install arguments into a single ``FIELD_SEPARATOR`` delimited string

    The arguments come from ``get_install_arguments``. The result is memoized
    with ``functools.lru_cache``.
    """
    install_arguments = get_install_arguments()

    return FIELD_SEPARATOR.join(install_arguments)


@timer
@functools.lru_cache(None)
def get_installed_packages_header_value() -> str:
    """
    Join the installed packages into a single ``FIELD_SEPARATOR`` delimited string

    The packages come from ``get_package_list``. The result is memoized with
    ``functools.lru_cache``.
    """
    installed_packages = get_package_list()

    return FIELD_SEPARATOR.join(installed_packages)


class HeaderWrapper(typing.NamedTuple):
    """
    Object that wraps ``CondaRequestHeader`` and adds a ``size_limit`` field

    ``validate_headers`` sums ``size_limit`` across all wrappers and slices
    each header's value down to its own ``size_limit``.
    """

    # The request header being wrapped
    header: CondaRequestHeader
    # Maximum length allowed for the wrapped header's value
    size_limit: int


def validate_headers(
    custom_headers: list[HeaderWrapper],
) -> Iterator[CondaRequestHeader]:
    """
    Yield the wrapped headers after enforcing their size limits

    The sum of every wrapper's ``size_limit`` must not exceed ``SIZE_LIMIT``.
    This is checked with an ``assert`` once iteration starts. Each header's
    value is then sliced down to its own ``size_limit`` (shorter values are
    left as they are) before the header is yielded.
    """
    combined_limit = sum(item.size_limit for item in custom_headers)
    assert combined_limit <= SIZE_LIMIT

    for item in custom_headers:
        header = item.header
        header.value = header.value[: item.size_limit]
        yield header


@hookimpl
def conda_request_headers():
    """
    Plugin hook that yields the telemetry request headers

    Four headers are always produced (system info, channels, virtual packages
    and installed packages). A search header is added when the current
    command is ``search``, and an install header when it is ``install`` or
    ``create``. All headers are passed through ``validate_headers`` before
    being yielded.
    """

    def wrap(suffix, description, value, size_limit=500):
        # Every header shares the same name prefix and target hosts
        return HeaderWrapper(
            header=CondaRequestHeader(
                name=f"{HEADER_PREFIX}-{suffix}",
                description=description,
                value=value,
                hosts=REQUEST_HEADER_HOSTS,
            ),
            size_limit=size_limit,
        )

    custom_headers = [
        wrap(
            "Sys-Info",
            "Custom headers used to submit telemetry data",
            get_sys_info_header_value(),
        ),
        wrap(
            "Channels",
            "Header which exposes the channel URLs currently in use",
            get_channel_urls_header_value(),
        ),
        wrap(
            "Virtual-Pkgs",
            "Header which exposes the virtual packages currently in use",
            get_virtual_packages_header_value(),
        ),
        wrap(
            "Packages",
            "Header which exposes the currently installed packages",
            get_installed_packages_header_value(),
            size_limit=5_000,
        ),
    ]

    command = get_conda_command()

    if command == "search":
        custom_headers.append(
            wrap(
                "Search",
                "Header which exposes what is being searched for",
                get_search_term(),
            )
        )
    elif command in {"install", "create"}:
        custom_headers.append(
            wrap(
                "Install",
                "Header which exposes what is currently being installed as "
                "specified on the command line",
                get_install_arguments_header_value(),
            )
        )

    yield from validate_headers(custom_headers)
3 changes: 0 additions & 3 deletions environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,3 @@ channels:
dependencies:
- python>=3.8
- conda-canary/label/dev::conda
- opentelemetry-api
- opentelemetry-sdk
- opentelemetry-exporter-otlp-proto-grpc
Loading
Loading