diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0fb8869..00e7dc2 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,11 +10,11 @@ on: jobs: build: - - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: python-version: ["3.8", "3.12"] + os: ["macos-latest", "ubuntu-latest", "windows-latest"] steps: - uses: actions/checkout@v3 @@ -22,17 +22,19 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Add conda to system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH + - uses: conda-incubator/setup-miniconda@v3 + name: Setup Miniconda + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} - name: Install dependencies (conda) run: | - conda env update --file environment.yml --name base - conda install --file requirements.dev.txt - - name: Install conda-basic-auth + conda env update --file environment.yaml --name test + conda install --name test --file requirements.dev.txt + - name: Conda info run: | - pip install -e . 
+ conda info + conda list --name test - name: Test with pytest run: | - pytest --doctest-modules + conda run --name test pytest --doctest-modules diff --git a/anaconda_conda_telemetry/hooks.py b/anaconda_conda_telemetry/hooks.py index f78fe96..737be26 100644 --- a/anaconda_conda_telemetry/hooks.py +++ b/anaconda_conda_telemetry/hooks.py @@ -1,39 +1,73 @@ -from conda import __version__ as CONDA_VERSION +from __future__ import annotations + +import functools +import logging +import time +import typing + from conda.base.context import context +from conda.cli.main_list import list_packages from conda.common.url import mask_anaconda_token from conda.models.channel import all_channel_urls -from conda.plugins import hookimpl, CondaPostCommand -from opentelemetry import trace -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import ( - BatchSpanProcessor, - # ConsoleSpanExporter, -) -from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from conda.plugins import hookimpl, CondaRequestHeader try: from conda_build import __version__ as CONDA_BUILD_VERSION except ImportError: CONDA_BUILD_VERSION = "n/a" -# Tracer -provider = TracerProvider() -exporter = OTLPSpanExporter( - endpoint="http://localhost:4317/", - insecure=True, -) -processor = BatchSpanProcessor(exporter) -provider.add_span_processor(processor) +if typing.TYPE_CHECKING: + from collections.abc import Iterator + +logger = logging.getLogger(__name__) + +#: Field separator for request header +FIELD_SEPARATOR = ";" + +#: Size limit in bytes for the payload in the request header +SIZE_LIMIT = 7_000 + +#: Prefix for all custom headers submitted via this plugin +HEADER_PREFIX = "Anaconda-Telemetry" + +#: Name of the virtual package header +HEADER_VIRTUAL_PACKAGES = f"{HEADER_PREFIX}-Virtual-Packages" + +#: Name of the channels header +HEADER_CHANNELS = f"{HEADER_PREFIX}-Channels" + +#: Name of the packages header +HEADER_PACKAGES = 
f"{HEADER_PREFIX}-Packages" -# Enable when debugging output is desired -# processor_console = BatchSpanProcessor(ConsoleSpanExporter()) -# provider.add_span_processor(processor_console) +#: Name of the search header +HEADER_SEARCH = f"{HEADER_PREFIX}-Search" -# Sets the global default tracer provider -trace.set_tracer_provider(provider) +#: Name of the install header +HEADER_INSTALL = f"{HEADER_PREFIX}-Install" -# Creates a tracer from the global tracer provider -tracer = trace.get_tracer("anaconda-conda-telemetry") +#: Name of the sys info header +HEADER_SYS_INFO = f"{HEADER_PREFIX}-Sys-Info" + +#: Hosts we want to submit request headers to +REQUEST_HEADER_HOSTS = {"repo.anaconda.com", "conda.anaconda.org"} + + +def timer(func): + @functools.wraps(func) + def wrapper_timer(*args, **kwargs): + if logger.getEffectiveLevel() <= logging.INFO: + tic = time.perf_counter() + value = func(*args, **kwargs) + toc = time.perf_counter() + elapsed_time = toc - tic + logger.info( + f"function: {func.__name__}; duration (seconds): {elapsed_time:0.4f}" + ) + return value + + return func(*args, **kwargs) + + return wrapper_timer def get_virtual_packages() -> tuple[str, ...]: @@ -54,35 +88,183 @@ def get_channel_urls() -> tuple[str, ...]: return tuple(mask_anaconda_token(c) for c in channels) -def submit_telemetry_data(command: str): +def get_conda_command() -> str: """ - Submits telemetry data to the configured data collector + Use ``context._argparse_args`` to determine the conda command that is currently being run """ - with tracer.start_as_current_span("post_command_hook") as current_span: - current_span.set_attribute( - "python_implementation", context.python_implementation_name_version[0] - ) - current_span.set_attribute( - "python_version", context.python_implementation_name_version[1] - ) - current_span.set_attribute("conda_version", CONDA_VERSION) - current_span.set_attribute("solver_version", context.solver_user_agent()) - 
current_span.set_attribute("conda_build_version", 
CONDA_BUILD_VERSION) - current_span.set_attribute("virtual_packages", get_virtual_packages()) - current_span.set_attribute( - "platform_system", context.platform_system_release[0] - ) - current_span.set_attribute( - "platform_release", context.platform_system_release[1] - ) - current_span.set_attribute("channel_urls", get_channel_urls()) - current_span.set_attribute("conda_command", command) + return context._argparse_args.cmd -@hookimpl -def conda_post_commands(): - yield CondaPostCommand( - name="post-command-submit-telemetry-data", - action=submit_telemetry_data, - run_for=["install", "remove", "update", "create"], +def get_package_list() -> tuple[str, ...]: + """ + Retrieve the list of packages in the current environment + """ + _, packages = list_packages(context.active_prefix, format="canonical") + + return packages + + +def get_search_term() -> str: + """ + Retrieve the search term being used when the ``search`` command is run + """ + return context._argparse_args.match_spec + + +def get_install_arguments() -> tuple[str, ...]: + """ + Get the positional arguments which have been specified via the ``install`` or ``create`` commands + """ + return context._argparse_args.packages + + +@timer +@functools.lru_cache(None) +def get_sys_info_header_value() -> str: + """ + Return ``;`` delimited string of extra system information + """ + telemetry_data = { + "conda_build_version": CONDA_BUILD_VERSION, + "conda_command": get_conda_command(), + } + + return FIELD_SEPARATOR.join( + f"{key}:{value}" for key, value in telemetry_data.items() ) + + +@timer +@functools.lru_cache(None) +def get_channel_urls_header_value() -> str: + """ + Return ``FIELD_SEPARATOR`` delimited string of channel URLs + """ + return FIELD_SEPARATOR.join(get_channel_urls()) + + +@timer +@functools.lru_cache(None) +def get_virtual_packages_header_value() -> str: + """ + Return ``FIELD_SEPARATOR`` delimited string of virtual packages + """ + return FIELD_SEPARATOR.join(get_virtual_packages()) + + +@timer 
+@functools.lru_cache(None) +def get_install_arguments_header_value() -> str: + """ + Return ``FIELD_SEPARATOR`` delimited string of install arguments + """ + return FIELD_SEPARATOR.join(get_install_arguments()) + + +@timer +@functools.lru_cache(None) +def get_installed_packages_header_value() -> str: + """ + Return ``FIELD_SEPARATOR`` delimited string of installed packages + """ + return FIELD_SEPARATOR.join(get_package_list()) + + +class HeaderWrapper(typing.NamedTuple): + """ + Object that wraps ``CondaRequestHeader`` and adds a ``size_limit`` field + """ + + header: CondaRequestHeader + size_limit: int + + +def validate_headers( + custom_headers: list[HeaderWrapper], +) -> Iterator[CondaRequestHeader]: + """ + Makes sure that all headers combined are not larger than ``SIZE_LIMIT``. + + Any headers over their individual limits will be truncated. + """ + total_max_size = sum(header.size_limit for header in custom_headers) + assert ( + total_max_size <= SIZE_LIMIT + ), f"Total header size limited to {SIZE_LIMIT}. 
Exceeded with {total_max_size=}" + + for wrapper in custom_headers: + wrapper.header.value = wrapper.header.value[: wrapper.size_limit] + yield wrapper.header + + +@hookimpl +def conda_request_headers(): + custom_headers = [ + HeaderWrapper( + header=CondaRequestHeader( + name=HEADER_SYS_INFO, + description="Custom headers used to submit telemetry data", + value=get_sys_info_header_value(), + hosts=REQUEST_HEADER_HOSTS, + ), + size_limit=500, + ), + HeaderWrapper( + header=CondaRequestHeader( + name=HEADER_CHANNELS, + description="Header which exposes the channel URLs currently in use", + value=get_channel_urls_header_value(), + hosts=REQUEST_HEADER_HOSTS, + ), + size_limit=500, + ), + HeaderWrapper( + header=CondaRequestHeader( + name=HEADER_VIRTUAL_PACKAGES, + description="Header which exposes the virtual packages currently in use", + value=get_virtual_packages_header_value(), + hosts=REQUEST_HEADER_HOSTS, + ), + size_limit=500, + ), + HeaderWrapper( + header=CondaRequestHeader( + name=HEADER_PACKAGES, + description="Header which exposes the currently installed packages", + value=get_installed_packages_header_value(), + hosts=REQUEST_HEADER_HOSTS, + ), + size_limit=5_000, + ), + ] + + command = get_conda_command() + + if command == "search": + custom_headers.append( + HeaderWrapper( + header=CondaRequestHeader( + name=HEADER_SEARCH, + description="Header which exposes what is being searched for", + value=get_search_term(), + hosts=REQUEST_HEADER_HOSTS, + ), + size_limit=500, + ) + ) + + if command in {"install", "create"}: + custom_headers.append( + HeaderWrapper( + header=CondaRequestHeader( + name=HEADER_INSTALL, + description="Header which exposes what is currently being installed as " + "specified on the command line", + value=get_install_arguments_header_value(), + hosts=REQUEST_HEADER_HOSTS, + ), + size_limit=500, + ) + ) + + yield from validate_headers(custom_headers) diff --git a/environment.yaml b/environment.yaml index 16ac6ca..556da6a 100644 --- 
a/environment.yaml +++ b/environment.yaml @@ -3,6 +3,3 @@ channels: dependencies: - python>=3.8 - conda-canary/label/dev::conda - - opentelemetry-api - - opentelemetry-sdk - - opentelemetry-exporter-otlp-proto-grpc diff --git a/tests/test_hooks.py b/tests/test_hooks.py new file mode 100644 index 0000000..715f12c --- /dev/null +++ b/tests/test_hooks.py @@ -0,0 +1,122 @@ +import logging + +import pytest + +from anaconda_conda_telemetry.hooks import ( + conda_request_headers, + HEADER_INSTALL, + HEADER_CHANNELS, + HEADER_SYS_INFO, + HEADER_VIRTUAL_PACKAGES, + HEADER_PACKAGES, + HEADER_SEARCH, + timer, +) + + +@pytest.fixture(autouse=True) +def packages(mocker): + """ + Mocks ``anaconda_conda_telemetry.hooks.list_packages`` + """ + packages = [ + "defaults/osx-arm64::sqlite-3.45.3-h80987f9_0", + "defaults/osx-arm64::pcre2-10.42-hb066dcc_1", + "defaults/osx-arm64::libxml2-2.13.1-h0b34f26_2", + ] + + def mock_list_packages(*args, **kwargs): + return 0, packages + + mocker.patch("anaconda_conda_telemetry.hooks.list_packages", mock_list_packages) + + return packages + + +def test_conda_request_header_default_headers(mocker): + """ + Ensure default headers are returned + """ + mock_argparse_args = mocker.MagicMock(match_spec="package", cmd="search") + mocker.patch( + "anaconda_conda_telemetry.hooks.context._argparse_args", mock_argparse_args + ) + headers = {header.name: header for header in tuple(conda_request_headers())} + + expected_header_names_values = { + HEADER_SYS_INFO: "", + HEADER_CHANNELS: "", + HEADER_PACKAGES: "", + HEADER_VIRTUAL_PACKAGES: "", + } + expected_header_names = {key for key, _ in expected_header_names_values.items()} + + assert len(set(headers.keys()).intersection(expected_header_names)) == len( + expected_header_names + ) + + +def test_conda_request_header_with_search(monkeypatch, mocker): + """ + Ensure default headers are returned when conda search is invoked + """ + monkeypatch.setattr("sys.argv", ["conda", "search", "package"]) 
+ mock_argparse_args = mocker.MagicMock(match_spec="package", cmd="search") + mocker.patch( + "anaconda_conda_telemetry.hooks.context._argparse_args", mock_argparse_args + ) + + header_names = {header.name for header in tuple(conda_request_headers())} + expected_header_names = { + HEADER_SYS_INFO, + HEADER_CHANNELS, + HEADER_PACKAGES, + HEADER_VIRTUAL_PACKAGES, + HEADER_SEARCH, + } + + assert len(header_names.intersection(expected_header_names)) == len( + expected_header_names + ) + + +def test_conda_request_header_with_install(monkeypatch, mocker): + """ + Ensure default headers are returned when conda install is invoked + """ + monkeypatch.setattr("sys.argv", ["conda", "install", "package"]) + mock_argparse_args = mocker.MagicMock(packages=["package"], cmd="install") + mocker.patch( + "anaconda_conda_telemetry.hooks.context._argparse_args", mock_argparse_args + ) + + header_names = {header.name for header in tuple(conda_request_headers())} + expected_header_names = { + HEADER_SYS_INFO, + HEADER_CHANNELS, + HEADER_PACKAGES, + HEADER_VIRTUAL_PACKAGES, + HEADER_INSTALL, + } + + assert len(header_names.intersection(expected_header_names)) == len( + expected_header_names + ) + + +def test_timer_in_info_mode(caplog): + """ + Ensure the timer decorator works and logs the time taken in INFO mode + """ + caplog.set_level(logging.INFO) + + @timer + def test(): + return 1 + + assert test() == 1 + + assert caplog.records[0].levelname == "INFO" + + assert "INFO anaconda_conda_telemetry.hooks" in caplog.text + assert "function: test; duration (seconds):" in caplog.text diff --git a/tests/test_placeholder.py b/tests/test_placeholder.py deleted file mode 100644 index e02870c..0000000 --- a/tests/test_placeholder.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_placeholder(): - assert 1 + 1 == 2