Skip to content

Commit

Permalink
Classic snap (#173)
Browse files Browse the repository at this point in the history
* classic snap working

* ruff conf changes

* better path label

* working but tests are still broken

* working except for tracing tests

* fixed tests

* scenario

* integration test

* linting

* add_relation is deprecated

* enough?

* forgot await

* formatting

* cast instead of ignore

* type hinting

* fmt

* static

* switch from black to ruff format

* formatting

* improve comment

* raises

* remove black settings

* only observe config_changed once

* add_relation is deprecated

* start and enable snap after refresh. Only refresh if revision changes

* fix rootfs_path

* explain the path fixing

* better commenting

* fmt

* not a method

* delete commented code

* pin major version of ops-scenario

* revert accidental change (also snap enable does not do what we want)
  • Loading branch information
dstathis authored Sep 12, 2024
1 parent 8caf3ec commit e4102a5
Show file tree
Hide file tree
Showing 17 changed files with 360 additions and 139 deletions.
6 changes: 6 additions & 0 deletions charmcraft-22.04.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ peers:

config:
options:
classic_snap:
description: |
Choose whether to use the classic snap over the strictly confined
one. Defaults to "true".
type: boolean
default: true
tls_insecure_skip_verify:
description: |
Flag to skip the verification for insecure TLS.
Expand Down
14 changes: 5 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,21 @@ branch = true
[tool.coverage.report]
show_missing = true

# Formatting tools configuration
[tool.black]
line-length = 99
target-version = ["py38"]

# Linting tools configuration
[tool.ruff]
line-length = 99
exclude = ["__pycache__", "*.egg_info"]

[tool.ruff.lint]
select = ["E", "W", "F", "C", "N", "R", "D", "I001"]
# Ignore E501 because using black creates errors with this
# Ignore D107 Missing docstring in __init__
ignore = ["C901", "E501", "D107", "RET504"]
ignore = ["C901", "D107", "E501", "RET504"]

[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
# D100, D101, D102, D103: Ignore missing docstrings in tests
"tests/*" = ["D100","D101","D102","D103"]

[tool.ruff.pydocstyle]
[tool.ruff.lint.pydocstyle]
convention = "google"

# Static analysis tools configuration
Expand Down
214 changes: 156 additions & 58 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# See LICENSE file for licensing details.

"""A juju charm for Grafana Agent on Kubernetes."""

import logging
import os
import re
Expand All @@ -12,6 +13,7 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Union, get_args

import yaml
from charms.grafana_agent.v0.cos_agent import COSAgentRequirer, ReceiverProtocol
from charms.operator_libs_linux.v2 import snap # type: ignore
from charms.tempo_k8s.v1.charm_tracing import trace_charm
Expand Down Expand Up @@ -186,7 +188,8 @@ def __init__(self, *args):
self._on_cos_data_changed,
)
self.framework.observe(
self._cos.on.validation_error, self._on_cos_validation_error # pyright: ignore
self._cos.on.validation_error,
self._on_cos_validation_error, # pyright: ignore
)
self.framework.observe(self.on["juju_info"].relation_joined, self._on_juju_info_joined)
self.framework.observe(self.on.install, self.on_install)
Expand Down Expand Up @@ -226,6 +229,14 @@ def _on_cos_validation_error(self, event):

self._update_status()

def _verify_snap_track(self) -> None:
    """Call ``install_ga_snap`` with the current value of the ``classic_snap`` option.

    Raises:
        GrafanaAgentInstallError: If the install call fails with ``snap.SnapError`` or
            ``SnapSpecError``.
    """
    use_classic = bool(self.config["classic_snap"])
    try:
        # install_ga_snap calls snap.ensure, so it should do the right thing whether or not
        # the track actually changed.
        install_ga_snap(classic=use_classic)
    except (snap.SnapError, SnapSpecError) as err:
        raise GrafanaAgentInstallError("Failed to refresh grafana-agent.") from err

def on_install(self, _event) -> None:
    """Handle the install event by installing the Grafana Agent snap.

    The event argument is unused; all work is delegated to ``_install``.
    """
    self._install()
Expand All @@ -234,7 +245,7 @@ def _install(self) -> None:
"""Install/refresh the Grafana Agent snap."""
self.unit.status = MaintenanceStatus("Installing grafana-agent snap")
try:
install_ga_snap(classic=False)
install_ga_snap(classic=bool(self.config["classic_snap"]))
except (snap.SnapError, SnapSpecError) as e:
raise GrafanaAgentInstallError("Failed to install grafana-agent.") from e

Expand Down Expand Up @@ -402,7 +413,9 @@ def _additional_integrations(self) -> Dict[str, Any]:
)
return {
"node_exporter": {
"rootfs_path": "/var/lib/snapd/hostfs",
"rootfs_path": "/"
if bool(self.config["classic_snap"])
else "/var/lib/snapd/hostfs",
"enabled": True,
"enable_collectors": [
"logind",
Expand Down Expand Up @@ -509,73 +522,158 @@ def relabeling_config(self) -> list:
}
] + topology_relabels # type: ignore

def _evaluate_log_paths(self, paths: List[str], snap: str, app: str) -> List[str]:
"""Evaluate each log path using snap to resolve environment variables.
Raises:
Exception: If echo fails.
"""
# There is a potential for shell injection here. It seems okay because the potential
# attacking charm has root access on the machine already anyway.
new_paths = []
for path in paths:
cmd = f"echo 'echo {path}' | snap run --shell {snap}.{app}"
p = subprocess.run(cmd, shell=True, capture_output=True, text=True)
if p.returncode != 0:
raise Exception(
f"Failed to evaluate path with command: {cmd}\nSTDOUT: {p.stdout}\nSTDERR: {p.stderr}"
)
new_paths.append(p.stdout.strip())
return new_paths

def _snap_plug_job(
self, owner: str, target_path: str, app: str, unit: str, label_path: str
) -> dict:
job_name = f"{owner}-{label_path.replace('/', '-')}"
job = {
"job_name": job_name,
"static_configs": [
{
"targets": ["localhost"],
"labels": {
"job": job_name,
"__path__": target_path,
**{ # from grafana-agent's topology
k: v
for k, v in self._instance_topology.items()
if k not in ["juju_unit", "juju_application"]
},
# from the topology of the charm owning the snap
"juju_application": app,
"juju_unit": unit,
"snap_name": owner,
},
}
],
"pipeline_stages": [
{
"drop": {
"expression": ".*file is a directory.*",
},
},
],
}

job["relabel_configs"] = [
{
"source_labels": ["__path__"],
"target_label": "path",
"replacement": label_path if label_path.startswith("/") else f"/{label_path}",
}
]
return job

def _path_label(self, path):
"""Best effort at figuring out what the path label should be.
Try to make the path reflect what it would normally be with a non snap version of the
software.
"""
match = re.match("^.*(var/log/.*$)", path)
if match:
return match.group(1)
match = re.match("^/var/snap/.*/common/(.*)$", path)
if match:
return match.group(1)
# We couldn't figure it out so just use the full path.
return path

@property
def _snap_plugs_logging_configs(self) -> List[Dict[str, Any]]:
"""One logging config for each separate snap connected over the "logs" endpoint."""
agent_fstab = SnapFstab(Path("/var/lib/snapd/mount/snap.grafana-agent.fstab"))

shared_logs_configs = []
endpoint_owners = {
endpoint.owner: {"juju_application": topology.application, "juju_unit": topology.unit}
for endpoint, topology in self._cos.snap_log_endpoints_with_topology
}
for fstab_entry in agent_fstab.entries:
if fstab_entry.owner not in endpoint_owners.keys():
continue

target_path = (
f"{fstab_entry.target}/**"
if fstab_entry
else "/snap/grafana-agent/current/shared-logs/**"
)
job_name = f"{fstab_entry.owner}-{fstab_entry.endpoint_source.replace('/', '-')}"
job = {
"job_name": job_name,
"static_configs": [
{
"targets": ["localhost"],
"labels": {
"job": job_name,
"__path__": target_path,
**{ # from grafana-agent's topology
k: v
for k, v in self._instance_topology.items()
if k not in ["juju_unit", "juju_application"]
},
# from the topology of the charm owning the snap
**endpoint_owners[fstab_entry.owner],
"snap_name": fstab_entry.owner,
},
}
],
"pipeline_stages": [
{
"drop": {
"expression": ".*file is a directory.*",
},
},
],
}
if self.config["classic_snap"]:
# Iterate through each logging endpoint.
for endpoint, topology in self._cos.snap_log_endpoints_with_topology:
try:
with open(f"/snap/{endpoint.owner}/current/meta/snap.yaml") as f:
snap_yaml = yaml.safe_load(f)
except FileNotFoundError:
logger.error(
f"snap file for {endpoint.owner} not found. It is likely not installed. Skipping."
)
continue
# Get the directories we need to monitor.
log_dirs = snap_yaml["slots"][endpoint.name]["source"]["read"]
for key in snap_yaml["apps"].keys():
snap_app_name = key # Just use any app.
break
# Evaluate any variables in the paths.
log_dirs = self._evaluate_log_paths(
paths=log_dirs, snap=endpoint.owner, app=snap_app_name
)
# Create a job for each path.
for path in log_dirs:
job = self._snap_plug_job(
endpoint.owner,
f"{path}/**",
topology.application,
str(topology.unit),
self._path_label(path),
)
shared_logs_configs.append(job)

job["relabel_configs"] = [
{
"source_labels": ["__path__"],
"target_label": "path",
"replacement": fstab_entry.relative_target,
else:
endpoint_owners = {
endpoint.owner: {
"juju_application": topology.application,
"juju_unit": topology.unit,
}
]

shared_logs_configs.append(job)
for endpoint, topology in self._cos.snap_log_endpoints_with_topology
}
for fstab_entry in agent_fstab.entries:
if fstab_entry.owner not in endpoint_owners.keys():
continue

target_path = (
f"{fstab_entry.target}/**"
if fstab_entry
else "/snap/grafana-agent/current/shared-logs/**"
)

job = self._snap_plug_job(
fstab_entry.owner,
target_path,
endpoint_owners[fstab_entry.owner]["juju_application"],
endpoint_owners[fstab_entry.owner]["juju_unit"],
fstab_entry.relative_target,
)
shared_logs_configs.append(job)

return shared_logs_configs

def _connect_logging_snap_endpoints(self):
for plug in self._cos.snap_log_endpoints:
try:
self.snap.connect("logs", service=plug.owner, slot=plug.name)
except snap.SnapError as e:
logger.error(f"error connecting plug {plug} to grafana-agent:logs")
logger.error(e.message)
# We need to run _verify_snap_track so we make sure we have refreshed BEFORE connecting.
self._verify_snap_track()
if not self.config["classic_snap"]:
for plug in self._cos.snap_log_endpoints:
try:
self.snap.connect("logs", service=plug.owner, slot=plug.name)
except snap.SnapError as e:
logger.error(f"error connecting plug {plug} to grafana-agent:logs")
logger.error(e.message)

def positions_dir(self) -> str:
"""Return the positions directory."""
Expand Down
21 changes: 12 additions & 9 deletions src/grafana_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# See LICENSE file for licensing details.

"""Common logic for both k8s and machine charms for Grafana Agent."""

import json
import logging
import os
Expand Down Expand Up @@ -270,6 +271,7 @@ def _on_loki_push_api_endpoint_departed(self, _event=None):

def _on_config_changed(self, _event=None):
    """Rebuild the config.

    Calls ``_verify_snap_track`` first, then ``_update_config`` and ``_update_status``.
    The event argument is unused.
    """
    self._verify_snap_track()
    self._update_config()
    self._update_status()

Expand All @@ -296,6 +298,9 @@ def _on_cert_transfer_removed(self, event: CertificateTransferRemovedEvent):
self.run(["update-ca-certificates", "--fresh"])

# Abstract Methods
def _verify_snap_track(self) -> None:
    """Hook to be overridden by concrete charms; invoked from ``_on_config_changed``.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError("Please override the _verify_snap_track method")

@property
def is_k8s(self) -> bool:
"""Is this a k8s charm."""
Expand Down Expand Up @@ -608,9 +613,7 @@ def _delete_file_if_exists(self, file_path):
def _on_dashboard_status_changed(self, _event=None):
"""Re-initialize dashboards to forward."""
# TODO: add constructor arg for `inject_dropdowns=False` instead of 'private' method?
self._grafana_dashboards_provider._reinitialize_dashboard_data(
inject_dropdowns=False
) # noqa
self._grafana_dashboards_provider._reinitialize_dashboard_data(inject_dropdowns=False) # noqa
self._update_status()

def _enhance_endpoints_with_tls(self, endpoints) -> List[Dict[str, Any]]:
Expand Down Expand Up @@ -922,12 +925,12 @@ def _loki_config(self) -> Dict[str, Union[Any, List[Any]]]:
for config in configs:
for scrape_config in config.get("scrape_configs", []):
if scrape_config.get("loki_push_api"):
scrape_config["loki_push_api"]["server"][
"http_tls_config"
] = self.tls_config
scrape_config["loki_push_api"]["server"][
"grpc_tls_config"
] = self.tls_config
scrape_config["loki_push_api"]["server"]["http_tls_config"] = (
self.tls_config
)
scrape_config["loki_push_api"]["server"]["grpc_tls_config"] = (
self.tls_config
)

configs.extend(self._additional_log_configs) # type: ignore
return (
Expand Down
Loading

0 comments on commit e4102a5

Please sign in to comment.