Skip to content

Commit

Permalink
Add Prometheus metrics (#787)
Browse files Browse the repository at this point in the history
* Upgrade telemetry_metrics to 1.0 (stable API, no changes)

* Basic setup for Telemetry.Metrics and its Prometheus reporter

* Initial metrics for Arena game

* Add grafana, prometheus docker images to compose

* Metric for connected players

* Modify metrics and dashboards

* Formatting

* Fix rebase error with multiple terminate clauses

* Formatting
  • Loading branch information
AminArria authored Aug 2, 2024
1 parent 183a03a commit 41d4cdd
Show file tree
Hide file tree
Showing 11 changed files with 842 additions and 44 deletions.
5 changes: 4 additions & 1 deletion apps/arena/lib/arena/game_socket_handler.ex
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ defmodule Arena.GameSocketHandler do
event: {:joined, %GameJoined{player_id: player_id, config: to_broadcast_config(config), bounties: bounties}}
})

:telemetry.execute([:arena, :clients], %{count: 1})
{:reply, {:binary, encoded_msg}, state}
end

Expand Down Expand Up @@ -168,6 +169,8 @@ defmodule Arena.GameSocketHandler do

@impl true
def terminate(_reason, _req, %{game_finished: false, player_alive: true} = state) do
:telemetry.execute([:arena, :clients], %{count: -1})

if Application.get_env(:arena, :spawn_bots) do
spawn(fn ->
Finch.build(:get, Utils.get_bot_connection_url(state.game_id, state.client_id))
Expand All @@ -178,8 +181,8 @@ defmodule Arena.GameSocketHandler do
:ok
end

# Fallback clause: runs for every socket shutdown whose state does not match
# the `%{game_finished: false, player_alive: true}` clause above.
# `@impl true` is declared once, on the first `terminate/3` clause, and is
# therefore not repeated here.
def terminate(_reason, _req, _state) do
  # Decrement the connected-clients counter (the join path emits `count: 1`).
  :telemetry.execute([:arena, :clients], %{count: -1})
  :ok
end

Expand Down
11 changes: 11 additions & 0 deletions apps/arena/lib/arena/game_updater.ex
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ defmodule Arena.GameUpdater do

:ok = GameTracker.start_tracking(match_id, game_state.client_to_player_map, game_state.players, clients_ids)

:telemetry.execute([:arena, :game], %{count: 1})

{:ok,
%{
match_id: match_id,
Expand All @@ -93,6 +95,12 @@ defmodule Arena.GameUpdater do
}}
end

# Shutdown callback for a game process: reverts this game's contribution to
# the telemetry events emitted from this module. The exit reason and the
# process state are both ignored.
def terminate(_reason, _state) do
  # Balances the `count: 1` emitted when the game was started.
  :telemetry.execute([:arena, :game], %{count: -1})

  # Publishes a zeroed tick so the "last tick duration" gauge does not keep
  # showing the value of a game that no longer exists.
  # NOTE(review): the same event also feeds the `arena.game.tick.duration`
  # distribution, so every finished game adds one 0 observation — confirm
  # this is acceptable.
  :telemetry.execute([:arena, :game, :tick], %{duration: 0, duration_measure: 0})
  :ok
end

##########################
# API Callbacks
##########################
Expand Down Expand Up @@ -227,6 +235,7 @@ defmodule Arena.GameUpdater do
end

def handle_info(:update_game, %{game_state: game_state} = state) do
tick_duration_start_at = System.monotonic_time()
Process.send_after(self(), :update_game, state.game_config.game.tick_rate_ms)
now = DateTime.utc_now() |> DateTime.to_unix(:millisecond)
delta_time = now - game_state.server_timestamp
Expand Down Expand Up @@ -271,6 +280,8 @@ defmodule Arena.GameUpdater do
broadcast_game_update(game_state)
game_state = %{game_state | killfeed: [], damage_taken: %{}, damage_done: %{}}

tick_duration = System.monotonic_time() - tick_duration_start_at
:telemetry.execute([:arena, :game, :tick], %{duration: tick_duration, duration_measure: tick_duration})
{:noreply, %{state | game_state: game_state}}
end

Expand Down
59 changes: 23 additions & 36 deletions apps/arena/lib/arena_web/telemetry.ex
Original file line number Diff line number Diff line change
Expand Up @@ -11,51 +11,38 @@ defmodule ArenaWeb.Telemetry do
children = [
# Telemetry poller will execute the given period measurements
# every 10_000ms. Learn more here: https://hexdocs.pm/telemetry_metrics
{:telemetry_poller, measurements: periodic_measurements(), period: 10_000}
{:telemetry_poller, measurements: periodic_measurements(), period: 10_000},
# Add reporters as children of your supervision tree.
# {Telemetry.Metrics.ConsoleReporter, metrics: metrics()}
# {Telemetry.Metrics.ConsoleReporter, metrics: metrics()},
## TODO: default port 9568, we probably need to make this dynamic to accommodate multiple apps
{TelemetryMetricsPrometheus, [metrics: metrics()]}
]

Supervisor.init(children, strategy: :one_for_one)
end

def metrics do
[
# Phoenix Metrics
summary("phoenix.endpoint.start.system_time",
unit: {:native, :millisecond}
),
summary("phoenix.endpoint.stop.duration",
unit: {:native, :millisecond}
),
summary("phoenix.router_dispatch.start.system_time",
tags: [:route],
unit: {:native, :millisecond}
),
summary("phoenix.router_dispatch.exception.duration",
tags: [:route],
unit: {:native, :millisecond}
),
summary("phoenix.router_dispatch.stop.duration",
tags: [:route],
unit: {:native, :millisecond}
),
summary("phoenix.socket_connected.duration",
unit: {:native, :millisecond}
),
summary("phoenix.channel_joined.duration",
unit: {:native, :millisecond}
),
summary("phoenix.channel_handled_in.duration",
tags: [:event],
unit: {:native, :millisecond}
),

# VM Metrics
summary("vm.memory.total", unit: {:byte, :kilobyte}),
summary("vm.total_run_queue_lengths.total"),
summary("vm.total_run_queue_lengths.cpu"),
summary("vm.total_run_queue_lengths.io")
last_value("vm.memory.total", unit: {:byte, :kilobyte}),
last_value("vm.total_run_queue_lengths.total"),
last_value("vm.total_run_queue_lengths.cpu"),
last_value("vm.total_run_queue_lengths.io"),

## Arena (game) metrics
sum("arena.game.count", description: "Number of games in progress"),
## TODO: this metric is an attempt to gather data to properly set the buckets for the distribution metric below
last_value("arena.game.tick.duration_measure",
description: "Last game tick duration",
unit: {:native, :nanosecond}
),
## TODO: Buckets probably need to be redefined, currently they all fall under the first bucket
distribution("arena.game.tick.duration",
description: "Time spent on running a game tick",
unit: {:native, :nanosecond},
reporter_options: [buckets: [7_500_000.0, 15_000_000.0, 22_500_000.0]]
),
sum("arena.clients.count", description: "Number of clients (websockets) connected")
]
end

Expand Down
3 changes: 2 additions & 1 deletion apps/arena/mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ defmodule Arena.MixProject do
{:phoenix_live_dashboard, "~> 0.8.2"},
{:swoosh, "~> 1.3"},
{:finch, "~> 0.13"},
{:telemetry_metrics, "~> 0.6"},
{:telemetry_metrics, "~> 1.0"},
{:telemetry_poller, "~> 1.0"},
{:telemetry_metrics_prometheus, "~> 1.1"},
{:gettext, "~> 0.20"},
{:jason, "~> 1.2"},
{:dns_cluster, "~> 0.1.1"},
Expand Down
2 changes: 1 addition & 1 deletion apps/configurator/mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ defmodule Configurator.MixProject do
{:heroicons,
github: "tailwindlabs/heroicons", tag: "v2.1.1", sparse: "optimized", app: false, compile: false, depth: 1},
{:finch, "~> 0.13"},
{:telemetry_metrics, "~> 0.6"},
{:telemetry_metrics, "~> 1.0"},
{:telemetry_poller, "~> 1.0"},
{:jason, "~> 1.2"},
{:bandit, "~> 1.2"},
Expand Down
2 changes: 1 addition & 1 deletion apps/game_client/mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ defmodule GameClient.MixProject do
{:tailwind, "~> 0.2.0", runtime: Mix.env() == :dev},
{:swoosh, "~> 1.3"},
{:finch, "~> 0.13"},
{:telemetry_metrics, "~> 0.6"},
{:telemetry_metrics, "~> 1.0"},
{:telemetry_poller, "~> 1.0"},
{:gettext, "~> 0.20"},
{:jason, "~> 1.2"},
Expand Down
2 changes: 1 addition & 1 deletion apps/gateway/mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ defmodule Gateway.MixProject do
{:phoenix_live_dashboard, "~> 0.8.2"},
{:swoosh, "~> 1.3"},
{:finch, "~> 0.13"},
{:telemetry_metrics, "~> 0.6"},
{:telemetry_metrics, "~> 1.0"},
{:telemetry_poller, "~> 1.0"},
{:gettext, "~> 0.20"},
{:jason, "~> 1.2"},
Expand Down
20 changes: 18 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
version: '3.2'

services:
postgres:
container_name: game_backend_db
Expand All @@ -13,6 +11,24 @@ services:
- PGDATA=/var/lib/postgresql/data/pgdata
volumes:
- game_backend_data:/var/lib/postgresql/data/
prometheus:
container_name: prometheus
image: prom/prometheus:latest
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
ports:
- 9090:9090
grafana:
image: grafana/grafana-oss
container_name: grafana
ports:
- 9100:3000
volumes:
- grafana_data:/var/lib/grafana

volumes:
game_backend_data:
grafana_data:
Loading

0 comments on commit 41d4cdd

Please sign in to comment.