diff --git a/README.rst b/README.rst index 988e27d..cc3f489 100644 --- a/README.rst +++ b/README.rst @@ -31,21 +31,21 @@ Metrics Example .. code-block:: - # HELP tableau_service_status description + # HELP tableau_service_status Tableau services statuses # TYPE tableau_service_status gauge - tableau_service_status{instance_id="0",node_id="node2",service_name="filestore",status="Active"} 1.0 - tableau_service_status{instance_id="0",node_id="node2",service_name="authnservice",status="Active"} 1.0 - tableau_service_status{instance_id="0",node_id="node2",service_name="databasemaintenance",status="Stopped"} 1.0 - tableau_service_status{instance_id="0",node_id="node2",service_name="dataserver",status="Active"} 1.0 - tableau_service_status{instance_id="1",node_id="node2",service_name="dataserver",status="Active"} 1.0 - tableau_service_status{instance_id="0",node_id="node2",service_name="cacheserver",status="Active"} 1.0 - tableau_service_status{instance_id="1",node_id="node2",service_name="cacheserver",status="Active"} 1.0 - tableau_service_status{instance_id="0",node_id="node2",service_name="interactive",status="Active"} 1.0 - tableau_service_status{instance_id="0",node_id="node2",service_name="searchserver",status="Active"} 1.0 - tableau_service_status{instance_id="0",node_id="node2",service_name="statsservice",status="Active"} 1.0 - tableau_service_status{instance_id="0",node_id="node2",service_name="vizportal",status="Active"} 1.0 - tableau_service_status{instance_id="1",node_id="node2",service_name="vizportal",status="Active"} 1.0 - tableau_service_status{instance_id="0",node_id="node2",service_name="backuprestore",status="Stopped"} 1.0 + tableau_service_status{instance_id="0",node_id="node2",service_name="filestore",state="Enabled",status="Active"} 1.0 + tableau_service_status{instance_id="0",node_id="node2",service_name="authnservice",state="Enabled",status="Active"} 1.0 + 
tableau_service_status{instance_id="0",node_id="node2",service_name="databasemaintenance",state="Disabled",status="Stopped"} 0.0 + tableau_service_status{instance_id="0",node_id="node2",service_name="dataserver",state="Enabled",status="Active"} 1.0 + tableau_service_status{instance_id="1",node_id="node2",service_name="dataserver",state="Enabled",status="Active"} 1.0 + tableau_service_status{instance_id="0",node_id="node2",service_name="cacheserver",state="Enabled",status="Active"} 1.0 + tableau_service_status{instance_id="1",node_id="node2",service_name="cacheserver",state="Enabled",status="Active"} 1.0 + tableau_service_status{instance_id="0",node_id="node2",service_name="interactive",state="Enabled",status="Active"} 1.0 + tableau_service_status{instance_id="0",node_id="node2",service_name="searchserver",state="Enabled",status="Active"} 1.0 + tableau_service_status{instance_id="0",node_id="node2",service_name="statsservice",state="Enabled",status="Active"} 1.0 + tableau_service_status{instance_id="0",node_id="node2",service_name="vizportal",state="Enabled",status="Active"} 1.0 + tableau_service_status{instance_id="1",node_id="node2",service_name="vizportal",state="Enabled",status="Active"} 1.0 + tableau_service_status{instance_id="0",node_id="node2",service_name="backuprestore",state="Disabled",status="Stopped"} 0.0 -------------------- @@ -89,6 +89,25 @@ Configuration A sample configuration can be found at `samples/config.yaml.template <samples/config.yaml.template>`_ +--------- +Dashboard +--------- + +A sample Grafana Dashboard can be found at +`samples/grafana.json <samples/grafana.json>`_ + +.. 
image:: samples/dashboard.jpeg + :width: 80% + :align: center + :alt: Grafana Dashboard + +-------- +Alerting +-------- + +Sample Prometheus alert rules can be found at +`samples/prometheus.yaml <samples/prometheus.yaml>`_ + ------------ Contributing ------------ diff --git a/samples/dashboard.jpeg b/samples/dashboard.jpeg new file mode 100644 index 0000000..4e305d9 Binary files /dev/null and b/samples/dashboard.jpeg differ diff --git a/samples/grafana.json b/samples/grafana.json new file mode 100644 index 0000000..b2ca981 --- /dev/null +++ b/samples/grafana.json @@ -0,0 +1,535 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1566, + "iteration": 1692780417224, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 0, + "text": "DOWN" + }, + "1": { + "color": "green", + "index": 2, + "text": "UP" + } + }, + "type": "value" + }, + { + "options": { + "from": 0.1, + "result": { + "color": "semi-dark-yellow", + "index": 1, + "text": "IMPAIRED" + }, + "to": 0.9 + }, + "type": "range" + } + ], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "#EAB839", + "value": 0.1 + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 0, + "y": 0 + }, + "id": 36, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": 
"auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.5.15", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "editorMode": "code", + "expr": "avg (tableau_service_status{environment=~\"$environment\", node_id=~\"$nodeId\", instance_id=~\"$instanceId\", service_name=~\"$serviceName\",state=~\"$state\"}) ", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Tableau Status", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 70, + "lineWidth": 0, + "spanNulls": true + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "#EAB839", + "value": 0.1 + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 19, + "x": 5, + "y": 0 + }, + "id": 35, + "options": { + "alignValue": "left", + "legend": { + "displayMode": "hidden", + "placement": "bottom" + }, + "mergeValues": true, + "rowHeight": 0.9, + "showValue": "never", + "tooltip": { + "mode": "none", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "editorMode": "code", + "expr": "avg by (node_id) (tableau_service_status{environment=~\"$environment\", node_id=~\"$nodeId\", instance_id=~\"$instanceId\", service_name=~\"$serviceName\",state=~\"$state\"}) ", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Tableau Status by Node", + "transparent": true, + "type": "state-timeline" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 20, + 
"panels": [], + "repeat": "nodeId", + "title": "$nodeId details", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "#EAB839", + "value": 0.1 + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "name" + }, + "pluginVersion": "8.5.15", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "editorMode": "code", + "expr": "avg by (node_id) (tableau_service_status{environment=~\"$environment\", node_id=~\"$nodeId\", instance_id=~\"$instanceId\", service_name=~\"$serviceName\",state=~\"$state\"}) ", + "legendFormat": "Node: {{node_id}}", + "range": true, + "refId": "A" + } + ], + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "#EAB839", + "value": 0.1 + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 27, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + 
"textMode": "name" + }, + "pluginVersion": "8.5.15", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "editorMode": "code", + "expr": "avg by (node_id, instance_id, service_name, status) (tableau_service_status{environment=~\"$environment\", node_id=~\"$nodeId\", service_name=~\"$serviceName\",state=~\"$state\"}) ", + "legendFormat": "{{service_name}}-{{instance_id}}: {{status}}", + "range": true, + "refId": "A" + } + ], + "transparent": true, + "type": "stat" + } + ], + "refresh": "", + "schemaVersion": 36, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "definition": "label_values(tableau_service_status, environment)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "environment", + "options": [], + "query": { + "query": "label_values(tableau_service_status, environment)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "definition": "label_values(tableau_service_status{environment=\"$environment\"}, node_id)", + "hide": 0, + "includeAll": true, + "label": "Node", + "multi": true, + "name": "nodeId", + "options": [], + "query": { + "query": "label_values(tableau_service_status{environment=\"$environment\"}, node_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "definition": 
"label_values(tableau_service_status{environment=\"$environment\", node_id=~\"$nodeId\"}, service_name)", + "hide": 0, + "includeAll": true, + "label": "Service", + "multi": true, + "name": "serviceName", + "options": [], + "query": { + "query": "label_values(tableau_service_status{environment=\"$environment\", node_id=~\"$nodeId\"}, service_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "definition": "label_values(tableau_service_status{environment=\"$environment\", node_id=~\"$nodeId\", service_name=~\"$serviceName\"}, instance_id)", + "hide": 0, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instanceId", + "options": [], + "query": { + "query": "label_values(tableau_service_status{environment=\"$environment\", node_id=~\"$nodeId\", service_name=~\"$serviceName\"}, instance_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "Enabled", + "value": "Enabled" + }, + "datasource": { + "type": "prometheus", + "uid": "000000040" + }, + "definition": "label_values(tableau_service_status{environment=\"$environment\", node_id=~\"$nodeId\", instance_id=~\"$instanceId\",service_name=~\"$serviceName\"}, state)", + "hide": 0, + "includeAll": true, + "label": "State", + "multi": false, + "name": "state", + "options": [], + "query": { + "query": "label_values(tableau_service_status{environment=\"$environment\", node_id=~\"$nodeId\", instance_id=~\"$instanceId\",service_name=~\"$serviceName\"}, state)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": 
"now-12h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Tableau Service Status", + "uid": "l21Mx1RIz", + "version": 39, + "weekStart": "" +} diff --git a/samples/prometheus.yaml b/samples/prometheus.yaml new file mode 100644 index 0000000..6b07d42 --- /dev/null +++ b/samples/prometheus.yaml @@ -0,0 +1,22 @@ +groups: + - name: tableau-service-status-alerts + rules: + - alert: TableauImpaired + expr: + avg(tableau_service_status{state="Enabled"}) < 1 + for: 30m + labels: + severity: warning + annotations: + title: 'Tableau is Impaired' + description: 'One or more Tableau services are down for more than 30 minutes' + + - alert: TableauDown + expr: + avg(tableau_service_status{state="Enabled"}) == 0 + for: 5m + labels: + severity: critical + annotations: + title: 'Tableau is Down' + description: 'All Tableau Services are down for more than 5 minutes' diff --git a/src/tableau_prometheus_exporter/server.py b/src/tableau_prometheus_exporter/server.py index ad78b60..0dee286 100644 --- a/src/tableau_prometheus_exporter/server.py +++ b/src/tableau_prometheus_exporter/server.py @@ -14,7 +14,7 @@ GAUGE_TABLEAU_SERVICE_STATUS: Gauge = Gauge( name="tableau_service_status", - documentation="description", + documentation="Tableau services statuses", labelnames=[ "node_id", "service_name",