Skip to content

Commit

Permalink
Merge pull request #32 from weka/bugfix1
Browse files Browse the repository at this point in the history
Bugfix1
  • Loading branch information
vince-weka authored Dec 11, 2021
2 parents 01dbb41 + 0d2672a commit 1d8aab1
Show file tree
Hide file tree
Showing 7 changed files with 320 additions and 4 deletions.
3 changes: 3 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ TARGET=tarball/$TOOL
mkdir -p $TARGET
cp dist/$TOOL $TARGET
cp ${TOOL}.yml $TARGET
cp ${TOOL}.yml.j2 $TARGET
cp ${TOOL}.service $TARGET
cp ${TOOL}.service.j2 $TARGET
cd tarball
tar cvzf ../${TOOL}.tar $TOOL

2 changes: 1 addition & 1 deletion collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ def gather(self):
unit]

#log.debug(f"unit={unit}")
if unit != 'sizes':
if (unit != 'sizes') and (unit != "Blocks"):
try:
metric_objs['weka_stats_gauge'].add_metric(labelvalues, value,
timestamp=wekatime_to_datetime(timestamp).timestamp())
Expand Down
2 changes: 1 addition & 1 deletion export.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from wekalib.wekacluster import WekaCluster
import wekalib.exceptions

VERSION = "1.5.3"
VERSION = "1.5.5"
#VERSION = "experimental"

# set the root log
Expand Down
24 changes: 24 additions & 0 deletions export.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# systemd unit file for Weka Export
[Unit]
Description=Weka Export
Documentation=https://github.com/weka/export
After=network-online.target local-fs.target
Wants=network-online.target local-fs.target
AssertFileIsExecutable=/opt/weka/export/export
AssertFileNotEmpty=/opt/weka/export/export.yml

[Service]
Environment=LAUNCHED_BY_SYSTEMD=YES
WorkingDirectory=/opt/weka/export
ExecStart=/opt/weka/export/export -v -c /opt/weka/export/export.yml
# You should change the above line to match your environment

# Let systemd restart this service always
Restart=always

# Disable timeout logic and wait until process is stopped
TimeoutStopSec=infinity
SendSIGKILL=no

[Install]
WantedBy=multi-user.target
23 changes: 23 additions & 0 deletions export.service.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# systemd unit file for Weka Export
[Unit]
Description=Weka Export
Documentation=https://github.com/weka/export
After=network-online.target local-fs.target
Wants=network-online.target local-fs.target
AssertFileIsExecutable={{ dest_dir }}/export
AssertFileNotEmpty={{ dest_dir }}/export.yml

[Service]
Environment=LAUNCHED_BY_SYSTEMD=YES
WorkingDirectory={{ dest_dir }}
ExecStart={{ dest_dir }}/export -v -c {{ dest_dir }}/export.yml

# Let systemd restart this service always
Restart=always

# Disable timeout logic and wait until process is stopped
TimeoutStopSec=infinity
SendSIGKILL=no

[Install]
WantedBy=multi-user.target
263 changes: 263 additions & 0 deletions export.yml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
#
# config file for wekasolutions/export
#

# exporter section - info about how we're going to run
exporter:
listen_port: {{ listen_port }}
loki_host: {{ loki_host }}
loki_port: {{ loki_port }}
timeout: {{ timeout }}
max_procs: {{ max_procs }}
max_threads_per_proc: {{ max_threads_per_proc }}
backends_only: {{ backends_only }}

# cluster section - info about the weka cluster we want to export data from:
cluster:
auth_token_file: {{ auth_token_file }}
force_https: {{ force_https }}
verify_cert: {{ verify_cert }}
hosts: {{ hosts }}

# auth_token_file can be an absolute path, relative path, or filename.
# If just a filename it will be searched for in ".", "~/.weka", and "./.weka"
# This file can be generated with the 'weka user login' command and copied to where we are running
# hosts is a list of hostnames or IP addresses. Minimum of 1 required. You do not need to list all hosts in the cluster

# This file comes pre-set to populate the Grafana Panels that we've provided
#
# File format:
#
#stats:
# category:
# statistic: unit_of_measurement
# statistic: unit_of_measurement
# statistic: unit_of_measurement
#
# if you are familiar with "weka stats", these are "--category <category> --stat <statistic>"
#
# For more info refer to: https://docs.weka.io/usage/statistics/list-of-statistics
#
# To monitor additional statistics, just uncomment the ones you want to be gathered/reported
# If uncommenting things in a commented-out section/Category (e.g. 'object_storage:'), don't forget to uncomment
# the Category itself (e.g. "# object_storage:") as well as one or more of the metrics under it
# These lines are marked "# Category"
#
# It does not like Categories with no metrics - fix forthcoming
# meanwhile, don't uncomment a Category without uncommenting at least one statistic under it.
#
#

stats:
cpu: # Category
CPU_UTILIZATION: percent # metric
# object_storage: # Category
# FAILED_OBJECT_DELETES: count
# FAILED_OBJECT_DOWNLOADS: count
# FAILED_OBJECT_OPERATIONS: count
# FAILED_OBJECT_UPLOADS: count
# OBJECT_DELETES: count
# OBJECT_DELETE_DURATION: microsecs
# OBJECT_DELETE_LATENCY: microsecs
# OBJECT_DOWNLOADS: count
# OBJECT_DOWNLOADS_BG: count
# OBJECT_DOWNLOADS_FG: count
# OBJECT_DOWNLOAD_BYTES_BG: bytespersec
# OBJECT_DOWNLOAD_BYTES_FG: bytespersec
# OBJECT_DOWNLOAD_DURATION: microsecs
# OBJECT_DOWNLOAD_LATENCY: microsecs
# OBJECT_DOWNLOAD_SIZE: count
# OBJECT_OPERATIONS: count
# OBJECT_UPLOADS: count
# OBJECT_UPLOADS_BACKPRESSURE: count
# OBJECT_UPLOADS_MANUAL: count
# OBJECT_UPLOADS_MIGRATE: count
# OBJECT_UPLOADS_POLICY: count
# OBJECT_UPLOADS_RECLAMATION_REUPLOAD: count
# OBJECT_UPLOADS_STOW: count
# OBJECT_UPLOAD_BYTES_BACKPRESSURE: bytespersec
# OBJECT_UPLOAD_BYTES_MANUAL: bytespersec
# OBJECT_UPLOAD_BYTES_MIGRATE: bytespersec
# OBJECT_UPLOAD_BYTES_POLICY: bytespersec
# OBJECT_UPLOAD_BYTES_RECLAMATION_REUPLOAD: bytespersec
# OBJECT_UPLOAD_BYTES_STOW: bytespersec
# OBJECT_UPLOAD_DURATION: microsecs
# OBJECT_UPLOAD_LATENCY: microsecs
# OBJECT_UPLOAD_SIZE: bytes
# OBS_READ_BYTES: bytespersec
# OBS_WRITE_BYTES: bytespersec
# ONGOING_DOWNLOADS: count
# ONGOING_REMOVES: count
# ONGOING_UPLOADS: count
# READ_BYTES: bytespersec
# WRITE_BYTES: bytespersec
ops:
# ACCESS_LATENCY: microsecs
ACCESS_OPS: ops
# COMMIT_LATENCY: microsecs
COMMIT_OPS: ops
# CREATE_LATENCY: microsecs
CREATE_OPS: ops
# FILEATOMICOPEN_LATENCY: microsecs
# FILEATOMICOPEN_OPS: ops
# FILECLOSE_LATENCY: microsecs
FILECLOSE_OPS: ops
# FILEOPEN_LATENCY: microsecs
FILEOPEN_OPS: ops
# FLOCK_LATENCY: microsecs
FLOCK_OPS: ops
# FSINFO_LATENCY: microsecs
FSINFO_OPS: ops
# GETATTR_LATENCY: microsecs
GETATTR_OPS: ops
# LINK_LATENCY: microsecs
LINK_OPS: ops
# LOOKUP_LATENCY: microsecs
# LOOKUP_OPS: ops
# MKDIR_LATENCY: microsecs
MKDIR_OPS: ops
# MKNOD_LATENCY: microsecs
MKNOD_OPS: ops
OPS: ops
# PATHCONF_LATENCY: microsecs
# PATHCONF_OPS: ops
# READDIR_LATENCY: microsecs
READDIR_OPS: ops
# READLINK_LATENCY: microsecs
# READLINK_OPS: ops
READS: iops
READ_BYTES: bytespersec
# READ_DURATION: microsecs
READ_LATENCY: microsecs
# REMOVE_LATENCY: microsecs
REMOVE_OPS: ops
# RENAME_LATENCY: microsecs
RENAME_OPS: ops
# RMDIR_LATENCY: microsecs
RMDIR_OPS: ops
# SETATTR_LATENCY: microsecs
# SETATTR_OPS: ops
# STATFS_LATENCY: microsecs
# STATFS_OPS: ops
# SYMLINK_LATENCY: microsecs
# SYMLINK_OPS: ops
THROUGHPUT: bytespersec
# UNLINK_LATENCY: microsecs
UNLINK_OPS: ops
WRITES: iops
WRITE_BYTES: bytespersec
# WRITE_DURATION: microsecs
WRITE_LATENCY: microsecs
ops_driver: # Category
DIRECT_READ_SIZES: sizes
DIRECT_WRITE_SIZES: sizes
# FILEATOMICOPEN_LATENCY: microsecs
# FILEATOMICOPEN_OPS: ops
# FILECLOSE_LATENCY: microsecs
# FILECLOSE_OPS: ops
# FILEOPEN_LATENCY: microsecs
# FILEOPEN_OPS: ops
# FLOCK_LATENCY: microsecs
# FLOCK_OPS: ops
# GETATTR_LATENCY: microsecs
# GETATTR_OPS: ops
# IOCTL_OBS_PREFETCH_LATENCY: microsecs
# IOCTL_OBS_PREFETCH_OPS: ops
# LINK_LATENCY: microsecs
# LINK_OPS: ops
# LOOKUP_LATENCY: microsecs
# LOOKUP_OPS: ops
# MKNOD_LATENCY: microsecs
# MKNOD_OPS: ops
# OPS: ops
# READDIR_LATENCY: microsecs
# READDIR_OPS: ops
# READLINK_LATENCY: microsecs
# READLINK_OPS: ops
# READS: iops
# READ_BYTES: bytespersec
# READ_DURATION: microsecs
# READ_LATENCY: microsecs
READ_SIZES: sizes
# RENAME_LATENCY: microsecs
# RENAME_OPS: ops
# RMDIR_LATENCY: microsecs
# RMDIR_OPS: ops
# SETATTR_LATENCY: microsecs
# SETATTR_OPS: ops
# STATFS_LATENCY: microsecs
# STATFS_OPS: ops
# SYMLINK_LATENCY: microsecs
# SYMLINK_OPS: ops
# THROUGHPUT: bytespersec
# UNLINK_LATENCY: microsecs
# UNLINK_OPS: ops
# WRITES: iops
# WRITE_BYTES: bytespersec
# WRITE_DURATION: microsecs
# WRITE_LATENCY: microsecs
WRITE_SIZES: sizes
ops_nfs: # Category
# ACCESS_LATENCY: microsecs
# ACCESS_OPS: ops
# COMMIT_LATENCY: microsecs
# COMMIT_OPS: ops
# CREATE_LATENCY: microsecs
# CREATE_OPS: ops
# FSINFO_LATENCY: microsecs
# FSINFO_OPS: ops
# GETATTR_LATENCY: microsecs
# GETATTR_OPS: ops
# LINK_LATENCY: microsecs
# LINK_OPS: ops
# LOOKUP_LATENCY: microsecs
# LOOKUP_OPS: ops
# MKDIR_LATENCY: microsecs
# MKDIR_OPS: ops
# MKNOD_LATENCY: microsecs
# MKNOD_OPS: ops
# OPS: ops
# PATHCONF_LATENCY: microsecs
# PATHCONF_OPS: ops
# READDIR_LATENCY: microsecs
# READDIR_OPS: ops
# READLINK_LATENCY: microsecs
# READLINK_OPS: ops
# READS: iops
# READ_BYTES: bytespersec
# READ_DURATION: microsecs
# READ_LATENCY: microsecs
# READ_SIZES: sizes
# REMOVE_LATENCY: microsecs
# REMOVE_OPS: ops
# RENAME_LATENCY: microsecs
# RENAME_OPS: ops
# SETATTR_LATENCY: microsecs
# SETATTR_OPS: ops
# STATFS_LATENCY: microsecs
# STATFS_OPS: ops
# SYMLINK_LATENCY: microsecs
# SYMLINK_OPS: ops
# THROUGHPUT: bytespersec
# WRITES: iops
# WRITE_BYTES: bytespersec
# WRITE_DURATION: microsecs
# WRITE_LATENCY: microsecs
# WRITE_SIZES: sizes
ssd: # Category
# DRIVE_READ_LATENCY: microsecs
# DRIVE_READ_OPS: ops
# DRIVE_WRITE_LATENCY: microsecs
# DRIVE_WRITE_OPS: ops
# SSD_BLOCKS_READ: count
# SSD_BLOCKS_WRITTEN: count
# SSD_MEDIA_ERRORS: count
# SSD_NON_MEDIA_ERRORS: count
# SSD_READ_ERRORS: count
# SSD_READ_LATENCY: microsecs
# SSD_READ_REQS: iops
# SSD_WRITES: iops
# SSD_WRITE_ERRORS: count
# SSD_WRITE_LATENCY: microsecs
# network:
# PUMPS_TXQ_FULL: times
7 changes: 5 additions & 2 deletions lokilogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import time
import socket
import sys
from logging import getLogger
from logging import getLogger, INFO

import dateutil
import dateutil.parser
Expand Down Expand Up @@ -112,14 +112,17 @@ def send_events(self, event_dict, cluster):
# "node_id": event["nid"],

# map weka event severities to Loki event severities
orig_sev = event['severity']
if event['severity'] == 'MAJOR' or event['severity'] == 'MINOR':
event['severity'] = 'ERROR'
elif event['severity'] == 'CRITICAL':
event['severity'] = 'FATAL'

description = f"cluster:{cluster.name} :{event['severity']}: {event['type']}: {event['description']}"
description = f"cluster:{cluster.name} :{orig_sev}: {event['type']}: {event['description']}"
log.debug(f"sending event: timestamp={timestamp}, labels={labels}, desc={description}")

log.log(INFO, f"WekaEvent: {description}") # send to syslog

try:
if self.loki_logevent(timestamp, description, labels=labels):
# only update time if upload successful, so we don't drop events (they should retry upload next time)
Expand Down

0 comments on commit 1d8aab1

Please sign in to comment.