From d4f98c22dbebd69ba7288f7de53a1874cf61bc5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= <jdanek@redhat.com>
Date: Sun, 12 Mar 2023 18:56:28 +0100
Subject: [PATCH 1/2] todo: add wip logging notes and configs for vector and
 fluentd

---
 docs/notes/logging/fluentd.conf         | 59 ++++++++++++++++++++++
 docs/notes/logging/readme.md            | 66 +++++++++++++++++++++++++
 docs/notes/logging/sample.log           | 34 +++++++++++++
 docs/notes/logging/sample_locations.log |  1 +
 docs/notes/logging/skrouterd_vector.vrl | 20 ++++++++
 docs/notes/logging/vector.conf          | 17 +++++++
 6 files changed, 197 insertions(+)
 create mode 100644 docs/notes/logging/fluentd.conf
 create mode 100644 docs/notes/logging/readme.md
 create mode 100644 docs/notes/logging/sample.log
 create mode 100644 docs/notes/logging/sample_locations.log
 create mode 100644 docs/notes/logging/skrouterd_vector.vrl
 create mode 100644 docs/notes/logging/vector.conf

diff --git a/docs/notes/logging/fluentd.conf b/docs/notes/logging/fluentd.conf
new file mode 100644
index 000000000..0a209446a
--- /dev/null
+++ b/docs/notes/logging/fluentd.conf
@@ -0,0 +1,59 @@
+# https://docs.fluentd.org/configuration/config-file
+
+# Alternative input: receive events over the forward protocol instead of tailing files.
+# <source>
+#   @type forward
+#   port 24224
+# </source>
+
+<source>
+  # https://docs.fluentd.org/input/tail
+  @type tail
+  path sample.log
+  read_from_head true
+  #pos_file sample.log.pos
+  tag skrouterd
+  # https://docs.fluentd.org/configuration/parse-section
+  <parse>
+    # https://docs.fluentd.org/parser/regexp
+    # https://docs.fluentd.org/parser/regexp#how-to-debug-my-regexp-pattern
+    @type regexp
+    # https://ruby-doc.org/core-2.4.1/Regexp.html
+    # simpler variant without the optional source location:
+    #expression /^(?<time>[^ ]+ [^ ]+ [^ ]+) (?<component>[^ ]+) \((?<severity>[^ ]+)\) (?<message>.*)$/
+    # note: .*? in the message field is a lazy capture; a greedy .* would still match,
+    # but the optional trailing source location would then never be captured separately
+    expression /^(?<time>[^ ]+ [^ ]+ [^ ]+) (?<component>[^ ]+) \((?<severity>[^ ]+)\) (?<message>.*?)(?: \((?<source>.*:\d+)\))?$/
+    time_key time
+    # https://docs.fluentd.org/configuration/parse-section#time-parameters
+    time_format %Y-%m-%d %H:%M:%S.%N %z
+    types component:string,severity:string,message:string,source:string
+  </parse>
+</source>
+
+<source>
+  # https://docs.fluentd.org/input/tail
+  @type tail
+  path sample_locations.log
+  read_from_head true
+  #pos_file sample_locations.log.pos
+  tag skrouterd
+  # https://docs.fluentd.org/configuration/parse-section
+  <parse>
+    # https://docs.fluentd.org/parser/regexp
+    # https://docs.fluentd.org/parser/regexp#how-to-debug-my-regexp-pattern
+    @type regexp
+    expression /^(?<time>[^ ]+ [^ ]+ [^ ]+) (?<component>[^ ]+) \((?<severity>[^ ]+)\) (?<message>.*) \((?<source>.*:\d+)\)$/
+    time_key time
+    # https://docs.fluentd.org/configuration/parse-section#time-parameters
+    time_format %Y-%m-%d %H:%M:%S.%N %z
+    types component:string,severity:string,message:string,source:string
+  </parse>
+</source>
+
+<match skrouterd>
+  @type file
+  path fluent.out
+</match>
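+
+# For quick debugging, the file output can be swapped for stdout:
+# <match skrouterd>
+#   @type stdout
+# </match>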
+
+# https://docs.fluentd.org/configuration/config-file#embedded-ruby-code
\ No newline at end of file
diff --git a/docs/notes/logging/readme.md b/docs/notes/logging/readme.md
new file mode 100644
index 000000000..b4aac2896
--- /dev/null
+++ b/docs/notes/logging/readme.md
@@ -0,0 +1,66 @@
+# Logging
+
+Skupper Router produces two kinds of logs: operational logs and flow (protocol) logs.
+
+This document deals with the operational logs only.
+
+## OpenShift
+
+[OpenShift 4.12 by default uses Fluentd, Elasticsearch, and Kibana](https://access.redhat.com/documentation/en-us/openshift_container_platform/4.12/html/logging/cluster-logging) for its logging subsystem.
+[Fluentd collects the logs](https://docs.openshift.com/container-platform/4.12/logging/config/cluster-logging-collector.html), then
+[Elasticsearch is used to store the collected data](https://docs.openshift.com/container-platform/4.12/logging/config/cluster-logging-log-store.html), and finally there is
+[Kibana to visualize the collected data](https://docs.openshift.com/container-platform/4.12/logging/cluster-logging-visualizer.html).
+
+In the above, Fluentd can be [replaced by Vector](https://access.redhat.com/documentation/en-us/openshift_container_platform/4.12/html/logging/cluster-logging#cluster-logging-about-vector_cluster-logging), and
+[an external log store can substitute for Elasticsearch](https://access.redhat.com/documentation/en-us/openshift_container_platform/4.12/html/logging/cluster-logging#cluster-logging-forwarding-about_cluster-logging).
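+
+For example, switching the collector is done in the `ClusterLogging` custom resource; a sketch based on the linked documentation:
+
+```yaml
+apiVersion: logging.openshift.io/v1
+kind: ClusterLogging
+metadata:
+  name: instance
+  namespace: openshift-logging
+spec:
+  collection:
+    logs:
+      type: vector
+```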
+
+### Log parsing
+
+At some point in the log processing pipeline, the logs, which skrouterd produces in a plain text format, need to be parsed.
+Every component of the pipeline is capable of performing this step:
+
+* Fluentd: the [`<parse>` directive](https://docs.fluentd.org/configuration/parse-section)
+* Vector: [remap with the VRL language](https://vector.dev/docs/reference/vrl/) or a transform running an [arbitrary Lua program](https://vector.dev/docs/reference/configuration/transforms/lua/)
+* Elasticsearch: the [Grok filter plugin](https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html) (strictly speaking a Logstash plugin)
+
+#### Date and time
+
+    ruby -e 'require "time"; puts Time.strptime("2023-03-12 11:54:24.084418 +0100", "%Y-%m-%d %H:%M:%S.%N %z")'
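+
+If the format string is right, this should print the parsed time back (Ruby's default `to_s` drops the fractional seconds):
+
+    2023-03-12 11:54:24 +0100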
+
+In general, a log processing pipeline consists of gathering the log messages, parsing them into structured records, storing them, and visualizing the stored data.
+
+### Log collection
+
+#### Fluentd
+
+```shell
+sudo dnf install -y ruby-devel
+gem install fluentd --no-doc
+```
+
+Check the config syntax with `fluentd --dry-run -c fluentd.conf`
+
+Test with
+
+    fluentd -c fluentd.conf
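+
+The parsed events are written to files named after the `fluent.out` path prefix (the exact layout depends on fluentd's buffer settings); check with
+
+    ls fluent.out*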
+
+#### Vector
+
+```shell
+sudo dnf install -y https://packages.timber.io/vector/0.28.1/vector-0.28.1-1.$(arch).rpm
+```
+
+Test with
+
+    vector -c vector.conf < sample.log
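+
+Each matched line should come out as one JSON event per line, roughly like this (field order and timestamp rendering may differ):
+
+    {"component":"SERVER","message":"Container Name: Standalone_ZIsKPjNEXUzhcok","severity":"info","timestamp":"2023-03-12T10:54:24.077669Z"}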
+
+### Visualization
+
+#### Kibana
+
+### Putting it all together
diff --git a/docs/notes/logging/sample.log b/docs/notes/logging/sample.log
new file mode 100644
index 000000000..20b6c01ee
--- /dev/null
+++ b/docs/notes/logging/sample.log
@@ -0,0 +1,34 @@
+2023-03-12 11:54:24.077669 +0100 SERVER (info) Container Name: Standalone_ZIsKPjNEXUzhcok
+2023-03-12 11:54:24.078884 +0100 ROUTER (info) Router started in Standalone mode
+2023-03-12 11:54:24.078984 +0100 ROUTER (info) Version: 2.1.0-156-g2a85d022
+2023-03-12 11:54:24.080946 +0100 ROUTER_CORE (info) Streaming link scrubber: Scan interval: 30 seconds, max free pool: 128 links
+2023-03-12 11:54:24.081033 +0100 ROUTER_CORE (info) Core module enabled: streaming_link_scrubber
+2023-03-12 11:54:24.081103 +0100 ROUTER_CORE (info) Core module present but disabled: mobile_sync
+2023-03-12 11:54:24.081208 +0100 ROUTER_CORE (info) Stuck delivery detection: Scan interval: 30 seconds, Delivery age threshold: 10 seconds
+2023-03-12 11:54:24.081326 +0100 ROUTER_CORE (info) Core module enabled: stuck_delivery_detection
+2023-03-12 11:54:24.082128 +0100 ROUTER_CORE (info) Core module enabled: heartbeat_server
+2023-03-12 11:54:24.082205 +0100 ROUTER_CORE (info) Core module present but disabled: heartbeat_edge
+2023-03-12 11:54:24.082301 +0100 ROUTER_CORE (info) Core module enabled: address_lookup_client
+2023-03-12 11:54:24.082352 +0100 ROUTER_CORE (info) Core module present but disabled: mesh_discovery_interior
+2023-03-12 11:54:24.082411 +0100 ROUTER_CORE (info) Core module present but disabled: mesh_discovery_edge
+2023-03-12 11:54:24.082473 +0100 ROUTER_CORE (info) Core module present but disabled: edge_addr_tracking
+2023-03-12 11:54:24.082528 +0100 ROUTER_CORE (info) Core module present but disabled: core_test_hooks
+2023-03-12 11:54:24.082578 +0100 ROUTER_CORE (info) Core module present but disabled: edge_router
+2023-03-12 11:54:24.083734 +0100 FLOW_LOG (info) Protocol logging started
+2023-03-12 11:54:24.084008 +0100 ROUTER_CORE (info) Protocol adaptor registered: tcp
+2023-03-12 11:54:24.084180 +0100 ROUTER_CORE (info) Protocol adaptor registered: http2
+2023-03-12 11:54:24.084418 +0100 ROUTER_CORE (info) Protocol adaptor registered: http/1.x
+2023-03-12 11:54:24.085748 +0100 ROUTER_CORE (info) Router Core thread running. 0/Standalone_ZIsKPjNEXUzhcok
+2023-03-12 11:54:24.085979 +0100 ROUTER_CORE (info) Protocol adaptor registered: amqp
+2023-03-12 11:54:24.086084 +0100 ROUTER_CORE (info) In-process subscription M/$management
+2023-03-12 11:54:24.105875 +0100 AGENT (info) Activating management agent on $_management_internal
+2023-03-12 11:54:24.105934 +0100 ROUTER_CORE (info) In-process subscription L/$management
+2023-03-12 11:54:24.106039 +0100 ROUTER_CORE (info) In-process subscription M/sfe.VbCkR:0
+2023-03-12 11:54:24.106079 +0100 ROUTER_CORE (info) In-process subscription L/$_management_internal
+2023-03-12 11:54:24.107232 +0100 POLICY (info) Policy configured maxConnections: 65535, policyDir: '',access rules enabled: 'false', use hostname patterns: 'false'
+2023-03-12 11:54:24.108581 +0100 POLICY (info) Policy fallback defaultVhost is defined: '$default'
+2023-03-12 11:54:24.112510 +0100 CONN_MGR (info) Configured Listener: 0.0.0.0:amqp proto=any, role=normal
+2023-03-12 11:54:24.113131 +0100 SERVER (notice) Operational, 4 Threads Running (process ID 2151406)
+2023-03-12 11:54:24.113479 +0100 SERVER (notice) Process VmSize 20.00 TiB (31.11 GiB available memory)
+2023-03-12 11:54:24.113508 +0100 SERVER (info) Running in DEBUG Mode
+2023-03-12 11:54:24.114054 +0100 SERVER (notice) Listening on 0.0.0.0:amqp
diff --git a/docs/notes/logging/sample_locations.log b/docs/notes/logging/sample_locations.log
new file mode 100644
index 000000000..b68901384
--- /dev/null
+++ b/docs/notes/logging/sample_locations.log
@@ -0,0 +1 @@
+2023-03-11 13:56:17.661912 +0000 HTTP_ADAPTOR (info) Listener httpListener/0.0.0.0:24162: stopped listening for client connections on 0.0.0.0:24162 (/build/src/adaptors/adaptor_listener.c:168)
diff --git a/docs/notes/logging/skrouterd_vector.vrl b/docs/notes/logging/skrouterd_vector.vrl
new file mode 100644
index 000000000..06f9b14ba
--- /dev/null
+++ b/docs/notes/logging/skrouterd_vector.vrl
@@ -0,0 +1,20 @@
+# https://vector.dev/docs/reference/vrl
+# https://vector.dev/docs/reference/vrl/#parse-custom-logs
+
+# language documentation at https://vrl.dev
+# https://vector.dev/docs/reference/vrl/expressions/
+# try your code in the VRL REPL, learn more at https://vrl.dev/examples
+
+# https://vector.dev/docs/reference/vrl/functions/#parse_regex
+# https://docs.rs/regex/latest/regex/#syntax
+# see also https://vector.dev/docs/reference/vrl/functions/#parse_grok
+
+#. = parse_regex!(.message, r'^(?P<timestamp>[^ ]+ [^ ]+ [^ ]+) (?P<component>[^ ]+) \((?P<severity>[^ ]+)\) (?P<message>.*)$')
+#. = parse_regex!(.message, r'^(?P<timestamp>\d+-\d+-\d+ \d+:\d+:\d+\.\d+ \+\d+) (?P<component>[^ ]+) \((?P<severity>[^ ]+)\) (?P<message>.*)$')
+
+# .*? in the message field is a lazy capture, so the optional trailing source location is split out
+. = parse_regex!(.message, r'^(?P<timestamp>[^ ]+ [^ ]+ [^ ]+) (?P<component>[^ ]+) \((?P<severity>[^ ]+)\) (?P<message>.*?)(?: \((?P<source>.*:\d+)\))?$')
+#. = parse_regex!(.message, r'^(?P<timestamp>\d+-\d+-\d+ \d+:\d+:\d+\.\d+ \+\d+) (?P<component>[^ ]+) \((?P<severity>[^ ]+)\) (?P<message>.*?)(?: \((?P<source>.*:\d+)\))?$')
+
+# Coerce the parsed timestamp (chrono-style strptime format: %.f matches the
+# fractional seconds); fall back to the ingestion time if parsing ever fails
+.timestamp = parse_timestamp(.timestamp, "%Y-%m-%d %H:%M:%S%.f %z") ?? now()
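+
+# Optional further normalization (a sketch, not required for the sample data,
+# which already emits lowercase severities):
+#.severity = downcase(.severity) ?? .severity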
diff --git a/docs/notes/logging/vector.conf b/docs/notes/logging/vector.conf
new file mode 100644
index 000000000..2cbbc729c
--- /dev/null
+++ b/docs/notes/logging/vector.conf
@@ -0,0 +1,17 @@
+# https://vector.dev/docs/reference/configuration/
+
+# https://vector.dev/docs/reference/configuration/sources/
+[sources.skrouterd_logs]
+type   = "stdin"
+
+# https://vector.dev/docs/reference/configuration/transforms/
+[transforms.skrouterd_parser]
+inputs = ["skrouterd_logs"]
+type   = "remap"
+file   = "skrouterd_vector.vrl"
+
+# https://vector.dev/docs/reference/configuration/sinks/
+[sinks.console]
+inputs     = ["skrouterd_parser"]
+type       = "console"
+encoding.codec = "json"

From 3345850173449b372f3a6256251db1a05297f262 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= <jdanek@redhat.com>
Date: Sun, 12 Mar 2023 20:02:54 +0100
Subject: [PATCH 2/2] elasticsearch wip

---
 docs/notes/logging/elasticsearch.config.yaml |  3 ++
 docs/notes/logging/readme.md                 | 37 ++++++++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 docs/notes/logging/elasticsearch.config.yaml

diff --git a/docs/notes/logging/elasticsearch.config.yaml b/docs/notes/logging/elasticsearch.config.yaml
new file mode 100644
index 000000000..51ac75348
--- /dev/null
+++ b/docs/notes/logging/elasticsearch.config.yaml
@@ -0,0 +1,3 @@
+path:
+  data: elastic_home/var/lib/elasticsearch
+  logs: elastic_home/var/log/elasticsearch
diff --git a/docs/notes/logging/readme.md b/docs/notes/logging/readme.md
index b4aac2896..22844ad0e 100644
--- a/docs/notes/logging/readme.md
+++ b/docs/notes/logging/readme.md
@@ -22,6 +22,10 @@ Every component of the pipeline is capable of performing this step:
 * Vector: [remap with the VRL language](https://vector.dev/docs/reference/vrl/) or a transform running an [arbitrary Lua program](https://vector.dev/docs/reference/configuration/transforms/lua/)
 * Elasticsearch: the [Grok filter plugin](https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html) (strictly speaking a Logstash plugin)
 
+TODO: As far as I can tell, OpenShift expects the grok parsing to happen in Elasticsearch; I have not found where to configure custom parsing in the managed Fluentd or Vector collectors, but I might be wrong.
+
+* https://stackoverflow.com/questions/57460451/how-to-extract-and-visualize-values-from-a-log-entry-in-openshift-efk-stack
+
 #### Date and time
 
     ruby -e 'require "time"; puts Time.strptime("2023-03-12 11:54:24.084418 +0100", "%Y-%m-%d %H:%M:%S.%N %z")'
@@ -53,6 +57,39 @@ Test with
 
     vector -c vector.conf < sample.log
 
+#### Elasticsearch
+
+From https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html
+
+```shell
+# this then requires a root user to run elasticsearch
+sudo dnf install -y https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.6.2-x86_64.rpm
+
+# this allows running as regular user
+wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.6.2-linux-x86_64.tar.gz
+tar -xzf elasticsearch-8.6.2-linux-x86_64.tar.gz
+
+# probably best to use docker anyways
+docker network create elastic
+docker run --name elasticsearch --net elastic --rm -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" -it docker.elastic.co/elasticsearch/elasticsearch:8.6.2
+docker run --name kibana --net elastic --rm -p 5601:5601 -it docker.elastic.co/kibana/kibana:8.6.2
+```
+
+The first start prints the generated superuser password (username `elastic`); the dockerized Elasticsearch also prints an enrollment token that Kibana asks for on first visit.
+
+From https://www.elastic.co/guide/en/welcome-to-elastic/current/getting-started-general-purpose.html
+and https://www.elastic.co/guide/en/elasticsearch/reference/current/run-elasticsearch-locally.html
+
+Test with (`ES_PATH_CONF` must point to a config *directory*; for the tarball install, seed it from the distribution's default config and drop `elasticsearch.config.yaml` in as `elasticsearch.yml`)
+
+    mkdir -p elastic_home && cp -r elasticsearch-8.6.2/config elastic_home/ && cp elasticsearch.config.yaml elastic_home/config/elasticsearch.yml
+    ES_PATH_CONF=$PWD/elastic_home/config elasticsearch-8.6.2/bin/elasticsearch
+
+Upload the log lines with the bulk API; Elasticsearch only accepts JSON documents, so wrap each raw line first (assumes `jq` is installed; with security enabled, add `-u elastic:<password>` and use https)
+
+    jq -cR '{"index":{}}, {"message":.}' sample.log > sample.ndjson
+    curl -X POST -H 'Content-Type: application/x-ndjson' --data-binary @sample.ndjson 127.0.0.1:9200/skrouterd/_bulk
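+
+Check that the documents were indexed with
+
+    curl '127.0.0.1:9200/skrouterd/_count?pretty'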
+
+Open the Kibana Dev Tools console and try a Grok pattern against a sample line, e.g. with the ingest pipeline simulate API.
+
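+A minimal sketch (the standard grok pattern names used here are an assumption and may need tuning):
+
+    curl -X POST -H 'Content-Type: application/json' 127.0.0.1:9200/_ingest/pipeline/_simulate -d '{
+      "pipeline": {"processors": [{"grok": {
+        "field": "message",
+        "patterns": ["%{TIMESTAMP_ISO8601:time} %{ISO8601_TIMEZONE:tz} %{WORD:component} \\(%{LOGLEVEL:severity}\\) %{GREEDYDATA:text}"]
+      }}]},
+      "docs": [{"_source": {"message": "2023-03-12 11:54:24.077669 +0100 SERVER (info) Container Name: Standalone_ZIsKPjNEXUzhcok"}}]
+    }'
+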
 ### Visualization
 
 #### Kibana