From 6efbe8c9d5d9bbb4717cc806da2e7552698b65cd Mon Sep 17 00:00:00 2001 From: Fernando Briano Date: Mon, 18 Mar 2024 14:51:35 +0000 Subject: [PATCH] [Gem] Adds ES|QL Helper - Backports #2328 --- docs/helpers.asciidoc | 95 +++++++++++++++++++ .../lib/elasticsearch/helpers/esql_helper.rb | 71 ++++++++++++++ .../integration/client_integration_spec.rb | 2 +- .../integration/helpers/bulk_helper_spec.rb | 12 +-- .../integration/helpers/esql_helper_spec.rb | 94 ++++++++++++++++++ .../helpers/helpers_spec_helper.rb | 29 ++++++ .../integration/helpers/scroll_helper_spec.rb | 12 +-- 7 files changed, 292 insertions(+), 23 deletions(-) create mode 100644 elasticsearch/lib/elasticsearch/helpers/esql_helper.rb create mode 100644 elasticsearch/spec/integration/helpers/esql_helper_spec.rb create mode 100644 elasticsearch/spec/integration/helpers/helpers_spec_helper.rb diff --git a/docs/helpers.asciidoc b/docs/helpers.asciidoc index 9509cd8989..4fdb573275 100644 --- a/docs/helpers.asciidoc +++ b/docs/helpers.asciidoc @@ -160,3 +160,98 @@ end scroll_helper.clear ---- -- + +[discrete] +[[esql-helper]] +=== ES|QL Helper + +This functionality is Experimental and may be changed or removed completely in a future release. If you have any feedback on this helper, please https://github.com/elastic/elasticsearch-ruby/issues/new/choose[let us know]. + +The helper provides an object response from the ESQL `query` API instead of the default JSON value. + +To use the ES|QL helper, require it in your code: + +[source,ruby] +---- +require 'elasticsearch/helpers/esql_helper' +---- + +By default, the `query` API returns a Hash response with `columns` and `values` like so: + +[source,ruby] +---- +query = <[ + {"name"=>"@timestamp", "type"=>"date"}, + {"name"=>"client.ip", "type"=>"ip"}, + {"name"=>"event.duration", "type"=>"long"}, + {"name"=>"message", "type"=>"keyword"}, + {"name"=>"duration_ms", "type"=>"double"} +], +"values"=>[ + ["2023-10-23T12:15:03.360Z", "172.21.2.162", 3450233, "Connected to 10.1.0.3", 3.5], + ["2023-10-23T12:27:28.948Z", "172.21.2.113", 2764889, "Connected to 10.1.0.2", 2.8], + ["2023-10-23T13:33:34.937Z", "172.21.0.5", 1232382, "Disconnected", 1.2], + ["2023-10-23T13:51:54.732Z", "172.21.3.15", 725448, "Connection error", 0.7], + ["2023-10-23T13:52:55.015Z", "172.21.3.15", 8268153, "Connection error", 8.3], + ["2023-10-23T13:53:55.832Z", "172.21.3.15", 5033755, "Connection error", 5.0], + ["2023-10-23T13:55:01.543Z", "172.21.3.15", 1756467, "Connected to 10.1.0.1", 1.8] +]} +---- + +The helper returns an array of hashes with the columns as keys and the respective values. So for the previous example, it would return the following: + +[source,ruby] +---- +response = Elasticsearch::Helpers::ESQLHelper.query(client, query) + +puts response + +{"duration_ms"=>3.5, "message"=>"Connected to 10.1.0.3", "event.duration"=>3450233, "client.ip"=>"172.21.2.162", "@timestamp"=>"2023-10-23T12:15:03.360Z"} +{"duration_ms"=>2.8, "message"=>"Connected to 10.1.0.2", "event.duration"=>2764889, "client.ip"=>"172.21.2.113", "@timestamp"=>"2023-10-23T12:27:28.948Z"} +{"duration_ms"=>1.2, "message"=>"Disconnected", "event.duration"=>1232382, "client.ip"=>"172.21.0.5", "@timestamp"=>"2023-10-23T13:33:34.937Z"} +{"duration_ms"=>0.7, "message"=>"Connection error", "event.duration"=>725448, "client.ip"=>"172.21.3.15", "@timestamp"=>"2023-10-23T13:51:54.732Z"} +{"duration_ms"=>8.3, "message"=>"Connection error", "event.duration"=>8268153, "client.ip"=>"172.21.3.15", "@timestamp"=>"2023-10-23T13:52:55.015Z"} +---- + +Additionally, you can transform the data in the response by passing in a Hash of `column => Proc` values. You could use this for example to convert '@timestamp' into a DateTime object. Pass in a Hash to `query` as a `parser` defining a `Proc` for each value you'd like to parse: + +[source,ruby] +---- +require 'elasticsearch/helpers/esql_helper' + +parser = { + '@timestamp' => Proc.new { |t| DateTime.parse(t) } +} +response = Elasticsearch::Helpers::ESQLHelper.query(client, query, parser: parser) +response.first['@timestamp'] +# +---- + +You can pass in as many Procs as there are columns in the response. For example: + +[source,ruby] +---- +parser = { + '@timestamp' => Proc.new { |t| DateTime.parse(t) }, + 'client.ip' => Proc.new { |i| IPAddr.new(i) }, + 'event.duration' => Proc.new { |d| d.to_s } +} + +response = Elasticsearch::Helpers::ESQLHelper.query(client, query, parser: parser) + +puts response + +{"duration_ms"=>3.5, "message"=>"Connected to 10.1.0.3", "event.duration"=>"3450233", "client.ip"=>#, "@timestamp"=>#} +{"duration_ms"=>2.8, "message"=>"Connected to 10.1.0.2", "event.duration"=>"2764889", "client.ip"=>#, "@timestamp"=>#} +{"duration_ms"=>1.2, "message"=>"Disconnected", "event.duration"=>"1232382", "client.ip"=>#, "@timestamp"=>#} +{"duration_ms"=>0.7, "message"=>"Connection error", "event.duration"=>"725448", "client.ip"=>#, "@timestamp"=>#} +{"duration_ms"=>8.3, "message"=>"Connection error", "event.duration"=>"8268153", "client.ip"=>#, "@timestamp"=>#} +---- diff --git a/elasticsearch/lib/elasticsearch/helpers/esql_helper.rb b/elasticsearch/lib/elasticsearch/helpers/esql_helper.rb new file mode 100644 index 0000000000..9105850967 --- /dev/null +++ b/elasticsearch/lib/elasticsearch/helpers/esql_helper.rb @@ -0,0 +1,71 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Elasticsearch + module Helpers + # Elasticsearch Client Helper for the ES|QL API + # + # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-query-api.html + # + module ESQLHelper + # Query helper for ES|QL + # + # By default, the `esql.query` API returns a Hash response with the following keys: + # + # * `columns` with the value being an Array of `{ name: type }` Hashes for each column. + # + # * `values` with the value being an Array of Arrays with the values for each row. + # + # This helper function returns an Array of hashes with the columns as keys and the respective + # values: `{ column['name'] => value }`. + # + # @param client [Elasticsearch::Client] an instance of the Client to use for the query. + # @param query [Hash, String] The query to be passed to the ES|QL query API. + # @param params [Hash] options to pass to the ES|QL query API. + # @param parser [Hash] Hash of column name keys and Proc values to transform the value of + # a given column. + # @example Using the ES|QL helper + # require 'elasticsearch/helpers/esql_helper' + # query = <<~ESQL + # FROM sample_data + # | EVAL duration_ms = ROUND(event.duration / 1000000.0, 1) + # ESQL + # response = Elasticsearch::Helpers::ESQLHelper.query(client, query) + # + # @example Using the ES|QL helper with a parser + # response = Elasticsearch::Helpers::ESQLHelper.query( + # client, + # query, + # parser: { '@timestamp' => Proc.new { |t| DateTime.parse(t) } } + # ) + # + # @see https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current/Helpers.html#_esql_helper + # + def self.query(client, query, params = {}, parser: {}) + response = client.esql.query({ body: { query: query }, format: 'json' }.merge(params)) + columns = response['columns'] + response['values'].map do |value| + (value.length - 1).downto(0).map do |index| + key = columns[index]['name'] + value[index] = parser[key].call value[index] if parser[key] + { key => value[index] } + end.reduce({}, :merge) + end + end + end + end +end diff --git a/elasticsearch/spec/integration/client_integration_spec.rb b/elasticsearch/spec/integration/client_integration_spec.rb index d8ff72a288..2ae47d70e9 100644 --- a/elasticsearch/spec/integration/client_integration_spec.rb +++ b/elasticsearch/spec/integration/client_integration_spec.rb @@ -54,7 +54,7 @@ end context 'Reports the right meta header' do - it 'Reports es service name and gem versio' do + it 'Reports es service name and gem version' do headers = client.transport.connections.first.connection.headers version = Class.new.extend(Elastic::Transport::MetaHeader).send(:client_meta_version, Elasticsearch::VERSION) expect(headers['x-elastic-client-meta']).to match /^es=#{version}/ diff --git a/elasticsearch/spec/integration/helpers/bulk_helper_spec.rb b/elasticsearch/spec/integration/helpers/bulk_helper_spec.rb index 26a8ec465c..8fb7cb5485 100644 --- a/elasticsearch/spec/integration/helpers/bulk_helper_spec.rb +++ b/elasticsearch/spec/integration/helpers/bulk_helper_spec.rb @@ -14,22 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -ELASTICSEARCH_URL = ENV['TEST_ES_SERVER'] || "http://localhost:#{(ENV['PORT'] || 9200)}" -raise URI::InvalidURIError unless ELASTICSEARCH_URL =~ /\A#{URI::DEFAULT_PARSER.make_regexp}\z/ - +require_relative 'helpers_spec_helper' require 'elasticsearch/helpers/bulk_helper' -require 'spec_helper' require 'tempfile' context 'Elasticsearch client helpers' do context 'Bulk helper' do - let(:client) do - Elasticsearch::Client.new( - host: ELASTICSEARCH_URL, - user: 'elastic', - password: 'changeme' - ) - end let(:index) { 'bulk_animals' } let(:params) { { refresh: 'wait_for' } } let(:bulk_helper) { Elasticsearch::Helpers::BulkHelper.new(client, index, params) } diff --git a/elasticsearch/spec/integration/helpers/esql_helper_spec.rb b/elasticsearch/spec/integration/helpers/esql_helper_spec.rb new file mode 100644 index 0000000000..83355aba45 --- /dev/null +++ b/elasticsearch/spec/integration/helpers/esql_helper_spec.rb @@ -0,0 +1,94 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +require_relative 'helpers_spec_helper' +require 'elasticsearch/helpers/esql_helper' + +context 'Elasticsearch client helpers' do + let(:index) { 'esql_helper_test' } + let(:body) { { size: 12, query: { match_all: {} } } } + let(:esql_helper) { Elasticsearch::Helpers::ESQLHelper } + let(:query) do + <<~ESQL + FROM #{index} + | EVAL duration_ms = ROUND(event.duration / 1000000.0, 1) + ESQL + end + + before do + client.indices.create( + index: index, + body: { + mappings: { + properties: { 'client.ip' => { type: 'ip' }, message: { type: 'keyword' } } + } + } + ) + client.bulk( + index: index, + body: [ + {'index': {}}, + {'@timestamp' => '2023-10-23T12:15:03.360Z', 'client.ip' => '172.21.2.162', message: 'Connected to 10.1.0.3', 'event.duration' => 3450233}, + {'index': {}}, + {'@timestamp' => '2023-10-23T12:27:28.948Z', 'client.ip' => '172.21.2.113', message: 'Connected to 10.1.0.2', 'event.duration' => 2764889}, + {'index': {}}, + {'@timestamp' => '2023-10-23T13:33:34.937Z', 'client.ip' => '172.21.0.5', message: 'Disconnected', 'event.duration' => 1232382}, + {'index': {}}, + {'@timestamp' => '2023-10-23T13:51:54.732Z', 'client.ip' => '172.21.3.15', message: 'Connection error', 'event.duration' => 725448}, + {'index': {}}, + {'@timestamp' => '2023-10-23T13:52:55.015Z', 'client.ip' => '172.21.3.15', message: 'Connection error', 'event.duration' => 8268153}, + {'index': {}}, + {'@timestamp' => '2023-10-23T13:53:55.832Z', 'client.ip' => '172.21.3.15', message: 'Connection error', 'event.duration' => 5033755}, + {'index': {}}, + {'@timestamp' => '2023-10-23T13:55:01.543Z', 'client.ip' => '172.21.3.15', message: 'Connected to 10.1.0.1', 'event.duration' => 1756467} + ], + refresh: true + ) + end + + after do + client.indices.delete(index: index) + end + + it 'returns an ESQL response as a relational key/value object' do + response = esql_helper.query(client, query) + expect(response.count).to eq 7 + expect(response.first.keys).to eq ['duration_ms', 'message', 'event.duration', 'client.ip', '@timestamp'] + response.each do |r| + expect(r['@timestamp']).to be_a String + expect(r['client.ip']).to be_a String + expect(r['message']).to be_a String + expect(r['event.duration']).to be_a Integer + end + end + + it 'parses iterated objects when procs are passed in' do + require 'ipaddr' + + parser = { + '@timestamp' => Proc.new { |t| DateTime.parse(t) }, + 'client.ip' => Proc.new { |i| IPAddr.new(i) }, + 'event.duration' => Proc.new { |d| d.to_s } + } + response = esql_helper.query(client, query, parser: parser) + response.each do |r| + expect(r['@timestamp']).to be_a DateTime + expect(r['client.ip']).to be_a IPAddr + expect(r['message']).to be_a String + expect(r['event.duration']).to be_a String + end + end +end diff --git a/elasticsearch/spec/integration/helpers/helpers_spec_helper.rb b/elasticsearch/spec/integration/helpers/helpers_spec_helper.rb new file mode 100644 index 0000000000..1c9b5de18c --- /dev/null +++ b/elasticsearch/spec/integration/helpers/helpers_spec_helper.rb @@ -0,0 +1,29 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require 'spec_helper' + +ELASTICSEARCH_URL = ENV['TEST_ES_SERVER'] || "http://localhost:#{(ENV['PORT'] || 9200)}" +raise URI::InvalidURIError unless ELASTICSEARCH_URL =~ /\A#{URI::DEFAULT_PARSER.make_regexp}\z/ + +def client + @client ||= Elasticsearch::Client.new( + host: ELASTICSEARCH_URL, + user: 'elastic', + password: 'changeme' + ) +end diff --git a/elasticsearch/spec/integration/helpers/scroll_helper_spec.rb b/elasticsearch/spec/integration/helpers/scroll_helper_spec.rb index caa18d0f74..8cd63f9869 100644 --- a/elasticsearch/spec/integration/helpers/scroll_helper_spec.rb +++ b/elasticsearch/spec/integration/helpers/scroll_helper_spec.rb @@ -14,20 +14,10 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -ELASTICSEARCH_URL = ENV['TEST_ES_SERVER'] || "http://localhost:#{(ENV['PORT'] || 9200)}" -raise URI::InvalidURIError unless ELASTICSEARCH_URL =~ /\A#{URI::DEFAULT_PARSER.make_regexp}\z/ - -require 'spec_helper' +require_relative 'helpers_spec_helper' require 'elasticsearch/helpers/scroll_helper' context 'Elasticsearch client helpers' do - let(:client) do - Elasticsearch::Client.new( - host: ELASTICSEARCH_URL, - user: 'elastic', - password: 'changeme' - ) - end let(:index) { 'books' } let(:body) { { size: 12, query: { match_all: {} } } } let(:scroll_helper) { Elasticsearch::Helpers::ScrollHelper.new(client, index, body) }