From ab0f3df3e9ea1ef2d19e05c8e218e66135ef4769 Mon Sep 17 00:00:00 2001 From: Dainius Jocas Date: Tue, 23 Mar 2021 11:10:54 +0200 Subject: [PATCH] Support multiple metrics for replay for impact (#6) * feat: support multiple metrics for replay for impact * chore: log which metric was used for _rank_eval --- src/replay/impact.clj | 47 ++++++++++++++++++++++++++++++++++--- test/replay/impact_test.clj | 31 +++++++++++++++++++++++- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/src/replay/impact.clj b/src/replay/impact.clj index 8c0130f..abe7bd9 100644 --- a/src/replay/impact.clj +++ b/src/replay/impact.clj @@ -98,19 +98,59 @@ (assoc-in [:_source :impact] impact)))) variation-ids))) +; https://www.elastic.co/guide/en/elasticsearch/reference/current/search-rank-eval.html +(def defaults-metric-configs + {:precision {:k 10 + :relevant_rating_threshold 1 + :ignore_unlabeled false} + :recall {:k 10 + :relevant_rating_threshold 1} + :mean_reciprocal_rank {:k 10 + :relevant_rating_threshold 1} + :dcg {:k 10 + :normalize false} + :expected_reciprocal_rank {:maximum_relevance 10 + :k 10}}) + +(defn get-top-k + "Returns the explicit [:replay :top-k] when set, otherwise the :k given in the first configured metric. + Falls back to 10 when neither is provided." + [opts] + (or (get-in opts [:replay :top-k]) + (-> opts :replay :metric first last :k) + 10)) + +(defn get-metric + "Resolves the first metric under [:replay :metric] (:precision when none is given) merged over its default config; :k comes from get-top-k, config keys absent from the defaults are dropped, and a metric without defaults throws an Exception." + [opts] + (let [k (get-top-k opts) + provided-metric (get-in opts [:replay :metric]) + metric-name (ffirst provided-metric) + provided-metric-config (get provided-metric metric-name) + default-metric-config (if metric-name + (get defaults-metric-configs metric-name) + (get defaults-metric-configs :precision))] + (when (nil? default-metric-config) + (throw (Exception. (format "Metric '%s' not supported by _rank_eval API. '%s'." 
+ (name metric-name) (get opts :replay))))) + {(or metric-name :precision) + (merge default-metric-config + (select-keys (assoc provided-metric-config :k k) + (keys default-metric-config)))})) + (defn measure-impact [opts query-log-entry] (let [target-es-host (get-in opts [:replay :connection.url]) raw-endpoint (get-in query-log-entry [:_source :uri]) target-index (or (get-in opts [:replay :target-index]) (get-index-or-alias raw-endpoint)) - k (get-in opts [:replay :top-k]) + k (get-top-k opts) query-body (json/decode (get-in query-log-entry [:_source :request])) - metric {:precision {:k k :relevant_rating_threshold 1 :ignore_unlabeled false}} + metric (get-metric opts) pit (assoc (pit/init target-es-host target-index opts) :keep_alive "30s") baseline-ratings-url (format "%s%s" target-es-host (prepare-endpoint raw-endpoint)) baseline-ratings (get-baseline-ratings baseline-ratings-url query-body pit k (get-in opts [:replay :ignore-timeouts])) grouped-variations (get-grouped-query-variations query-body opts k) rank-eval-resp (query-rank-eval-api target-es-host target-index baseline-ratings grouped-variations metric pit)] - (println baseline-ratings) + (log/infof "RFI metric used: '%s'" metric) (construct-rfi-records rank-eval-resp query-log-entry grouped-variations baseline-ratings k))) (def defaults @@ -128,6 +168,7 @@ :connection.url "http://localhost:9200" :target-index nil :concurrency 1 + :metric nil :ignore-timeouts false} :sink {:connection.url "http://localhost:9200" :dest.index "impact_sink_index" diff --git a/test/replay/impact_test.clj b/test/replay/impact_test.clj index 0800e02..9875195 100644 --- a/test/replay/impact_test.clj +++ b/test/replay/impact_test.clj @@ -1,5 +1,5 @@ (ns replay.impact-test - (:require [clojure.test :refer :all] + (:require [clojure.test :refer [deftest is testing]] [replay.impact :as impact])) (deftest index-name-extraction @@ -108,3 +108,32 @@ :query {:match_all {}} :size 10}})} (impact/prepare-rank-eval-request ratings 
+ grouped-variations metric pit))))) + +(deftest metric-resolution + (testing "default k is 10 and metric is precision" + (is (= {:precision + {:ignore_unlabeled false + :k 10 + :relevant_rating_threshold 1}} + (impact/get-metric {:replay {}})))) + (testing "default metric to be precision with k being top-k" + (is (= {:precision + {:ignore_unlabeled false + :k 5 + :relevant_rating_threshold 1}} + (impact/get-metric {:replay {:top-k 5}})))) + (testing "metric to be dcg" + (is (= {:dcg {:k 10 + :normalize false}} + (impact/get-metric {:replay {:metric {:dcg {}}}})))) + (testing "metric to be dcg with non supported attributes removed" + (is (= {:dcg {:k 10 + :normalize false}} + (impact/get-metric {:replay {:metric {:dcg {:foo "bar"}}}})))) + + (testing "on non supported metrics an exception is thrown" + (is (= :exception (try + (impact/get-metric {:replay {:metric {:foo {}}}}) + (catch Exception e + (is (instance? Exception e)) + :exception))))))