From 878e80c7977749bdee7c5d31c5b4c433c26863d9 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 30 May 2018 12:17:38 +0100
Subject: [PATCH 01/19] Clojure Collector: bump ring to 1.6.3 (closes #3778)

---
 2-collectors/clojure-collector/java-servlet/project.clj | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/2-collectors/clojure-collector/java-servlet/project.clj b/2-collectors/clojure-collector/java-servlet/project.clj
index f5fc1a5da7..54633232e9 100644
--- a/2-collectors/clojure-collector/java-servlet/project.clj
+++ b/2-collectors/clojure-collector/java-servlet/project.clj
@@ -22,14 +22,14 @@
   :url "http://www.apache.org/licenses/LICENSE-2.0"}
   :description "A SnowPlow event collector written in Clojure. AWS Elastic Beanstalk compatible."
   :dependencies     [[org.clojure/clojure "1.4.0"]
-                     [ring/ring-core "1.1.8"]
-                     [ring/ring-devel "1.1.8"]
+                     [ring/ring-core "1.6.3"]
+                     [ring/ring-devel "1.6.3"]
                      [compojure "1.1.3"]
                      [metrics-clojure "0.9.2"]
                      [metrics-clojure-ring "0.9.2"]
                      [commons-codec/commons-codec "1.7"]]
   ;; The jetty adapter is only used during development
-  :profiles         {:dev {:dependencies [[ring/ring-jetty-adapter "1.1.8"]]}}
+  :profiles         {:dev {:dependencies [[ring/ring-jetty-adapter "1.6.3"]]}}
   :war-resources-path   "war-resources"
   :plugins          [[lein-ring "0.8.3"]
                      [lein-beanstalk "0.2.6"]]

From 8aa5be947d17000ed0ff682dd6c029b31e1cae1a Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 30 May 2018 15:54:01 +0100
Subject: [PATCH 02/19] Clojure Collector: bump clojure to 1.9.0 (closes #3779)

---
 2-collectors/clojure-collector/java-servlet/project.clj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/2-collectors/clojure-collector/java-servlet/project.clj b/2-collectors/clojure-collector/java-servlet/project.clj
index 54633232e9..d58fb4da75 100644
--- a/2-collectors/clojure-collector/java-servlet/project.clj
+++ b/2-collectors/clojure-collector/java-servlet/project.clj
@@ -21,7 +21,7 @@
   :license {:name "Apache Version 2.0"
   :url "http://www.apache.org/licenses/LICENSE-2.0"}
   :description "A SnowPlow event collector written in Clojure. AWS Elastic Beanstalk compatible."
-  :dependencies     [[org.clojure/clojure "1.4.0"]
+  :dependencies     [[org.clojure/clojure "1.9.0"]
                      [ring/ring-core "1.6.3"]
                      [ring/ring-devel "1.6.3"]
                      [compojure "1.1.3"]

From 87a70fa228f7399fa762a257691245ec3d689546 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 30 May 2018 15:55:56 +0100
Subject: [PATCH 03/19] Clojure Collector: bump compojure to 1.6.1 (closes
 #3780)

---
 2-collectors/clojure-collector/java-servlet/project.clj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/2-collectors/clojure-collector/java-servlet/project.clj b/2-collectors/clojure-collector/java-servlet/project.clj
index d58fb4da75..12ed451fce 100644
--- a/2-collectors/clojure-collector/java-servlet/project.clj
+++ b/2-collectors/clojure-collector/java-servlet/project.clj
@@ -24,7 +24,7 @@
   :dependencies     [[org.clojure/clojure "1.9.0"]
                      [ring/ring-core "1.6.3"]
                      [ring/ring-devel "1.6.3"]
-                     [compojure "1.1.3"]
+                     [compojure "1.6.1"]
                      [metrics-clojure "0.9.2"]
                      [metrics-clojure-ring "0.9.2"]
                      [commons-codec/commons-codec "1.7"]]

From 496ca0fd08de4ecf3dd26e3d7daeb37573bf3d36 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 30 May 2018 16:00:53 +0100
Subject: [PATCH 04/19] Clojure Collector: bump metrics-clojure to 2.10.0
 (closes #3781)

---
 2-collectors/clojure-collector/java-servlet/project.clj | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/2-collectors/clojure-collector/java-servlet/project.clj b/2-collectors/clojure-collector/java-servlet/project.clj
index 12ed451fce..f449a6368e 100644
--- a/2-collectors/clojure-collector/java-servlet/project.clj
+++ b/2-collectors/clojure-collector/java-servlet/project.clj
@@ -25,8 +25,8 @@
                      [ring/ring-core "1.6.3"]
                      [ring/ring-devel "1.6.3"]
                      [compojure "1.6.1"]
-                     [metrics-clojure "0.9.2"]
-                     [metrics-clojure-ring "0.9.2"]
+                     [metrics-clojure "2.10.0"]
+                     [metrics-clojure-ring "2.10.0"]
                      [commons-codec/commons-codec "1.7"]]
   ;; The jetty adapter is only used during development
   :profiles         {:dev {:dependencies [[ring/ring-jetty-adapter "1.6.3"]]}}

From e31438725e3a3c8036d23807945c331e167bdb7e Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 30 May 2018 16:04:17 +0100
Subject: [PATCH 05/19] Clojure Collector: bump commons-codec to 1.11 (closes
 #3782)

---
 2-collectors/clojure-collector/java-servlet/project.clj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/2-collectors/clojure-collector/java-servlet/project.clj b/2-collectors/clojure-collector/java-servlet/project.clj
index f449a6368e..8530713c52 100644
--- a/2-collectors/clojure-collector/java-servlet/project.clj
+++ b/2-collectors/clojure-collector/java-servlet/project.clj
@@ -27,7 +27,7 @@
                      [compojure "1.6.1"]
                      [metrics-clojure "2.10.0"]
                      [metrics-clojure-ring "2.10.0"]
-                     [commons-codec/commons-codec "1.7"]]
+                     [commons-codec/commons-codec "1.11"]]
   ;; The jetty adapter is only used during development
   :profiles         {:dev {:dependencies [[ring/ring-jetty-adapter "1.6.3"]]}}
   :war-resources-path   "war-resources"

From fd2685597855188a6f77b6cac3c3999bc81300f0 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 30 May 2018 16:08:41 +0100
Subject: [PATCH 06/19] Clojure Collector: bump lein-ring to 0.12.4 (closes
 #3783)

---
 2-collectors/clojure-collector/java-servlet/project.clj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/2-collectors/clojure-collector/java-servlet/project.clj b/2-collectors/clojure-collector/java-servlet/project.clj
index 8530713c52..02b6769766 100644
--- a/2-collectors/clojure-collector/java-servlet/project.clj
+++ b/2-collectors/clojure-collector/java-servlet/project.clj
@@ -31,7 +31,7 @@
   ;; The jetty adapter is only used during development
   :profiles         {:dev {:dependencies [[ring/ring-jetty-adapter "1.6.3"]]}}
   :war-resources-path   "war-resources"
-  :plugins          [[lein-ring "0.8.3"]
+  :plugins          [[lein-ring "0.12.4"]
                      [lein-beanstalk "0.2.6"]]
   :ring {:handler snowplow.clojure-collector.beanstalk/app}) ; .beanstalk -> .core if you don't need Beanstalk support
 

From 5d3c4d5f48968e703a955f6644d16135154b3735 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 30 May 2018 16:12:22 +0100
Subject: [PATCH 07/19] Clojure Collector: remove lein-beanstalk (closes #3784)

---
 2-collectors/clojure-collector/java-servlet/project.clj | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/2-collectors/clojure-collector/java-servlet/project.clj b/2-collectors/clojure-collector/java-servlet/project.clj
index 02b6769766..f0dc984d9f 100644
--- a/2-collectors/clojure-collector/java-servlet/project.clj
+++ b/2-collectors/clojure-collector/java-servlet/project.clj
@@ -31,7 +31,6 @@
   ;; The jetty adapter is only used during development
   :profiles         {:dev {:dependencies [[ring/ring-jetty-adapter "1.6.3"]]}}
   :war-resources-path   "war-resources"
-  :plugins          [[lein-ring "0.12.4"]
-                     [lein-beanstalk "0.2.6"]]
+  :plugins          [[lein-ring "0.12.4"]]
   :ring {:handler snowplow.clojure-collector.beanstalk/app}) ; .beanstalk -> .core if you don't need Beanstalk support
 

From 044a4049f4ef7fc4bf99cf4f0fbe97f5d81d5eb8 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 30 May 2018 16:26:45 +0100
Subject: [PATCH 08/19] Clojure Collector: do not allow dependencies requiring
 an HTTP repository (closes #3559)

---
 2-collectors/clojure-collector/java-servlet/project.clj | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/2-collectors/clojure-collector/java-servlet/project.clj b/2-collectors/clojure-collector/java-servlet/project.clj
index f0dc984d9f..6ecfb586b5 100644
--- a/2-collectors/clojure-collector/java-servlet/project.clj
+++ b/2-collectors/clojure-collector/java-servlet/project.clj
@@ -13,10 +13,6 @@
 ;;;; Copyright: Copyright (c) 2012-2013 Snowplow Analytics Ltd
 ;;;; License:   Apache License Version 2.0
 
-(require 'cemerick.pomegranate.aether)
-(cemerick.pomegranate.aether/register-wagon-factory!
-  "http" #(org.apache.maven.wagon.providers.http.HttpWagon.))
-
 (defproject snowplow/clojure-collector "2.0.0" ;; MUST also bump version in server.xml
   :license {:name "Apache Version 2.0"
   :url "http://www.apache.org/licenses/LICENSE-2.0"}

From 980b2243403c07d955b9d001b4ebdf072b215444 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 30 May 2018 17:33:20 +0100
Subject: [PATCH 09/19] Clojure Collector: make cookie path configurable
 (closes #2739)

---
 .../src/snowplow/clojure_collector/config.clj    |  8 +++++++-
 .../src/snowplow/clojure_collector/core.clj      |  1 +
 .../src/snowplow/clojure_collector/responses.clj | 16 ++++++++++------
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/config.clj b/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/config.clj
index 2ed69659aa..f336652662 100644
--- a/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/config.clj
+++ b/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/config.clj
@@ -24,6 +24,7 @@
 (def ^:const duration-varnames ["PARAM4", "SP_DURATION"])
 (def ^:const cross-domain-policy-domain-varnames ["PARAM5", "SP_CDP_DOMAIN"])
 (def ^:const cross-domain-policy-secure-varnames ["PARAM6", "SP_CDP_SECURE"])
+(def ^:const path-varnames ["PARAM7", "SP_PATH"])
 
 ;; Defaults
 (def ^:const default-p3p-header "policyref=\"/w3c/p3p.xml\", CP=\"NOI DSP COR NID PSA OUR IND COM NAV STA\"")
@@ -69,9 +70,14 @@
 (def domain
   "Get the domain the name cookies will be set on.
    Can be a wildcard e.g. '.foo.com'.
-   If undefined we'll just use the FQDN of the host"
+   If undefined we'll just use the FQDN of the host."
   (get-var domain-varnames))
 
+(def path
+  "Get the path the cookies will be set on.
+   If undefined we'll just use /"
+  (get-var path-varnames))
+
 (def cross-domain-policy-domain
   "Get the cross domain policy domain.
   See the specification for reference:
diff --git a/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/core.clj b/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/core.clj
index 9bbd1de678..2cf272f4d2 100644
--- a/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/core.clj
+++ b/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/core.clj
@@ -35,6 +35,7 @@
     cookies
     config/duration
     config/domain
+    config/path
     config/p3p-header
     pixel
     vendor
diff --git a/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/responses.clj b/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/responses.clj
index 0fdccf8f61..d0ab25046b 100644
--- a/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/responses.clj
+++ b/2-collectors/clojure-collector/java-servlet/src/snowplow/clojure_collector/responses.clj
@@ -38,13 +38,17 @@
 
 (defn- set-cookie
   "Sets a Snowplow cookie with visitor `id`,
-   to last `duration` seconds for `domain`.
+   to last `duration` seconds for `domain` at `path`.
    If domain is nil, leave out so the FQDN
-   of the host can be used instead"
-  [id duration domain]
+   of the host can be used instead.
+   If path is nil, path will be /."
+  [id duration domain path]
   (merge
     {:value    id
-     :expires (now-plus duration)}
+     :expires (now-plus duration)
+     :path (if (nil? path)
+       "/"
+       path)}
    (when-let [d domain]
     {:domain   d})))
 
@@ -95,11 +99,11 @@
 (defn send-cookie-pixel-or-200-or-redirect
   "Respond with the cookie and either a
    transparent pixel, a 200 or a redirect"
-  [cookies duration domain p3p-header pixel vendor params]
+  [cookies duration domain path p3p-header pixel vendor params]
   (let [id      (generate-id cookies)
         cookies (if (= duration 0)
                   {}
-                  {cookie-name (set-cookie id duration domain)})
+                  {cookie-name (set-cookie id duration domain path)})
         headers {"P3P" p3p-header}]
     (if (= vendor "r")
       (send-redirect cookies headers params)

From 311a4dcc60a8bc826cd7c190d9164344d1523aaa Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 6 Jun 2018 10:14:04 +0200
Subject: [PATCH 10/19] EmrEtlRunner: replace Sluice by aws-sdk-s3 (closes
 #3524)

---
 3-enrich/emr-etl-runner/Gemfile               |   2 +-
 3-enrich/emr-etl-runner/Gemfile.lock          | 104 +++++-------------
 .../lib/snowplow-emr-etl-runner.rb            |   1 +
 .../lib/snowplow-emr-etl-runner/emr_job.rb    |  94 +++++++---------
 .../lib/snowplow-emr-etl-runner/s3.rb         | 101 +++++++++++++++++
 .../spec/snowplow-emr-etl-runner/s3_spec.rb   |  81 ++++++++++++++
 6 files changed, 254 insertions(+), 129 deletions(-)
 create mode 100644 3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/s3.rb
 create mode 100644 3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/s3_spec.rb

diff --git a/3-enrich/emr-etl-runner/Gemfile b/3-enrich/emr-etl-runner/Gemfile
index 98ef9bba0b..6eb50b1d03 100755
--- a/3-enrich/emr-etl-runner/Gemfile
+++ b/3-enrich/emr-etl-runner/Gemfile
@@ -25,7 +25,7 @@ gem "avro", "~> 1.8.1"
 gem "awrence", "~> 0.1.0"
 gem "snowplow-tracker", "~> 0.5.2"
 gem "iglu-ruby-client", ">= 0.1.0"
-gem "sluice", "~> 0.4.0"
+gem "aws-sdk-s3", "~> 1"
 gem "diplomat", "~> 2.0.1"
 
 group :development do
diff --git a/3-enrich/emr-etl-runner/Gemfile.lock b/3-enrich/emr-etl-runner/Gemfile.lock
index 6ac13aec88..5afa1ad3d9 100644
--- a/3-enrich/emr-etl-runner/Gemfile.lock
+++ b/3-enrich/emr-etl-runner/Gemfile.lock
@@ -1,12 +1,26 @@
 GEM
   remote: https://rubygems.org/
   specs:
-    CFPropertyList (2.3.5)
     addressable (2.5.0)
       public_suffix (~> 2.0, >= 2.0.2)
     avro (1.8.1)
       multi_json
     awrence (0.1.0)
+    aws-eventstream (1.0.0)
+    aws-partitions (1.89.1)
+    aws-sdk-core (3.21.2)
+      aws-eventstream (~> 1.0)
+      aws-partitions (~> 1.0)
+      aws-sigv4 (~> 1.0)
+      jmespath (~> 1.0)
+    aws-sdk-kms (1.5.0)
+      aws-sdk-core (~> 3)
+      aws-sigv4 (~> 1.0)
+    aws-sdk-s3 (1.13.0)
+      aws-sdk-core (~> 3, >= 3.21.2)
+      aws-sdk-kms (~> 1)
+      aws-sigv4 (~> 1.0)
+    aws-sigv4 (1.0.2)
     builder (3.2.3)
     contracts (0.11.0)
     coveralls (0.8.19)
@@ -26,67 +40,24 @@ GEM
       fog-aws (~> 1.0)
       rest-client (~> 1.0)
       unf (~> 0.1)
-    excon (0.52.0)
+    excon (0.62.0)
     faraday (0.12.2)
       multipart-post (>= 1.2, < 3)
-    fission (0.5.0)
-      CFPropertyList (~> 2.2)
-    fog (1.25.0)
-      fog-brightbox (~> 0.4)
-      fog-core (~> 1.25)
-      fog-json
-      fog-profitbricks
-      fog-radosgw (>= 0.0.2)
-      fog-sakuracloud (>= 0.0.4)
-      fog-softlayer
-      fog-terremark
-      fog-vmfusion
-      fog-voxel
-      fog-xml (~> 0.1.1)
-      ipaddress (~> 0.5)
-      nokogiri (~> 1.5, >= 1.5.11)
-      opennebula
-    fog-aws (1.4.0)
+    fog-aws (1.4.1)
       fog-core (~> 1.38)
       fog-json (~> 1.0)
       fog-xml (~> 0.1)
       ipaddress (~> 0.8)
-    fog-brightbox (0.11.0)
-      fog-core (~> 1.22)
-      fog-json
-      inflecto (~> 0.0.2)
-    fog-core (1.42.0)
+    fog-core (1.45.0)
       builder
-      excon (~> 0.49)
+      excon (~> 0.58)
       formatador (~> 0.2)
     fog-json (1.0.2)
       fog-core (~> 1.0)
       multi_json (~> 1.10)
-    fog-profitbricks (3.0.0)
-      fog-core (~> 1.42)
-      fog-json (~> 1.0)
-    fog-radosgw (0.0.5)
-      fog-core (>= 1.21.0)
-      fog-json
-      fog-xml (>= 0.0.1)
-    fog-sakuracloud (1.7.5)
-      fog-core
-      fog-json
-    fog-softlayer (1.1.4)
-      fog-core
-      fog-json
-    fog-terremark (0.1.0)
-      fog-core
-      fog-xml
-    fog-vmfusion (0.1.0)
-      fission
+    fog-xml (0.1.3)
       fog-core
-    fog-voxel (0.1.0)
-      fog-core
-      fog-xml
-    fog-xml (0.1.2)
-      fog-core
-      nokogiri (~> 1.5, >= 1.5.11)
+      nokogiri (>= 1.5.11, < 2.0.0)
     formatador (0.2.5)
     http-cookie (1.0.3)
       domain_name (~> 0.5)
@@ -95,31 +66,21 @@ GEM
     iglu-ruby-client (0.1.0)
       httparty (<= 0.14.0)
       json-schema (~> 2.7.0, >= 2.7.0)
-    inflecto (0.0.2)
     ipaddress (0.8.3)
-    jruby-jars (9.1.7.0)
-    jruby-rack (1.1.20)
+    jmespath (1.4.0)
+    jruby-jars (9.2.0.0)
+    jruby-rack (1.1.21)
     json (2.0.3-java)
     json-schema (2.7.0)
       addressable (>= 2.4)
     mime-types (2.99.3)
-    multi_json (1.12.1)
+    multi_json (1.13.1)
     multi_xml (0.6.0)
     multipart-post (2.0.0)
-    net-ssh (2.9.4)
     netrc (0.11.0)
-    nokogiri (1.7.0.1-java)
-    opennebula (5.4.0)
-      json
-      nokogiri
-      rbvmomi
+    nokogiri (1.8.2-java)
     public_suffix (2.0.5)
-    rake (12.0.0)
-    rbvmomi (1.11.3)
-      builder (~> 3.0)
-      json (>= 1.8)
-      nokogiri (~> 1.5)
-      trollop (~> 2.1)
+    rake (12.3.1)
     rest-client (1.8.0)
       http-cookie (>= 1.0.2, < 2.0)
       mime-types (>= 1.16, < 3.0)
@@ -143,19 +104,14 @@ GEM
       json (>= 1.8, < 3)
       simplecov-html (~> 0.10.0)
     simplecov-html (0.10.0)
-    sluice (0.4.0)
-      contracts (~> 0.9)
-      fog (= 1.25)
-      net-ssh (~> 2.9.2)
     snowplow-tracker (0.5.2)
       contracts (~> 0.7, <= 0.11)
     term-ansicolor (1.4.0)
       tins (~> 1.0)
     thor (0.19.4)
     tins (1.13.2)
-    trollop (2.1.2)
     unf (0.1.4-java)
-    warbler (2.0.4)
+    warbler (2.0.5)
       jruby-jars (>= 9.0.0.0)
       jruby-rack (>= 1.1.1, < 1.3)
       rake (>= 10.1.0)
@@ -167,13 +123,13 @@ PLATFORMS
 DEPENDENCIES
   avro (~> 1.8.1)
   awrence (~> 0.1.0)
+  aws-sdk-s3 (~> 1)
   contracts (~> 0.9, <= 0.11)
   coveralls
   diplomat (~> 2.0.1)
   elasticity (~> 6.0.12)
   iglu-ruby-client (>= 0.1.0)
   rspec (~> 3.5.0)
-  sluice (~> 0.4.0)
   snowplow-tracker (~> 0.5.2)
   warbler
 
@@ -181,4 +137,4 @@ RUBY VERSION
    ruby 2.3.1p0 (jruby 9.1.6.0)
 
 BUNDLED WITH
-   1.15.3
+   1.15.4
diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner.rb
index 3c74e433fc..d9db70a10f 100755
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner.rb
@@ -21,6 +21,7 @@
 require_relative 'snowplow-emr-etl-runner/monitoring/snowplow'
 require_relative 'snowplow-emr-etl-runner/cli'
 require_relative 'snowplow-emr-etl-runner/job_result'
+require_relative 'snowplow-emr-etl-runner/s3'
 require_relative 'snowplow-emr-etl-runner/emr_job'
 require_relative 'snowplow-emr-etl-runner/lock/lock'
 require_relative 'snowplow-emr-etl-runner/lock/file_lock'
diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
index 57465d21aa..d36d8c15a5 100755
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
@@ -15,7 +15,7 @@
 
 require 'set'
 require 'elasticity'
-require 'sluice'
+require 'aws-sdk-s3'
 require 'awrence'
 require 'json'
 require 'base64'
@@ -23,7 +23,6 @@
 require 'iglu-client'
 require 'securerandom'
 require 'tempfile'
-require 'fog'
 
 # Ruby class to execute Snowplow's Hive jobs against Amazon EMR
 # using Elasticity (https://github.com/rslifka/elasticity).
@@ -55,6 +54,7 @@ class EmrJob
 
       include Monitoring::Logging
       include Snowplow::EmrEtlRunner::Utils
+      include Snowplow::EmrEtlRunner::S3
 
       # Initializes our wrapper for the Amazon EMR client.
       Contract Bool, Bool, Bool, Bool, Bool, Bool, Bool, Bool, ArchiveStep, ArchiveStep, ConfigHash, ArrayOf[String], String, TargetsHash, RdbLoaderSteps => EmrJob
@@ -83,10 +83,10 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
         etl_tstamp = (run_tstamp.to_f * 1000).to_i.to_s
         output_codec = output_codec_from_compression_format(config.dig(:enrich, :output_compression))
 
-        s3 = Sluice::Storage::S3::new_fog_s3_from(
-          config[:aws][:s3][:region],
-          config[:aws][:access_key_id],
-          config[:aws][:secret_access_key])
+        s3 = Aws::S3::Client.new(
+          :access_key_id => config[:aws][:access_key_id],
+          :secret_access_key => config[:aws][:secret_access_key],
+          :region => config[:aws][:s3][:region])
 
         ami_version = Gem::Version.new(config[:aws][:emr][:ami_version])
 
@@ -137,18 +137,15 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
 
         # staging
         if staging
-          csbr_processing_loc = Sluice::Storage::S3::Location.new(csbr[:processing])
-          unless Sluice::Storage::S3::is_empty?(s3, csbr_processing_loc)
-            raise DirectoryNotEmptyError, "Cannot safely add staging step to jobflow, #{csbr_processing_loc} is not empty"
+          unless empty?(s3, csbr[:processing])
+            raise DirectoryNotEmptyError, "Cannot safely add staging step to jobflow, #{csbr[:processing]} is not empty"
           end
 
           src_pattern = collector_format == 'clj-tomcat' ? '.*localhost\_access\_log.*\.txt.*' : '.+'
           src_pattern_regex = Regexp.new src_pattern
           non_empty_locs = csbr[:in].select { |l|
-            loc = Sluice::Storage::S3::Location.new(l)
-            files = Sluice::Storage::S3::list_files(s3, loc)
-              .select { |f| !(f.key =~ src_pattern_regex).nil? }
-            files.length > 0
+            not empty?(s3, l,
+              lambda { |k| !(k =~ /\/$/) and !(k =~ /\$folder\$$/) and !(k =~ src_pattern_regex).nil? })
           }
 
           if non_empty_locs.empty?
@@ -369,9 +366,8 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
             end
 
           # Late check whether our enrichment directory is empty. We do an early check too
-          csbe_good_loc = Sluice::Storage::S3::Location.new(csbe[:good])
-          unless Sluice::Storage::S3::is_empty?(s3, csbe_good_loc)
-            raise DirectoryNotEmptyError, "Cannot safely add enrichment step to jobflow, #{csbe_good_loc} is not empty"
+          unless empty?(s3, csbe[:good])
+            raise DirectoryNotEmptyError, "Cannot safely add enrichment step to jobflow, #{csbe[:good]} is not empty"
           end
           @jobflow.add_step(enrich_step)
 
@@ -399,16 +395,12 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
 
         # Staging data produced by Stream Enrich
         if staging_stream_enrich
-          csbe_good_loc = Sluice::Storage::S3::Location.new(csbe[:good])
-          unless Sluice::Storage::S3::is_empty?(s3, csbe_good_loc)
-            raise DirectoryNotEmptyError, "Cannot safely add stream staging step to jobflow, #{csbe_good_loc} is not empty"
+          unless empty?(s3, csbe[:good])
+            raise DirectoryNotEmptyError, "Cannot safely add stream staging step to jobflow, #{csbe[:good]} is not empty"
           end
 
-          stream_enrich_loc = Sluice::Storage::S3::Location.new(csbe[:stream])
-
           src_pattern_regex = Regexp.new STREAM_ENRICH_REGEXP
-          files = Sluice::Storage::S3::list_files(s3, stream_enrich_loc).select { |f| !(f.key =~ src_pattern_regex).nil? }
-          if files.empty?
+          if empty?(s3, csbe[:stream], lambda { |k| !(k =~ /\/$/) and !(k =~ /\$folder\$$/) and !(k =~ src_pattern_regex).nil? })
             raise NoDataToProcessError, "No Snowplow enriched stream logs to process since last run"
           end
 
@@ -431,11 +423,11 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
 
           # Add processing manifest if available
           processing_manifest = get_processing_manifest(targets)
-          processing_manifest_shred_args = 
-            if not processing_manifest.nil? 
+          processing_manifest_shred_args =
+            if not processing_manifest.nil?
               if Gem::Version.new(config[:storage][:versions][:rdb_shredder]) >= SHRED_JOB_WITH_PROCESSING_MANIFEST
                 { 'processing-manifest-table' => processing_manifest, 'item-id' => shred_final_output }
-              else 
+              else
                 {}
               end
             else
@@ -457,7 +449,6 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
               "--outputCodec", "none",
               "--s3Endpoint" , s3_endpoint
             ]
-
             copy_to_hdfs_step.name << ": Enriched S3 -> HDFS"
             @jobflow.add_step(copy_to_hdfs_step)
           end
@@ -496,9 +487,8 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
             end
 
           # Late check whether our target directory is empty
-          csbs_good_loc = Sluice::Storage::S3::Location.new(csbs[:good])
-          unless Sluice::Storage::S3::is_empty?(s3, csbs_good_loc)
-            raise DirectoryNotEmptyError, "Cannot safely add shredding step to jobflow, #{csbs_good_loc} is not empty"
+          unless empty?(s3, csbs[:good])
+            raise DirectoryNotEmptyError, "Cannot safely add shredding step to jobflow, #{csbs[:good]} is not empty"
           end
           @jobflow.add_step(shred_step)
 
@@ -674,23 +664,26 @@ def run(config)
       Contract String, String, String, String => nil
       def output_rdb_loader_logs(region, aws_access_key_id, aws_secret_key, log_level)
 
-        s3 = Sluice::Storage::S3::new_fog_s3_from(region, aws_access_key_id, aws_secret_key)
-
-        loc = Sluice::Storage::S3::Location.new(@rdb_loader_log_base)
+        s3 = Aws::S3::Client.new(
+          :access_key_id => aws_access_key_id,
+          :secret_access_key => aws_secret_key,
+          :region => region)
 
-        if @rdb_loader_logs.empty? or Sluice::Storage::S3::is_empty?(s3, loc)
+        if @rdb_loader_logs.empty? or empty?(s3, @rdb_loader_log_base)
           logger.info "No RDB Loader logs"
         else
           logger.info "RDB Loader logs"
 
           @rdb_loader_logs.each do |l|
             tmp = Tempfile.new("rdbloader")
-            uri = URI.parse(l[1])
-            bucket, key = uri.host, uri.path[1..-1]
-            logger.debug "Downloading #{uri} to #{tmp.path}"
+            bucket, key = parse_bucket_prefix(l[1])
+            logger.debug "Downloading #{l[1]} to #{tmp.path}"
             begin
-              log = s3.directories.get(bucket).files.head(key)
-              Sluice::Storage::S3::download_file(s3, log, tmp)
+              s3.get_object({
+                response_target: tmp,
+                bucket: bucket,
+                key: key,
+              })
               if log_level == 'info'
                 logger.info l[0]
                 logger.info tmp.read
@@ -774,22 +767,15 @@ def get_rdb_loader_steps(config, targets, resolver, jar, rdbloader_steps, skip_m
       # +s3+:: AWS S3 client
       # +s3_path+:: Full S3 path to folder
       def get_latest_run_id(s3, s3_path)
-        uri = URI.parse(s3_path)
-        folders = s3.directories.get(uri.host, delimiter: '/', prefix: uri.path[1..-1]).files.common_prefixes
-        # each is mandatory, otherwise there'll be pagination issues if there are > 1k objects
-        # cf snowplow/snowplow#3434
-        run_folders = []
-        folders.each { |f|
-          if f.include?('run=')
-            run_folders << f
-          end
-        }
-        begin
-          folder = run_folders[-1].split('/')[-1]
-          folder.slice('run='.length, folder.length)
-        rescue NoMethodError => _
+        run_id_regex = /.*\/run=((\d|-)+)\/.*/
+        folders = list_object_names(s3, s3_path,
+            lambda { |k| !(k =~ /\$folder\$$/) and !k[run_id_regex, 1].nil? })
+          .map { |k| k[run_id_regex, 1] }
+        if folders.empty?
           logger.error "No run folders in [#{s3_path}] found"
           raise UnexpectedStateError, "No run folders in [#{s3_path}] found"
+        else
+          folders.first
         end
       end
 
@@ -802,7 +788,7 @@ def get_latest_run_id(s3, s3_path)
       # +name+:: step description to show in EMR console
       #
       # Returns a step ready for adding to the Elasticity Jobflow.
-      Contract String, String, String, String, String => Elasticity::S3DistCpStep
+      Contract String, String, String, String, String, Bool => Elasticity::S3DistCpStep
       def get_archive_step(good_path, archive_path, run_id_folder, s3_endpoint, name)
         archive_step = Elasticity::S3DistCpStep.new(legacy = @legacy)
         archive_step.arguments = [
diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/s3.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/s3.rb
new file mode 100644
index 0000000000..6877436782
--- /dev/null
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/s3.rb
@@ -0,0 +1,101 @@
+# Copyright (c) 2012-2018 Snowplow Analytics Ltd. All rights reserved.
+#
+# This program is licensed to you under the Apache License Version 2.0,
+# and you may not use this file except in compliance with the Apache License Version 2.0.
+# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the Apache License Version 2.0 is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+
+# Author::    Ben Fradet (mailto:support@snowplowanalytics.com)
+# Copyright:: Copyright (c) 2012-2018 Snowplow Analytics Ltd
+# License::   Apache License Version 2.0
+
+require 'aws-sdk-s3'
+require 'contracts'
+require 'pathname'
+require 'uri'
+
+module Snowplow
+  module EmrEtlRunner
+    module S3
+
+      include Contracts
+
+      # Check a location on S3 is empty.
+      #
+      # Parameters:
+      # +client+:: S3 client
+      # +location+:: S3 url of the folder to check for emptiness
+      # +key_filter+:: filter to apply on the keys, filters folders and $folder$ files by default
+      def empty?(client, location,
+          key_filter = lambda { |k| !(k =~ /\/$/) and !(k =~ /\$folder\$$/) })
+        bucket, prefix = parse_bucket_prefix(location)
+        empty_impl(client, bucket, prefix, key_filter)
+      end
+
+      # List all object names satisfying a key filter.
+      #
+      # Parameters:
+      # +client+:: S3 client
+      # +location+:: S3 url of the folder to list the object names for
+      # +key_filter+:: filter to apply on the keys
+      def list_object_names(client, location, key_filter)
+        bucket, prefix = parse_bucket_prefix(location)
+        list_object_names_impl(client, bucket, prefix, key_filter)
+      end
+
+      # Extract the bucket and prefix from an S3 url.
+      #
+      # Parameters:
+      # +location+:: the S3 url to parse
+      Contract String => [String, String]
+      def parse_bucket_prefix(location)
+        u = URI.parse(location)
+        return u.host, u.path[1..-1]
+      end
+
+    private
+
+      def list_object_names_impl(client, bucket, prefix, key_filter, max_keys = 50, token = nil)
+        response = list_objects(client, bucket, prefix, max_keys, token)
+        filtered = response.contents
+          .select { |c| key_filter[c.key] }
+          .map { |c| c.key }
+        if response.is_truncated
+          filtered + list_object_names_impl(
+            client, bucket, prefix, key_filter, max_keys, response.next_continuation_token)
+        else
+          filtered
+        end
+      end
+
+      def empty_impl(client, bucket, prefix, key_filter, max_keys = 50, token = nil)
+        response = list_objects(client, bucket, prefix, max_keys, token)
+        filtered = response.contents.select { |c| key_filter[c.key] }
+        if filtered.empty?
+          if response.is_truncated
+            empty_impl(client, bucket, prefix, key_filter, max_keys, response.next_continuation_token)
+          else
+            true
+          end
+        else
+          false
+        end
+      end
+
+      def list_objects(client, bucket, prefix, max_keys, token)
+        options = {
+          bucket: bucket,
+          prefix: prefix,
+          max_keys: max_keys,
+        }
+        options[:continuation_token] = token if !token.nil?
+        client.list_objects_v2(options)
+      end
+
+    end
+  end
+end
diff --git a/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/s3_spec.rb b/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/s3_spec.rb
new file mode 100644
index 0000000000..80c4bb482d
--- /dev/null
+++ b/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/s3_spec.rb
@@ -0,0 +1,81 @@
+# Copyright (c) 2012-2018 Snowplow Analytics Ltd. All rights reserved.
+#
+# This program is licensed to you under the Apache License Version 2.0,
+# and you may not use this file except in compliance with the Apache License Version 2.0.
+# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the Apache License Version 2.0 is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+
+# Author::    Ben Fradet (mailto:support@snowplowanalytics.com)
+# Copyright:: Copyright (c) 2012-2018 Snowplow Analytics Ltd
+# License::   Apache License Version 2.0
+
+require 'aws-sdk-s3'
+require 'spec_helper'
+
+S3 = Snowplow::EmrEtlRunner::S3
+
+describe S3 do
+  subject { Object.new.extend described_class }
+
+  s3 = Aws::S3::Client.new(stub_responses: true)
+
+  describe '#empty?' do
+    it 'should take a client and location argument' do
+      expect(subject).to respond_to(:empty?).with(2).argument
+    end
+
+    it 'should take a client, a location and a filter argument' do
+      expect(subject).to respond_to(:empty?).with(3).argument
+    end
+
+    it 'should check a folder on S3 is empty' do
+      s3.stub_responses(:list_objects_v2,
+        { contents: [{ key: 'prefix/example1.jpg' }, { key: 'prefix/example2.jpg' }]})
+      expect(subject.empty?(s3, 's3://bucket/prefix')).to eq(false)
+    end
+
+    it 'should filter folders by default' do
+      s3.stub_responses(:list_objects_v2, { contents: [{ key: 'prefix/folder/' }]})
+      expect(subject.empty?(s3, 's3://bucket/prefix')).to eq(true)
+    end
+
+    it 'should filter $folder$ files by default' do
+      s3.stub_responses(:list_objects_v2, { contents: [{ key: 'prefix/something_$folder$' }]})
+      expect(subject.empty?(s3, 's3://bucket/prefix')).to eq(true)
+    end
+
+    it 'should support custom filters' do
+      s3.stub_responses(:list_objects_v2, { contents: [{ key: 'abc' }]})
+      expect(subject.empty?(s3, 's3://bucket/prefix', lambda { |k| k.length == 3})).to eq(false)
+    end
+  end
+
+  describe '#list_object_names' do
+    it 'should take a client, a location and a filter argument' do
+      expect(subject).to respond_to(:list_object_names).with(3).argument
+    end
+
+    it 'should filter file names based on the filter' do
+      s3.stub_responses(:list_objects_v2, { contents: [{ key: 'abc' }, { key: 'defg' }]})
+      expect(subject.list_object_names(s3, 's3://bucket/prefix', lambda { |k| k.length == 3}))
+        .to eq(['abc'])
+    end
+
+  end
+
+  describe '#parse_bucket_prefix' do
+    it 'should take a s3 url argument' do
+      expect(subject).to respond_to(:parse_bucket_prefix).with(1).argument
+    end
+
+    it 'should parse the bucket and prefix' do
+      expect(subject.parse_bucket_prefix('s3://bucket/prefix')).to eq(['bucket', 'prefix'])
+      expect(subject.parse_bucket_prefix('s3://bucket/prefix/file.jpg')).to eq(
+        ['bucket', 'prefix/file.jpg'])
+    end
+  end
+end

From b8d09def1c81b1a3bb326cc968e57042dea85e95 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 6 Jun 2018 13:05:16 +0200
Subject: [PATCH 11/19] EmrEtlRunner: make protocol in Snowplow monitoring
 configurable (closes #3791)

---
 3-enrich/emr-etl-runner/config/config.yml.sample      |  1 +
 .../emr-etl-runner/config/stream_config.yml.sample    |  1 +
 .../snowplow-emr-etl-runner/monitoring/snowplow.rb    | 11 ++++++++---
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/3-enrich/emr-etl-runner/config/config.yml.sample b/3-enrich/emr-etl-runner/config/config.yml.sample
index f2bb962e9a..dd071d2586 100644
--- a/3-enrich/emr-etl-runner/config/config.yml.sample
+++ b/3-enrich/emr-etl-runner/config/config.yml.sample
@@ -75,5 +75,6 @@ monitoring:
     level: DEBUG # You can optionally switch to INFO for production
   snowplow:
     method: get
+    protocol: http
     app_id: ADD HERE # e.g. snowplow
     collector: ADD HERE # e.g. d3rkrsqld9gmqf.cloudfront.net
diff --git a/3-enrich/emr-etl-runner/config/stream_config.yml.sample b/3-enrich/emr-etl-runner/config/stream_config.yml.sample
index ac76d3f6a4..bf30c53c87 100644
--- a/3-enrich/emr-etl-runner/config/stream_config.yml.sample
+++ b/3-enrich/emr-etl-runner/config/stream_config.yml.sample
@@ -67,3 +67,4 @@ monitoring:
     method: get
     app_id: ADD HERE # e.g. snowplow
     collector: ADD HERE # e.g. d3rkrsqld9gmqf.cloudfront.net
+    protocol: http
diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/monitoring/snowplow.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/monitoring/snowplow.rb
index 8717cbb439..94829d3165 100644
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/monitoring/snowplow.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/monitoring/snowplow.rb
@@ -28,7 +28,6 @@ class Snowplow
         include Contracts
 
         # Constants
-        PROTOCOL = "http"
         PORT = 80
         BUFFER_SIZE = 0
         APPLICATION_CONTEXT_SCHEMA = "iglu:com.snowplowanalytics.monitoring.batch/application_context/jsonschema/1-0-0"
@@ -40,6 +39,7 @@ class Snowplow
 
         # Parameters
         @@method = "get"
+        @@protocol = "http"
         @@collector_uri = nil
         @@app_id = nil
 
@@ -51,9 +51,14 @@ def self.parameterize(config)
           cm = config[:monitoring]
           cms = cm[:snowplow]
           @@method = cms[:method].downcase || @@method
+          @@protocol = if not cms[:protocol].nil?
+            cms[:protocol].downcase
+          else
+            @@protocol
+          end
           @@collector_uri = cms[:collector] # Could be nil
           @@app_id = cms[:app_id] # Could be nil
-          
+
           @@app_context = SnowplowTracker::SelfDescribingJson.new(APPLICATION_CONTEXT_SCHEMA, {
             :name => NAME,
             :version => VERSION,
@@ -70,7 +75,7 @@ def initialize
           @tracker =
             if @@collector_uri
               emitter = SnowplowTracker::Emitter.new(@@collector_uri, {
-                :protocol => PROTOCOL,
+                :protocol => @@protocol,
                 :method => @@method,
                 :port => PORT,
                 :buffer_size => BUFFER_SIZE

From f708724df99b599013ad22c99ddd8b36c42cc61d Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 6 Jun 2018 13:06:29 +0200
Subject: [PATCH 12/19] EmrEtlRunner: make port in Snowplow monitoring
 configurable (closes #3236)

---
 3-enrich/emr-etl-runner/config/config.yml.sample             | 1 +
 3-enrich/emr-etl-runner/config/stream_config.yml.sample      | 1 +
 .../lib/snowplow-emr-etl-runner/monitoring/snowplow.rb       | 5 +++--
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/3-enrich/emr-etl-runner/config/config.yml.sample b/3-enrich/emr-etl-runner/config/config.yml.sample
index dd071d2586..64eb75aca6 100644
--- a/3-enrich/emr-etl-runner/config/config.yml.sample
+++ b/3-enrich/emr-etl-runner/config/config.yml.sample
@@ -76,5 +76,6 @@ monitoring:
   snowplow:
     method: get
     protocol: http
+    port: 80
     app_id: ADD HERE # e.g. snowplow
     collector: ADD HERE # e.g. d3rkrsqld9gmqf.cloudfront.net
diff --git a/3-enrich/emr-etl-runner/config/stream_config.yml.sample b/3-enrich/emr-etl-runner/config/stream_config.yml.sample
index bf30c53c87..f1958e3f98 100644
--- a/3-enrich/emr-etl-runner/config/stream_config.yml.sample
+++ b/3-enrich/emr-etl-runner/config/stream_config.yml.sample
@@ -68,3 +68,4 @@ monitoring:
     app_id: ADD HERE # e.g. snowplow
     collector: ADD HERE # e.g. d3rkrsqld9gmqf.cloudfront.net
     protocol: http
+    port: 80
diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/monitoring/snowplow.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/monitoring/snowplow.rb
index 94829d3165..d7f52c6020 100644
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/monitoring/snowplow.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/monitoring/snowplow.rb
@@ -28,7 +28,6 @@ class Snowplow
         include Contracts
 
         # Constants
-        PORT = 80
         BUFFER_SIZE = 0
         APPLICATION_CONTEXT_SCHEMA = "iglu:com.snowplowanalytics.monitoring.batch/application_context/jsonschema/1-0-0"
         JOB_STATUS_SCHEMA = "iglu:com.snowplowanalytics.monitoring.batch/emr_job_status/jsonschema/1-0-0"
@@ -40,6 +39,7 @@ class Snowplow
         # Parameters
         @@method = "get"
         @@protocol = "http"
+        @@port = 80
         @@collector_uri = nil
         @@app_id = nil
 
@@ -56,6 +56,7 @@ def self.parameterize(config)
           else
             @@protocol
           end
+          @@port = cms[:port] || @@port
           @@collector_uri = cms[:collector] # Could be nil
           @@app_id = cms[:app_id] # Could be nil
 
@@ -77,7 +78,7 @@ def initialize
               emitter = SnowplowTracker::Emitter.new(@@collector_uri, {
                 :protocol => @@protocol,
                 :method => @@method,
-                :port => PORT,
+                :port => @@port,
                 :buffer_size => BUFFER_SIZE
               })
 

From f8c1574e876c7a97fb7facb4253a753f1e6a2737 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 6 Jun 2018 15:10:20 +0200
Subject: [PATCH 13/19] EmrEtlRunner: add --ignore-lock-on-start option (closes
 #3537)

---
 .../emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb    | 9 ++++++---
 .../emr-etl-runner/lib/snowplow-emr-etl-runner/runner.rb | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb
index 10f317a086..6d75acc339 100755
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb
@@ -56,7 +56,8 @@ def self.get_args_config_enrichments_resolver
         options = {
           :debug => false,
           :skip => [],
-          :include => []
+          :include => [],
+          :ignore_lock_on_start => false,
         }
 
         commands = {
@@ -72,6 +73,7 @@ def self.get_args_config_enrichments_resolver
             opts.on('-x', "--skip {#{SKIPPABLES.to_a.join(',')}}", Array, 'skip the specified step(s)') { |config| options[:skip] = config }
             opts.on('-i', "--include {#{INCLUDES.to_a.join(',')}}", Array, 'include additional step(s)') { |config| options[:include] = config }
             opts.on('-l', '--lock PATH', 'where to store the lock') { |config| options[:lock] = config }
+            opts.on('--ignore-lock-on-start', 'ignore the lock if it is set when starting') { |config| options[:ignore_lock_on_start] = true }
             opts.on('--consul ADDRESS', 'address to the Consul server') { |config| options[:consul] = config }
           end,
           'generate emr-config' => OptionParser.new do |opts|
@@ -183,6 +185,7 @@ def self.process_options(options, optparse, cmd_name)
           :resume_from => options[:resume_from],
           :include => options[:include],
           :lock => options[:lock],
+          :ignore_lock_on_start => options[:ignore_lock_on_start],
           :consul => options[:consul]
         }
 
@@ -267,7 +270,7 @@ def self.load_targets(targets_path)
           Dir.entries(targets_path).select do |f|
             f.end_with?('.json')
           end.map do |f|
-            json = JSON.parse(File.read(targets_path + '/' + f), {:symbolize_names => true}) 
+            json = JSON.parse(File.read(targets_path + '/' + f), {:symbolize_names => true})
             id = json.dig(:data, :id)
             unless id.nil?
               ids.push(id)
@@ -322,7 +325,7 @@ def self.validate_and_coalesce(args, config)
           if args[:resume_from] == "enrich"
             raise ConfigError, 'cannot resume from enrich in stream enrich mode'
           end
-          if args[:skip].include?('staging') || args[:skip].include?('enrich') 
+          if args[:skip].include?('staging') || args[:skip].include?('enrich')
             raise ConfigError, 'cannot skip staging nor enrich in stream enrich mode. Either skip staging_stream_enrich or resume from shred'
           end
           if args[:skip].include?('archive_raw') || args[:resume_from] == "archive_raw"
diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/runner.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/runner.rb
index b998281e39..612f86762c 100755
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/runner.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/runner.rb
@@ -39,7 +39,7 @@ def self.get_steps(skips, resume, enriched_stream)
             not skips.include?('shred')),
           :es => ((resume.nil? or [ 'enrich', 'shred', 'elasticsearch' ].include?(resume)) and
             not skips.include?('elasticsearch')),
-          :archive_raw => (enriched_stream.nil? and (resume.nil? or [ 'enrich', 'shred', 'elasticsearch', 'archive_raw' ].include?(resume)) and 
+          :archive_raw => (enriched_stream.nil? and (resume.nil? or [ 'enrich', 'shred', 'elasticsearch', 'archive_raw' ].include?(resume)) and
             not skips.include?('archive_raw')),
           :rdb_load => ((resume.nil? or [ 'enrich', 'shred', 'elasticsearch', 'archive_raw', 'rdb_load' ].include?(resume)) and
             not skips.include?('rdb_load')),
@@ -122,7 +122,7 @@ def run
         end
 
         lock = get_lock(@args[:lock], @args[:consul])
-        if not lock.nil?
+        if not lock.nil? and not @args[:ignore_lock_on_start]
           lock.try_lock
         end
 

From 156643074a9e68fa1d22cb5da7a07ad81e2efa41 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Fri, 8 Jun 2018 17:50:54 +0200
Subject: [PATCH 14/19] EmrEtlRunner: handle SSE-S3 encrypted S3 buckets
 (closes #3456)

---
 .../emr-etl-runner/config/config.yml.sample   |  1 +
 .../config/stream_config.yml.sample           |  1 +
 .../lib/snowplow-emr-etl-runner/contracts.rb  |  4 +-
 .../lib/snowplow-emr-etl-runner/emr_job.rb    | 42 ++++++++++++++++---
 .../resources/sparse_config.yml               |  1 +
 .../resources/stream_config.yml               |  1 +
 6 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/3-enrich/emr-etl-runner/config/config.yml.sample b/3-enrich/emr-etl-runner/config/config.yml.sample
index 64eb75aca6..35fbcfbb52 100644
--- a/3-enrich/emr-etl-runner/config/config.yml.sample
+++ b/3-enrich/emr-etl-runner/config/config.yml.sample
@@ -8,6 +8,7 @@ aws:
       assets: s3://snowplow-hosted-assets # DO NOT CHANGE unless you are hosting the jarfiles etc yourself in your own bucket
       jsonpath_assets: # If you have defined your own JSON Schemas, add the s3:// path to your own JSON Path files in your own bucket here
       log: ADD HERE
+      encrypted: false # Whether the buckets below are enrcrypted using server side encryption (SSE-S3)
       raw:
         in:                  # This is a YAML array of one or more in buckets - you MUST use hyphens before each entry in the array, as below
           - ADD HERE         # e.g. s3://my-old-collector-bucket
diff --git a/3-enrich/emr-etl-runner/config/stream_config.yml.sample b/3-enrich/emr-etl-runner/config/stream_config.yml.sample
index f1958e3f98..d8c9043745 100644
--- a/3-enrich/emr-etl-runner/config/stream_config.yml.sample
+++ b/3-enrich/emr-etl-runner/config/stream_config.yml.sample
@@ -8,6 +8,7 @@ aws:
       assets: s3://snowplow-hosted-assets # DO NOT CHANGE unless you are hosting the jarfiles etc yourself in your own bucket
       jsonpath_assets: # If you have defined your own JSON Schemas, add the s3:// path to your own JSON Path files in your own bucket here
       log: ADD HERE
+      encrypted: false
       enriched:
         good: ADD HERE       # e.g. s3://my-out-bucket/enriched/good
         archive: ADD HERE    # Where to archive enriched events to, e.g. s3://my-archive-bucket/enriched
diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb
index 30ac6fbc69..c474636280 100755
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb
@@ -42,7 +42,8 @@ module EmrEtlRunner
       :skip => Maybe[ArrayOf[String]],
       :include => Maybe[ArrayOf[String]],
       :lock => Maybe[String],
-      :consul => Maybe[String]
+      :consul => Maybe[String],
+      :ignore_lock_on_start => Maybe[Bool]
       })
 
     # The Hash containing the buckets field from the configuration YAML
@@ -50,6 +51,7 @@ module EmrEtlRunner
       :assets => String,
       :jsonpath_assets => Maybe[String],
       :log => String,
+      :encrypted => Bool,
       :raw => Maybe[({
         :in => ArrayOf[String],
         :processing => String,
diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
index d36d8c15a5..2557346d41 100755
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
@@ -82,6 +82,7 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
         @rdb_loader_logs = []   # pairs of target name and associated log
         etl_tstamp = (run_tstamp.to_f * 1000).to_i.to_s
         output_codec = output_codec_from_compression_format(config.dig(:enrich, :output_compression))
+        encrypted = config[:aws][:s3][:buckets][:encrypted]
 
         s3 = Aws::S3::Client.new(
           :access_key_id => config[:aws][:access_key_id],
@@ -163,6 +164,9 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
               if collector_format == 'clj-tomcat'
                 staging_step.arguments = staging_step.arguments + [ '--groupBy', '.*/_*(.+)' ]
               end
+              if encrypted
+                staging_step.arguments = staging_step.arguments + [ '--s3ServerSideEncryption' ]
+              end
               staging_step.name << ": Raw #{l} -> Raw Staging S3"
               @jobflow.add_step(staging_step)
             }
@@ -318,6 +322,9 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
           if collector_format == "clj-tomcat" then
             compact_to_hdfs_step.arguments << "--outputCodec" << "none"
           end
+          if encrypted
+            compact_to_hdfs_step.arguments = compact_to_hdfs_step.arguments + [ '--s3ServerSideEncryption' ]
+          end
           compact_to_hdfs_step.name << ": Raw S3 -> Raw HDFS"
           @jobflow.add_step(compact_to_hdfs_step)
 
@@ -379,6 +386,9 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
             "--srcPattern" , PARTFILE_REGEXP,
             "--s3Endpoint" , s3_endpoint
           ] + output_codec
+          if encrypted
+            copy_to_s3_step.arguments = copy_to_s3_step.arguments + [ '--s3ServerSideEncryption' ]
+          end
           copy_to_s3_step.name << ": Enriched HDFS -> S3"
           @jobflow.add_step(copy_to_s3_step)
 
@@ -389,6 +399,9 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
             "--srcPattern" , SUCCESS_REGEXP,
             "--s3Endpoint" , s3_endpoint
           ]
+          if encrypted
+            copy_success_file_step.arguments = copy_success_file_step.arguments + [ '--s3ServerSideEncryption' ]
+          end
           copy_success_file_step.name << ": Enriched HDFS _SUCCESS -> S3"
           @jobflow.add_step(copy_success_file_step)
         end
@@ -412,6 +425,9 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
             "--srcPattern" , STREAM_ENRICH_REGEXP,
             "--deleteOnSuccess"
           ]
+          if encrypted
+            staging_step.arguments = staging_step.arguments + [ '--s3ServerSideEncryption' ]
+          end
           staging_step.name << ": Stream Enriched #{csbe[:stream]} -> Enriched Staging S3"
           @jobflow.add_step(staging_step)
         end
@@ -449,6 +465,9 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
               "--outputCodec", "none",
               "--s3Endpoint" , s3_endpoint
             ]
+            if encrypted
+              copy_to_hdfs_step.arguments = copy_to_hdfs_step.arguments + [ '--s3ServerSideEncryption' ]
+            end
             copy_to_hdfs_step.name << ": Enriched S3 -> HDFS"
             @jobflow.add_step(copy_to_hdfs_step)
           end
@@ -500,6 +519,9 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
             "--srcPattern" , PARTFILE_REGEXP,
             "--s3Endpoint" , s3_endpoint
           ] + output_codec
+          if encrypted
+            copy_to_s3_step.arguments = copy_to_s3_step.arguments + [ '--s3ServerSideEncryption' ]
+          end
           copy_to_s3_step.name << ": Shredded HDFS -> S3"
           @jobflow.add_step(copy_to_s3_step)
 
@@ -510,6 +532,9 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
             "--srcPattern" , SUCCESS_REGEXP,
             "--s3Endpoint" , s3_endpoint
           ]
+          if encrypted
+            copy_success_file_step.arguments = copy_success_file_step.arguments + [ '--s3ServerSideEncryption' ]
+          end
           copy_success_file_step.name << ": Shredded HDFS _SUCCESS -> S3"
           @jobflow.add_step(copy_success_file_step)
         end
@@ -529,6 +554,9 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
             "--s3Endpoint" , s3_endpoint,
             "--deleteOnSuccess"
           ]
+          if encrypted
+            archive_raw_step.arguments = archive_raw_step.arguments + [ '--s3ServerSideEncryption' ]
+          end
           archive_raw_step.name << ": Raw Staging S3 -> Raw Archive S3"
           @jobflow.add_step(archive_raw_step)
         end
@@ -542,22 +570,22 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
         end
 
         if archive_enriched == 'pipeline'
-          archive_enriched_step = get_archive_step(csbe[:good], csbe[:archive], run_id, s3_endpoint, ": Enriched S3 -> Enriched Archive S3")
+          archive_enriched_step = get_archive_step(csbe[:good], csbe[:archive], run_id, s3_endpoint, ": Enriched S3 -> Enriched Archive S3", encrypted)
           @jobflow.add_step(archive_enriched_step)
         elsif archive_enriched == 'recover'
           latest_run_id = get_latest_run_id(s3, csbe[:good])
-          archive_enriched_step = get_archive_step(csbe[:good], csbe[:archive], latest_run_id, s3_endpoint, ': Enriched S3 -> S3 Enriched Archive')
+          archive_enriched_step = get_archive_step(csbe[:good], csbe[:archive], latest_run_id, s3_endpoint, ': Enriched S3 -> S3 Enriched Archive', encrypted)
           @jobflow.add_step(archive_enriched_step)
         else    # skip
           nil
         end
 
         if archive_shredded == 'pipeline'
-          archive_shredded_step = get_archive_step(csbs[:good], csbs[:archive], run_id, s3_endpoint, ": Shredded S3 -> Shredded Archive S3")
+          archive_shredded_step = get_archive_step(csbs[:good], csbs[:archive], run_id, s3_endpoint, ": Shredded S3 -> Shredded Archive S3", encrypted)
           @jobflow.add_step(archive_shredded_step)
         elsif archive_shredded == 'recover'
           latest_run_id = get_latest_run_id(s3, csbs[:good])
-          archive_shredded_step = get_archive_step(csbs[:good], csbs[:archive], latest_run_id, s3_endpoint, ": Shredded S3 -> S3 Shredded Archive")
+          archive_shredded_step = get_archive_step(csbs[:good], csbs[:archive], latest_run_id, s3_endpoint, ": Shredded S3 -> S3 Shredded Archive", encrypted)
           @jobflow.add_step(archive_shredded_step)
         else    # skip
           nil
@@ -786,10 +814,11 @@ def get_latest_run_id(s3, s3_path)
       # +archive_path+:: enriched:archive or shredded:archive full S3 path
       # +run_id_folder+:: run id foler name (2017-05-10-02-45-30, without `=run`)
       # +name+:: step description to show in EMR console
+      # +encrypted+:: whether the destination bucket is encrypted
       #
       # Returns a step ready for adding to the Elasticity Jobflow.
       Contract String, String, String, String, String, Bool => Elasticity::S3DistCpStep
-      def get_archive_step(good_path, archive_path, run_id_folder, s3_endpoint, name)
+      def get_archive_step(good_path, archive_path, run_id_folder, s3_endpoint, name, encrypted)
         archive_step = Elasticity::S3DistCpStep.new(legacy = @legacy)
         archive_step.arguments = [
           "--src"        , partition_by_run(good_path, run_id_folder),
@@ -797,6 +826,9 @@ def get_archive_step(good_path, archive_path, run_id_folder, s3_endpoint, name)
           "--s3Endpoint" , s3_endpoint,
           "--deleteOnSuccess"
         ]
+        if encrypted
+          archive_step.arguments = archive_step.arguments + [ '--s3ServerSideEncryption' ]
+        end
         archive_step.name << name
         archive_step
       end
diff --git a/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/resources/sparse_config.yml b/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/resources/sparse_config.yml
index 5ba478efe5..f3beae1bb8 100644
--- a/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/resources/sparse_config.yml
+++ b/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/resources/sparse_config.yml
@@ -9,6 +9,7 @@ aws:
       assets: spha # DO NOT CHANGE unless you are hosting the jarfiles etc yourself in your own bucket
       jsonpath_assets: # If you have defined your own JSON Schemas, add the s3:// path to your own JSON Path files in your own bucket here
       log: s3://not-a-bucket
+      encrypted: false
       raw:
         in:                  # Multiple in buckets are permitted
           - ri         # e.g. s3://my-archive-bucket/raw
diff --git a/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/resources/stream_config.yml b/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/resources/stream_config.yml
index c124147372..a2d1fc412a 100644
--- a/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/resources/stream_config.yml
+++ b/3-enrich/emr-etl-runner/spec/snowplow-emr-etl-runner/resources/stream_config.yml
@@ -9,6 +9,7 @@ aws:
       assets: spha # DO NOT CHANGE unless you are hosting the jarfiles etc yourself in your own bucket
       jsonpath_assets: # If you have defined your own JSON Schemas, add the s3:// path to your own JSON Path files in your own bucket here
       log: s3://not-a-bucket
+      encrypted: false
       enriched:
         good: eg       # e.g. s3://my-out-bucket/enriched/good
         archive: ea    # Where to archive enriched events to, e.g. s3://my-out-bucket/enriched/archive

From 8a3b15d93cc60fc8cc2180bb3604ee1140c0d914 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Tue, 12 Jun 2018 18:19:08 +0200
Subject: [PATCH 15/19] EmrEtlRunner: add ability to specify an EMR security
 configuration (closes #3798)

---
 3-enrich/emr-etl-runner/config/config.yml.sample              | 1 +
 3-enrich/emr-etl-runner/config/stream_config.yml.sample       | 1 +
 .../emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb   | 1 +
 .../emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb     | 4 ++++
 4 files changed, 7 insertions(+)

diff --git a/3-enrich/emr-etl-runner/config/config.yml.sample b/3-enrich/emr-etl-runner/config/config.yml.sample
index 35fbcfbb52..5747da50ae 100644
--- a/3-enrich/emr-etl-runner/config/config.yml.sample
+++ b/3-enrich/emr-etl-runner/config/config.yml.sample
@@ -33,6 +33,7 @@ aws:
     placement: ADD HERE     # Set this if not running in VPC. Leave blank otherwise
     ec2_subnet_id: ADD HERE # Set this if running in VPC. Leave blank otherwise
     ec2_key_name: ADD HERE
+    security_configuration: ADD HERE # Specify your EMR security configuration if needed. Leave blank otherwise
     bootstrap: []           # Set this to specify custom boostrap actions. Leave empty otherwise
     software:
       hbase:                # Optional. To launch on cluster, provide version, "0.92.0", keep quotes. Leave empty otherwise.
diff --git a/3-enrich/emr-etl-runner/config/stream_config.yml.sample b/3-enrich/emr-etl-runner/config/stream_config.yml.sample
index d8c9043745..2c3f730597 100644
--- a/3-enrich/emr-etl-runner/config/stream_config.yml.sample
+++ b/3-enrich/emr-etl-runner/config/stream_config.yml.sample
@@ -26,6 +26,7 @@ aws:
     placement: ADD HERE     # Set this if not running in VPC. Leave blank otherwise
     ec2_subnet_id: ADD HERE # Set this if running in VPC. Leave blank otherwise
     ec2_key_name: ADD HERE
+    security_configuration: ADD HERE # Specify your EMR security configuration if needed. Leave blank otherwise
     bootstrap: []           # Set this to specify custom boostrap actions. Leave empty otherwise
     software:
       hbase:                # Optional. To launch on cluster, provide version, "0.92.0", keep quotes. Leave empty otherwise.
diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb
index c474636280..d9032b1d77 100755
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb
@@ -97,6 +97,7 @@ module EmrEtlRunner
           :placement => Maybe[String],
           :ec2_subnet_id => Maybe[String],
           :ec2_key_name => String,
+          :security_configuration => Maybe[String],
           :bootstrap => Maybe[ArrayOf[String]],
           :software => ({
             :hbase => Maybe[String],
diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
index 2557346d41..94d06e5638 100755
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
@@ -123,6 +123,10 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
           @jobflow.ec2_subnet_id      = config[:aws][:emr][:ec2_subnet_id]
         end
 
+        unless config[:aws][:emr][:security_configuration].nil?
+          @jobflow.security_configuration = config[:aws][:emr][:security_configuration]
+        end
+
         @jobflow.log_uri              = config[:aws][:s3][:buckets][:log]
         @jobflow.enable_debugging     = debug
         @jobflow.visible_to_all_users = true

From 927a35278e9fe6c59d58a0f25ce76eab9f0e2ea7 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 13 Jun 2018 17:26:20 +0200
Subject: [PATCH 16/19] Clojure Collector: bump to 2.1.0 (closes #3801)

---
 2-collectors/clojure-collector/java-servlet/project.clj         | 2 +-
 .../java-servlet/war-resources/.ebextensions/server.xml         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/2-collectors/clojure-collector/java-servlet/project.clj b/2-collectors/clojure-collector/java-servlet/project.clj
index 6ecfb586b5..07e95e26dc 100644
--- a/2-collectors/clojure-collector/java-servlet/project.clj
+++ b/2-collectors/clojure-collector/java-servlet/project.clj
@@ -13,7 +13,7 @@
 ;;;; Copyright: Copyright (c) 2012-2013 Snowplow Analytics Ltd
 ;;;; License:   Apache License Version 2.0
 
-(defproject snowplow/clojure-collector "2.0.0" ;; MUST also bump version in server.xml
+(defproject snowplow/clojure-collector "2.1.0" ;; MUST also bump version in server.xml
   :license {:name "Apache Version 2.0"
   :url "http://www.apache.org/licenses/LICENSE-2.0"}
   :description "A SnowPlow event collector written in Clojure. AWS Elastic Beanstalk compatible."
diff --git a/2-collectors/clojure-collector/java-servlet/war-resources/.ebextensions/server.xml b/2-collectors/clojure-collector/java-servlet/war-resources/.ebextensions/server.xml
index be895c3f50..f6ac7d5d90 100644
--- a/2-collectors/clojure-collector/java-servlet/war-resources/.ebextensions/server.xml
+++ b/2-collectors/clojure-collector/java-servlet/war-resources/.ebextensions/server.xml
@@ -135,7 +135,7 @@
         <Valve className="com.snowplowanalytics.snowplow.collectors.clojure.SnowplowAccessLogValve" directory="logs"
                prefix="localhost_access_log" suffix=".txt" rotatable="false" requestAttributesEnabled="true"
                encoding="UTF-8"
-               pattern="%{yyyy-MM-dd}t&#9;%{HH:mm:ss}t&#9;-&#9;%b&#9;%a&#9;%m&#9;%h&#9;%U&#9;%s&#9;%{Referer}i&#9;%{User-Agent}I&#9;%q&amp;cv=clj-2.0.0-%v&amp;nuid=%{sp}C&#9;-&#9;-&#9;-&#9;%~&#9;%w" />
+               pattern="%{yyyy-MM-dd}t&#9;%{HH:mm:ss}t&#9;-&#9;%b&#9;%a&#9;%m&#9;%h&#9;%U&#9;%s&#9;%{Referer}i&#9;%{User-Agent}I&#9;%q&amp;cv=clj-2.1.0-%v&amp;nuid=%{sp}C&#9;-&#9;-&#9;-&#9;%~&#9;%w" />
 
 
       </Host>

From 3371cb6efab22bb7c6f3ec2182cb61d28f0181ce Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Wed, 13 Jun 2018 17:28:03 +0200
Subject: [PATCH 17/19] EmrEtlRunner: bump to 0.33.0 (closes #3800)

---
 3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner.rb
index d9db70a10f..0258240221 100755
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner.rb
@@ -36,6 +36,6 @@
 module Snowplow
   module EmrEtlRunner
     NAME    = "snowplow-emr-etl-runner"
-    VERSION = "0.32.0"
+    VERSION = "0.33.0"
   end
 end

From 56656dd0d813af105869f2d1cb45d836d16848bf Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Thu, 14 Jun 2018 15:38:16 +0200
Subject: [PATCH 18/19] EmrEtlRunner: check the processing folder for emptiness
 when resuming from enrich (closes #3803)

---
 .../emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb    | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
index 94d06e5638..6dfe823879 100755
--- a/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
+++ b/3-enrich/emr-etl-runner/lib/snowplow-emr-etl-runner/emr_job.rb
@@ -305,6 +305,11 @@ def initialize(debug, staging, enrich, staging_stream_enrich, shred, es, archive
 
           raw_input = csbr[:processing]
 
+          # When resuming from enrich, we need to check for emptiness of the processing bucket
+          if !staging and empty?(s3, raw_input)
+            raise NoDataToProcessError, "No Snowplow logs in #{raw_input}, can't resume from enrich"
+          end
+
           # for ndjson/urbanairship we can group by everything, just aim for the target size
           group_by = is_ua_ndjson(collector_format) ? ".*\/(\w+)\/.*" : ".*([0-9]+-[0-9]+-[0-9]+)-[0-9]+.*"
 

From 34cd6ca3f298356b23d06756bdeaabb4d3f4c1d3 Mon Sep 17 00:00:00 2001
From: Ben Fradet <benjamin.fradet@gmail.com>
Date: Thu, 14 Jun 2018 15:38:55 +0200
Subject: [PATCH 19/19] Prepared for release

---
 CHANGELOG | 21 +++++++++++++++++++++
 README.md |  2 +-
 VERSION   |  2 +-
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index ecf8541886..b09d274015 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,24 @@
+Release 108 Val Camonica (2018-07-24)
+-------------------------------------
+EmrEtlRunner: bump to 0.33.0 (#3800)
+EmrEtlRunner: add ability to specify an EMR security configuration (#3798)
+EmrEtlRunner: handle SSE-S3 encrypted S3 buckets (#3456)
+EmrEtlRunner: replace Sluice by aws-sdk-s3 (#3524)
+EmrEtlRunner: add --ignore-lock-on-start option (#3537)
+EmrEtlRunner: check the processing folder for emptiness when resuming from enrich (#3803)
+EmrEtlRunner: make port in Snowplow monitoring configurable (#3236)
+EmrEtlRunner: make protocol in Snowplow monitoring configurable (#3791)
+Clojure Collector: bump to 2.1.0 (#3801)
+Clojure Collector: make cookie path configurable (#2739)
+Clojure Collector: do not allow dependencies requiring an HTTP repository (#3559)
+Clojure Collector: bump lein-ring to 0.12.4 (#3783)
+Clojure Collector: bump commons-codec to 1.11 (#3782)
+Clojure Collector: bump metrics-clojure to 2.10.0 (#3781)
+Clojure Collector: bump compojure to 1.6.1 (#3780)
+Clojure Collector: bump clojure to 1.9.0 (#3779)
+Clojure Collector: bump ring to 1.6.3 (#3778)
+Clojure Collector: remove lein-beanstalk (#3784)
+
 Release 107 Trypillia (2018-07-17)
 ----------------------------------
 Enrich: update example config for PII to version 2-0-0 (#3812)
diff --git a/README.md b/README.md
index 1a51536e8f..73ec5f042b 100644
--- a/README.md
+++ b/README.md
@@ -76,7 +76,7 @@ limitations under the License.
 [travis-image]: https://travis-ci.org/snowplow/snowplow.png?branch=master
 [travis]: http://travis-ci.org/snowplow/snowplow
 
-[release-image]: https://img.shields.io/badge/release-107_Trypillia-orange.svg?style=flat
+[release-image]: https://img.shields.io/badge/release-108_Val_Camonica-orange.svg?style=flat
 [releases]: https://github.com/snowplow/snowplow/releases
 
 [license-image]: http://img.shields.io/badge/license-Apache--2-blue.svg?style=flat
diff --git a/VERSION b/VERSION
index 9fd6ca9538..d333070a13 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-r107-trypillia
+r108-val-camonica