Skip to content

Commit

Permalink
Merge pull request #3300 from Zak-Kent/pdb-4832-report-parition-index…
Browse files Browse the repository at this point in the history
…-on-id

(PDB-4832) Add report id index in partitions
  • Loading branch information
austb authored Jul 31, 2020
2 parents 5a01bdf + 0b11700 commit fb015c1
Show file tree
Hide file tree
Showing 8 changed files with 211 additions and 38 deletions.
2 changes: 1 addition & 1 deletion ext/test/upgrade-and-exit
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,6 @@ psql -U puppetdb puppetdb -c 'select max(version) from schema_migrations;' \
> "$tmpdir/out"
cat "$tmpdir/out"
# This must be updated every time we add a new migration
grep -qE ' 75$' "$tmpdir/out"
grep -qE ' 76$' "$tmpdir/out"

test ! -e "$PDBBOX"/var/mq-migrated
2 changes: 1 addition & 1 deletion resources/ext/cli/delete-reports.erb
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ chown "$pg_user:$pg_user" "$tmp_dir"

# Verify that the PuppetDB schema version it the expected value
# so that we do not incorrectly delete the report data.
expected_schema_ver=75
expected_schema_ver=76
su - "$pg_user" -s /bin/sh -c "$psql_cmd -p $pg_port -d $pdb_db_name -c 'COPY ( SELECT max(version) FROM schema_migrations ) TO STDOUT;' > $tmp_dir/schema_ver"
actual_schema_ver="$(cat "$tmp_dir/schema_ver")"
if test "$actual_schema_ver" -ne $expected_schema_ver; then
Expand Down
15 changes: 13 additions & 2 deletions src/puppetlabs/puppetdb/scf/migrate.clj
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@
[clojure.set :as set]
[clojure.string :as str]
[puppetlabs.puppetdb.scf.storage :as scf]
[puppetlabs.puppetdb.scf.partitioning :as partitioning])
[puppetlabs.puppetdb.scf.partitioning :as partitioning
:refer [get-temporal-partitions]])
(:import [org.postgresql.util PGobject]
[java.time LocalDate ZonedDateTime ZoneId OffsetDateTime]
(java.sql Timestamp)
Expand Down Expand Up @@ -1954,6 +1955,15 @@
(jdbc/do-commands
"ALTER TABLE reports ADD COLUMN report_type text DEFAULT 'agent' NOT NULL"))

(defn add-report-partition-indexes-on-id
[]
(doseq [{:keys [table part] :as huh} (get-temporal-partitions "reports")
:let [idx-name (str "idx_reports_id_" part)]]
(jdbc/do-commands
(format "create unique index if not exists %s on %s using btree (id)"
(jdbc/double-quote idx-name)
(jdbc/double-quote table)))))

(def migrations
"The available migrations, as a map from migration version to migration function."
{00 require-schema-migrations-table
Expand Down Expand Up @@ -2015,7 +2025,8 @@
; or resource events, you also update the delete-reports
; cli command.
74 reports-partitioning
75 add-report-type-to-reports})
75 add-report-type-to-reports
76 add-report-partition-indexes-on-id})

(defn desired-schema-version []
"The newest migration this PuppetDB instance knows about. Anything
Expand Down
87 changes: 57 additions & 30 deletions src/puppetlabs/puppetdb/scf/partitioning.clj
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@
(java.time.temporal ChronoUnit)
(java.time.format DateTimeFormatter)))

(defn get-temporal-partitions
"Returns a vector of {:table full-table-name :part partition-key}
values for all the existing partitions associated with the
name-prefix, e.g. request for \"reports\" might produce a vector of
maps like {:table \"reports_20200802z\" :part \"20200802z\"}."
[name-prefix]
;; FIXME: use this in other relevant places.
;; FIXME: restrict to our schema.
(mapv (fn [{:keys [tablename]}]
{:table tablename
:part (subs tablename (inc (count name-prefix)))})
(jdbc/query-to-vec
(str "select tablename from pg_tables where tablename ~ "
(jdbc/single-quote
(str "^" name-prefix "_[0-9]{8}z$"))))))

(defn date-suffix
[date]
(let [formatter (.withZone (DateTimeFormatter/BASIC_ISO_DATE) (ZoneId/of "UTC"))]
Expand Down Expand Up @@ -95,6 +111,12 @@
(format "CREATE UNIQUE INDEX IF NOT EXISTS resource_events_hash_%s ON %s (event_hash)"
iso-week-year full-table-name)])))

;; This var is used in testing to simulate migration 74 being applied without
;; adding the idx_reports_id index to partitions. Changing this behavior in
;; migration 74 should be safe because the index creation is guarded by
;; 'if not exists' in both the changed migration 74 and in the newer 76.
(def ^:dynamic add-report-id-idx? true)

(defn create-reports-partition
"Creates a partition in the reports table"
[date]
Expand All @@ -115,33 +137,38 @@
" FOREIGN KEY (status_id) REFERENCES report_statuses(id) ON DELETE CASCADE")
iso-week-year)])
(fn [full-table-name iso-week-year]
[(format "CREATE INDEX IF NOT EXISTS idx_reports_compound_id_%s ON %s USING btree (producer_timestamp, certname, hash) WHERE (start_time IS NOT NULL)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS idx_reports_noop_pending_%s ON %s USING btree (noop_pending) WHERE (noop_pending = true)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS idx_reports_prod_%s ON %s USING btree (producer_id)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS idx_reports_producer_timestamp_%s ON %s USING btree (producer_timestamp)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS idx_reports_producer_timestamp_by_hour_certname_%s ON %s USING btree (date_trunc('hour'::text, timezone('UTC'::text, producer_timestamp)), producer_timestamp, certname)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_cached_catalog_status_on_fail_%s ON %s USING btree (cached_catalog_status) WHERE (cached_catalog_status = 'on_failure'::text)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_catalog_uuid_idx_%s ON %s USING btree (catalog_uuid)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_certname_idx_%s ON %s USING btree (certname)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_end_time_idx_%s ON %s USING btree (end_time)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_environment_id_idx_%s ON %s USING btree (environment_id)"
iso-week-year full-table-name)
(format "CREATE UNIQUE INDEX IF NOT EXISTS reports_hash_expr_idx_%s ON %s USING btree (encode(hash, 'hex'::text))"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_job_id_idx_%s ON %s USING btree (job_id) WHERE (job_id IS NOT NULL)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_noop_idx_%s ON %s USING btree (noop) WHERE (noop = true)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_status_id_idx_%s ON %s USING btree (status_id)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_tx_uuid_expr_idx_%s ON %s USING btree (((transaction_uuid)::text))"
iso-week-year full-table-name)])))
(let [indexes
[(format "CREATE INDEX IF NOT EXISTS idx_reports_compound_id_%s ON %s USING btree (producer_timestamp, certname, hash) WHERE (start_time IS NOT NULL)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS idx_reports_noop_pending_%s ON %s USING btree (noop_pending) WHERE (noop_pending = true)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS idx_reports_prod_%s ON %s USING btree (producer_id)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS idx_reports_producer_timestamp_%s ON %s USING btree (producer_timestamp)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS idx_reports_producer_timestamp_by_hour_certname_%s ON %s USING btree (date_trunc('hour'::text, timezone('UTC'::text, producer_timestamp)), producer_timestamp, certname)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_cached_catalog_status_on_fail_%s ON %s USING btree (cached_catalog_status) WHERE (cached_catalog_status = 'on_failure'::text)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_catalog_uuid_idx_%s ON %s USING btree (catalog_uuid)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_certname_idx_%s ON %s USING btree (certname)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_end_time_idx_%s ON %s USING btree (end_time)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_environment_id_idx_%s ON %s USING btree (environment_id)"
iso-week-year full-table-name)
(format "CREATE UNIQUE INDEX IF NOT EXISTS reports_hash_expr_idx_%s ON %s USING btree (encode(hash, 'hex'::text))"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_job_id_idx_%s ON %s USING btree (job_id) WHERE (job_id IS NOT NULL)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_noop_idx_%s ON %s USING btree (noop) WHERE (noop = true)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_status_id_idx_%s ON %s USING btree (status_id)"
iso-week-year full-table-name)
(format "CREATE INDEX IF NOT EXISTS reports_tx_uuid_expr_idx_%s ON %s USING btree (((transaction_uuid)::text))"
iso-week-year full-table-name)]]
(if add-report-id-idx?
(conj indexes (format "CREATE UNIQUE INDEX IF NOT EXISTS idx_reports_id_%s ON %s USING btree (id)"
iso-week-year full-table-name))
indexes)))))
52 changes: 51 additions & 1 deletion test/puppetlabs/puppetdb/scf/migrate_partitioning_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,19 @@
:primary? false
:user "pdb_test"}
:same nil}
{:left-only nil
:right-only
{:schema "public"
:table table-name
:index (str "idx_reports_id_" part-name)
:index_keys ["id"]
:type "btree"
:unique? true
:functional? false
:is_partial false
:primary? false
:user "pdb_test"}
:same nil}
{:left-only nil
:right-only {:schema "public"
:table table-name
Expand Down Expand Up @@ -1263,4 +1276,41 @@
:deferrable? "NO"}
:same nil}]))
dates))}
(diff-schema-maps before-migration (schema-info-map *db*))))))
(diff-schema-maps before-migration (schema-info-map *db*))))))

(deftest migration-76-schema-diff
(clear-db-for-testing!)
;; don't add the idx_reports_id index when fast forwarding past migration 74
(binding [partitioning/add-report-id-idx? false]
(fast-forward-to-migration! 75))

(let [before-migration (schema-info-map *db*)
today (ZonedDateTime/now (ZoneId/of "UTC"))
days-range (range -4 4)
dates (map #(.plusDays today %) days-range)
part-names (map #(str/lower-case (partitioning/date-suffix %)) dates)]
(apply-migration-for-testing! 76)

(is (= {:index-diff (into
[]
cat
(map
(fn [part-name]
(let [table-name (str "reports_" part-name)]
[{:left-only nil
:right-only
{:schema "public"
:table table-name
:index (str "idx_reports_id_" part-name)
:index_keys ["id"]
:type "btree"
:unique? true
:functional? false
:is_partial false
:primary? false
:user "pdb_test"}
:same nil}]))
part-names))
:table-diff nil
:constraint-diff nil}
(diff-schema-maps before-migration (schema-info-map *db*))))))
56 changes: 54 additions & 2 deletions test/puppetlabs/puppetdb/scf/migrate_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
[puppetlabs.kitchensink.core :as kitchensink]
[puppetlabs.puppetdb.scf.storage-utils :as sutils
:refer [db-serialize]]
[puppetlabs.puppetdb.testutils :as utils]
[cheshire.core :as json]
[clojure.java.jdbc :as sql]
[puppetlabs.puppetdb.scf.migrate :refer :all]
Expand All @@ -18,7 +19,9 @@
[puppetlabs.kitchensink.core :as ks]
[puppetlabs.puppetdb.testutils.db :refer [*db* with-test-db]]
[puppetlabs.puppetdb.scf.hash :as shash]
[puppetlabs.puppetdb.time :refer [ago days now to-timestamp]])
[puppetlabs.puppetdb.time :refer [ago days now to-timestamp]]
[puppetlabs.puppetdb.scf.partitioning :as part]
[clojure.string :as str])
(:import (java.time ZoneId ZonedDateTime)
(java.sql Timestamp)))

Expand Down Expand Up @@ -1414,7 +1417,15 @@
(is (= 1
(count hashes)))
(is (= expected
(first hashes)))))))
(first hashes)))

(testing "idx_reports_id index present in all partitions"
(let [assert-index-exists (fn [index indexes]
(is (true? (some #(str/includes? % index) indexes))))]
;; check that idx_reports_id is present in all paritions
(dorun (->> (utils/partition-names "reports")
(map utils/table-indexes)
(map (partial assert-index-exists "idx_reports_id"))))))))))

(deftest migration-75-add-report-type-column-with-default
(testing "reports should get default value of 'agent' for report_type"
Expand Down Expand Up @@ -1444,3 +1455,44 @@
(apply-migration-for-testing! 75)
(is (= "agent" (-> (query-to-vec "select * from reports")
first :report_type)))))))

(deftest migration-76-is-a-no-op-if-74-already-added-idx-reports-id
(testing "Index created with new version of migration 74"
(jdbc/with-db-connection *db*
(clear-db-for-testing!)
(let [assert-index-exists (fn [index indexes]
(is (true? (some #(str/includes? % index) indexes))))
;; check that idx_reports_id is present in all paritions
check-idx-reports-id #(dorun
(->>
(utils/partition-names "reports")
(map utils/table-indexes)
(map (partial assert-index-exists "idx_reports_id"))))]
(fast-forward-to-migration! 75)
;; migration 74 should have added the parition indexes
(check-idx-reports-id)

(apply-migration-for-testing! 76)
;; migration 76 should be a no-op
(check-idx-reports-id)))))

(deftest migration-76-adds-report-id-idx-when-not-added-by-migration-74
(testing "All report paritions have idx_reports_id index when old version of 74 applied"
(jdbc/with-db-connection *db*
(clear-db-for-testing!)
;; don't add the idx_reports_id index when fast forwarding past migration 74
(binding [part/add-report-id-idx? false]
(fast-forward-to-migration! 75))
(let [assert-no-index (fn [index indexes]
(is (nil? (some #(str/includes? % index) indexes))))]
;; check that idx_reports_id wasn't added by migration 74
(dorun (->> (utils/partition-names "reports")
(map utils/table-indexes)
(map (partial assert-no-index "idx_reports_id")))))
(apply-migration-for-testing! 76)
(let [assert-index-exists (fn [index indexes]
(is (some #(str/includes? % index) indexes)))]
;; check that idx_reports_id is now present in all paritions
(dorun (->> (utils/partition-names "reports")
(map utils/table-indexes)
(map (partial assert-index-exists "idx_reports_id"))))))))
13 changes: 12 additions & 1 deletion test/puppetlabs/puppetdb/scf/storage_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -1749,7 +1749,18 @@
"COMMIT TRANSACTION")
(store-example-report! report timestamp)
(is (= [{:certname certname}]
(query-to-vec ["SELECT certname FROM reports"]))))
(query-to-vec ["SELECT certname FROM reports"])))

(testing "Index is created in on demand partitions"
(let [assert-index-exists (fn [index indexes]
(is (true? (some #(str/includes? % index) indexes))))

partition (tu/partition-names "reports")]
;; check that idx_reports_id index is present in on demand paritions
(is (= 1 (count partition)))
(dorun (->> partition
(map tu/table-indexes)
(map (partial assert-index-exists "idx_reports_id_")))))))

(deftest-db report-with-event-timestamp
(let [z-report (update-event-timestamps report "2011-01-01T12:00:01Z")
Expand Down
22 changes: 22 additions & 0 deletions test/puppetlabs/puppetdb/testutils.clj
Original file line number Diff line number Diff line change
Expand Up @@ -428,3 +428,25 @@

(def default-timeout-ms
(* 1000 60 5))

(defn partition-names
"Return all partition names given the parent table name"
[table]
(let [inhparent (str "public." table)]
(->> ["SELECT inhrelid::regclass AS child
FROM pg_catalog.pg_inherits
WHERE inhparent = ?::regclass;"
inhparent]
jdbc/query-to-vec
(map :child)
(map #(.toString %)))))

(defn table-indexes
"Return the index definitions for the given table name"
[table]
(->> ["SELECT tablename, indexdef
FROM pg_indexes
WHERE schemaname = 'public' AND tablename = ?;"
table]
jdbc/query-to-vec
(map :indexdef)))

0 comments on commit fb015c1

Please sign in to comment.