Skip to content

Commit

Permalink
dry sql
Browse files Browse the repository at this point in the history
  • Loading branch information
cesara committed Feb 7, 2025
1 parent a32eab6 commit 863767b
Show file tree
Hide file tree
Showing 6 changed files with 305 additions and 602 deletions.
302 changes: 1 addition & 301 deletions apps/zbugs/docker/1gb_upstream/init.sql
Original file line number Diff line number Diff line change
@@ -1,280 +1,3 @@
-- Reset step: drop the tables this script creates so the script can be re-run
-- against a database that already contains them. CASCADE also removes objects
-- that depend on the dropped tables (e.g. foreign keys that reference them).
--
-- "viewState" is listed because it is created unconditionally further down;
-- without it a second run fails at CREATE TABLE "viewState".
--
-- NOTE(review): "zero.schemaVersions" is a single quoted identifier, i.e. a
-- table literally named zero.schemaVersions on the search path. It does not
-- match zero."schemaVersions" (table "schemaVersions" in schema zero), which
-- this script creates with IF NOT EXISTS, so that table is left in place by
-- this statement. Confirm whether that is intended.
DROP TABLE IF EXISTS
    "user",
    "issue",
    "comment",
    "label",
    "issueLabel",
    "emoji",
    "userPref",
    "viewState",
    "zero.schemaVersions"
CASCADE;

-- user

-- Accounts table. Column order is significant: the table is bulk-loaded with
-- COPY without a column list.
CREATE TABLE "user" (
    "id"       VARCHAR PRIMARY KEY,
    "login"    VARCHAR NOT NULL,
    "name"     VARCHAR,
    "avatar"   VARCHAR,
    "role"     VARCHAR NOT NULL DEFAULT 'user',
    "githubID" INTEGER NOT NULL
);

-- "login" and "githubID" must each be unique across all users.
CREATE UNIQUE INDEX user_login_idx
    ON "user" ("login");

CREATE UNIQUE INDEX user_githubid_idx
    ON "user" ("githubID");

-- issue

-- Issues. Column order is kept as-is because bulk loads may rely on it.
CREATE TABLE issue (
    "id"          VARCHAR PRIMARY KEY,
    -- Identity values start at 3000; BY DEFAULT still lets a writer supply
    -- an explicit value.
    "shortID"     INTEGER GENERATED BY DEFAULT AS IDENTITY (START WITH 3000),
    "title"       VARCHAR(128) NOT NULL,
    "open"        BOOLEAN NOT NULL,
    -- Timestamps are stored as milliseconds since the Unix epoch.
    "modified"    DOUBLE PRECISION DEFAULT (EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000),
    "created"     DOUBLE PRECISION DEFAULT (EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000),
    "creatorID"   VARCHAR NOT NULL REFERENCES "user" ("id"),
    "assigneeID"  VARCHAR REFERENCES "user" ("id"),
    -- 10240 was chosen because the largest description in the legacy data
    -- is currently ~9KB.
    "description" VARCHAR(10240) DEFAULT '',
    "visibility"  VARCHAR NOT NULL DEFAULT 'public'
);


-- Trigger function: stamps the row's "modified" column with the current
-- time, expressed as milliseconds since the Unix epoch.
CREATE OR REPLACE FUNCTION update_modified_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.modified := EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000;
    RETURN NEW;
END;
$$;

-- Fires for every inserted or updated issue row, so whatever value the
-- statement supplied for "modified" is replaced.
CREATE TRIGGER issue_set_last_modified
    BEFORE INSERT OR UPDATE
    ON issue
    FOR EACH ROW
    EXECUTE FUNCTION update_modified_column();

-- Trigger function: sets "created" to the current time (milliseconds since
-- the Unix epoch), replacing any value supplied by the INSERT.
CREATE OR REPLACE FUNCTION issue_set_created_on_insert()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.created := EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000;
    RETURN NEW;
END;
$$;

-- Applied to each newly inserted issue row.
CREATE TRIGGER issue_set_created_on_insert_trigger
    BEFORE INSERT
    ON issue
    FOR EACH ROW
    EXECUTE FUNCTION issue_set_created_on_insert();

-- viewState

-- One row per (user, issue) pair holding a "viewed" value.
-- Rows are removed automatically when the referenced user or issue is deleted.
CREATE TABLE "viewState" (
    "userID"  VARCHAR REFERENCES "user" ("id") ON DELETE CASCADE,
    "issueID" VARCHAR REFERENCES issue ("id") ON DELETE CASCADE,
    "viewed"  DOUBLE PRECISION,

    PRIMARY KEY ("userID", "issueID")
);

-- comment

-- Comments on issues. Deleting an issue deletes its comments.
-- Column order is kept as-is because the table is bulk-loaded with COPY
-- without a column list.
CREATE TABLE comment (
    "id"        VARCHAR PRIMARY KEY,
    "issueID"   VARCHAR REFERENCES issue ("id") ON DELETE CASCADE,
    "created"   DOUBLE PRECISION,
    "body"      TEXT NOT NULL,
    "creatorID" VARCHAR REFERENCES "user" ("id")
);

-- Trigger function: when a comment row is written, bump "modified" on the
-- issue that the comment belongs to (milliseconds since the Unix epoch).
CREATE OR REPLACE FUNCTION update_issue_modified_time()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE issue AS i
       SET modified = EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000
     WHERE i.id = NEW."issueID";

    RETURN NEW;
END;
$$;

-- Runs once for each inserted comment, after the insert has happened.
CREATE TRIGGER update_issue_modified_time_on_comment
    AFTER INSERT
    ON comment
    FOR EACH ROW
    EXECUTE FUNCTION update_issue_modified_time();

-- Trigger function: sets a comment's "created" to the current time
-- (milliseconds since the Unix epoch), replacing any supplied value.
CREATE OR REPLACE FUNCTION comment_set_created_on_insert()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.created := EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000;
    RETURN NEW;
END;
$$;

-- Applied to each newly inserted comment row.
CREATE TRIGGER comment_set_created_on_insert_trigger
    BEFORE INSERT
    ON comment
    FOR EACH ROW
    EXECUTE FUNCTION comment_set_created_on_insert();

-- Trigger function: rejects comments whose body is too long.
--   * Comments on the launch post (issue 'duuW9Nyj5cTNLlimp9Qje') are capped
--     at 1024 characters -- a special case added because of trolls.
--   * All comments are capped at 64*1024 characters; that limit was chosen
--     because some old comments are ~44KB.
-- A NULL body is passed through untouched.
CREATE OR REPLACE FUNCTION validate_comment_body_length()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
DECLARE
    body_len INTEGER;
BEGIN
    IF NEW.body IS NULL THEN
        RETURN NEW;
    END IF;

    body_len := LENGTH(NEW.body);

    -- Tighter limit for the launch post.
    IF body_len > 1024 AND NEW."issueID" = 'duuW9Nyj5cTNLlimp9Qje' THEN
        RAISE EXCEPTION 'Column value exceeds maximum allowed length of %', 1024;
    END IF;

    -- General limit for every comment.
    IF body_len > 64*1024 THEN
        RAISE EXCEPTION 'Column value exceeds maximum allowed length of %', 64*1024;
    END IF;

    RETURN NEW;
END;
$$;

-- Validate on both insert and update so an edit cannot exceed the limits.
CREATE TRIGGER check_comment_body_length
    BEFORE INSERT OR UPDATE
    ON comment
    FOR EACH ROW
    EXECUTE FUNCTION validate_comment_body_length();


-- label

-- Labels that can be attached to issues.
CREATE TABLE label (
    "id"   VARCHAR PRIMARY KEY,
    "name" VARCHAR NOT NULL
);

-- issueLabel

-- Junction table linking labels to issues. Deleting an issue removes its
-- label links; the label reference has no ON DELETE action.
CREATE TABLE "issueLabel" (
    "labelID" VARCHAR REFERENCES label ("id"),
    "issueID" VARCHAR REFERENCES issue ("id") ON DELETE CASCADE,

    PRIMARY KEY ("labelID", "issueID")
);

-- emoji

-- Emoji reactions. A given creator can attach a given emoji value to a given
-- subject at most once (see the UNIQUE constraint).
CREATE TABLE emoji (
    "id"         VARCHAR PRIMARY KEY,
    "value"      VARCHAR NOT NULL,
    "annotation" VARCHAR,
    -- Primary key of the "subject" the emoji is attached to, which is either
    -- an issue or a comment. A foreign key cannot enforce referential
    -- integrity here, so a trigger enforces it instead. A secondary index on
    -- this column is created explicitly below.
    "subjectID"  VARCHAR NOT NULL,
    "creatorID"  VARCHAR REFERENCES "user" ("id") ON DELETE CASCADE,
    -- Milliseconds since the Unix epoch.
    "created"    DOUBLE PRECISION DEFAULT (EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000),

    UNIQUE ("subjectID", "creatorID", "value")
);

CREATE INDEX emoji_created_idx
    ON emoji ("created");

CREATE INDEX emoji_subject_id_idx
    ON emoji ("subjectID");

-- Trigger function standing in for a foreign key on emoji."subjectID":
-- the value must be the id of an existing issue or an existing comment,
-- otherwise the write is rejected. On success it calls
-- update_issue_modified_on_emoji_change for the subject.
CREATE OR REPLACE FUNCTION emoji_check_subject_id()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    -- Reject the row when the subject is neither an issue nor a comment.
    IF NOT EXISTS (SELECT 1 FROM issue WHERE id = NEW."subjectID")
       AND NOT EXISTS (SELECT 1 FROM comment WHERE id = NEW."subjectID")
    THEN
        RAISE EXCEPTION 'id ''%'' does not exist in issue or comment', NEW."subjectID";
    END IF;

    PERFORM update_issue_modified_on_emoji_change(NEW."subjectID");

    RETURN NEW;
END;
$$;

-- Check the subject whenever an emoji row is inserted or updated.
CREATE OR REPLACE TRIGGER emoji_check_subject_id_update_trigger
    BEFORE INSERT OR UPDATE
    ON emoji
    FOR EACH ROW
    EXECUTE FUNCTION emoji_check_subject_id();

-- Trigger function: sets an emoji's "created" to the current time
-- (milliseconds since the Unix epoch), replacing any supplied value.
CREATE OR REPLACE FUNCTION emoji_set_created_on_insert()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.created := EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000;
    RETURN NEW;
END;
$$;

-- Applied to each newly inserted emoji row.
CREATE OR REPLACE TRIGGER emoji_set_created_on_insert_trigger
    BEFORE INSERT
    ON emoji
    FOR EACH ROW
    EXECUTE FUNCTION emoji_set_created_on_insert();

-- Emoji rows point at their subject through "subjectID" without a foreign
-- key, so there is no ON DELETE CASCADE to clean them up. This trigger
-- function removes every emoji attached to an issue once that issue is gone.
CREATE OR REPLACE FUNCTION delete_emoji_on_issue_delete()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    DELETE FROM emoji AS e
     WHERE e."subjectID" = OLD.id;

    RETURN OLD;
END;
$$;

CREATE TRIGGER delete_emoji_on_issue_delete_trigger
    AFTER DELETE
    ON issue
    FOR EACH ROW
    EXECUTE FUNCTION delete_emoji_on_issue_delete();

-- Same clean-up as for issues, but for comments: once a comment row has been
-- deleted, remove every emoji whose "subjectID" is that comment's id.
CREATE OR REPLACE FUNCTION delete_emoji_on_comment_delete()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    DELETE FROM emoji AS e
     WHERE e."subjectID" = OLD.id;

    RETURN OLD;
END;
$$;

CREATE TRIGGER delete_emoji_on_comment_delete_trigger
    AFTER DELETE
    ON comment
    FOR EACH ROW
    EXECUTE FUNCTION delete_emoji_on_comment_delete();

-- When an emoji changes, find the issue it relates to and bump that issue's
-- "modified" time (milliseconds since the Unix epoch).
--
-- Parameter:
--   "subjectID"  id of the emoji's subject: either an issue id or a comment id.
-- Returns: nothing (VOID). If the id matches neither an issue nor a comment,
-- no row is updated.
--
-- The issue is matched in one of two ways:
--   1. "subjectID" is the issue's own id, or
--   2. "subjectID" is the id of a comment that belongs to the issue.
-- These are checked independently. Joining issue to comment first (as an
-- inner join) would drop issues that have no comments, so an emoji placed
-- directly on such an issue would never update its "modified" time.
CREATE OR REPLACE FUNCTION update_issue_modified_on_emoji_change("subjectID" VARCHAR)
RETURNS VOID AS $$
BEGIN
    UPDATE issue
       SET modified = EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000
     WHERE issue.id = "subjectID"
        OR EXISTS (
               SELECT 1
                 FROM comment AS c
                WHERE c.id = "subjectID"
                  AND c."issueID" = issue.id
           );
END;
$$ LANGUAGE plpgsql;

-- userPref

-- Per-user key/value preferences: one value per ("userID", "key") pair.
-- A user's preferences are removed when the user is deleted.
CREATE TABLE "userPref" (
    "key"    VARCHAR NOT NULL,
    "value"  VARCHAR NOT NULL,
    "userID" VARCHAR REFERENCES "user" ("id") ON DELETE CASCADE,

    PRIMARY KEY ("userID", "key")
);

-- zero.schemaVersions

CREATE SCHEMA IF NOT EXISTS zero;

-- Holds the supported schema version range in a single row.
CREATE TABLE IF NOT EXISTS zero."schemaVersions" (
    "minSupportedVersion" INTEGER,
    "maxSupportedVersion" INTEGER,

    -- Guarantees the table never holds more than one row: "lock" is the
    -- primary key and the CHECK only admits TRUE. Application code does not
    -- need to know about this column; it can simply UPDATE the version
    -- columns.
    "lock" BOOLEAN PRIMARY KEY DEFAULT TRUE,
    CONSTRAINT zero_schema_versions_single_row_constraint CHECK ("lock")
);

-- Seed the single row; if it already exists, leave it untouched.
INSERT INTO zero."schemaVersions" (
    "lock",
    "minSupportedVersion",
    "maxSupportedVersion"
)
VALUES (TRUE, 3, 5)
ON CONFLICT DO NOTHING;


-- Bulk-load users from a server-side CSV file; the first line of the file is
-- a header row and is skipped.
COPY "user" FROM '/data/users.csv' WITH (FORMAT csv, HEADER true);
Expand Down Expand Up @@ -353,27 +76,4 @@ FROM

-- Bulk-load comments from a server-side CSV file; the first line of the file
-- is a header row and is skipped.
COPY "comment" FROM '/data/comments_6.csv' WITH (FORMAT csv, HEADER true);


-- Indices are created on upstream so that they can be copied to downstream
-- on replication.
--
-- It has been discussed that, in the future, the indices of the Zero replica
-- can / should diverge from those of the upstream: the replica could be
-- serving a different set of applications than the upstream, in which case
-- indices dedicated to those use cases would be beneficial. That may turn
-- out not to be true, however.
--
-- Until then, copying the indices from upstream to the replica makes the most
-- sense: it gives the user a single place to manage indices and saves a step
-- when setting up the demo apps.
CREATE INDEX issuelabel_issueid_idx
    ON "issueLabel" ("issueID");

CREATE INDEX issue_modified_idx
    ON issue ("modified");

CREATE INDEX issue_created_idx
    ON issue ("created");

CREATE INDEX issue_open_modified_idx
    ON issue ("open", "modified");

CREATE INDEX comment_issueid_idx
    ON "comment" ("issueID");

VACUUM;
'/data/comments_6.csv' WITH CSV HEADER;
4 changes: 3 additions & 1 deletion apps/zbugs/docker/docker-compose-1gb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ services:
service: postgres_primary
volumes:
- zbugs_pgdata_upstream:/var/lib/postgresql/data
- ./1gb_upstream:/docker-entrypoint-initdb.d
- ./share_sql/base.sql:/docker-entrypoint-initdb.d/a.sql
- ./1gb_upstream/init.sql:/docker-entrypoint-initdb.d/b.sql
- ./share_sql/index.sql:/docker-entrypoint-initdb.d/c.sql
- ./data/1gb:/data
postgres_replica:
extends:
Expand Down
4 changes: 3 additions & 1 deletion apps/zbugs/docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ services:
service: postgres_primary
volumes:
- zbugs_pgdata_upstream:/var/lib/postgresql/data
- ./github_upstream:/docker-entrypoint-initdb.d
- ./share_sql/base.sql:/docker-entrypoint-initdb.d/a.sql
- ./github_upstream/init.sql:/docker-entrypoint-initdb.d/b.sql
- ./share_sql/index.sql:/docker-entrypoint-initdb.d/c.sql
- ./data/github:/data
postgres_replica:
extends:
Expand Down
Loading

0 comments on commit 863767b

Please sign in to comment.