From 9f62867e7f2fb82f83a0fb43203cc984c0b9c8e0 Mon Sep 17 00:00:00 2001 From: Goulven Champenois Date: Thu, 13 Feb 2025 23:49:07 +0100 Subject: [PATCH] Add audit model --- app/controllers/sites_controller.rb | 6 +- app/models/audit.rb | 32 ++++ app/models/site.rb | 46 ++++- app/views/sites/index.html.erb | 2 +- db/migrate/20250213152147_create_audits.rb | 18 ++ db/schema.rb | 19 +- spec/factories/audits.rb | 22 +++ spec/factories/sites.rb | 2 +- spec/models/audit_spec.rb | 203 +++++++++++++++++++++ spec/models/site_spec.rb | 116 ++++++++++++ 10 files changed, 458 insertions(+), 8 deletions(-) create mode 100644 app/models/audit.rb create mode 100644 db/migrate/20250213152147_create_audits.rb create mode 100644 spec/factories/audits.rb create mode 100644 spec/models/audit_spec.rb create mode 100644 spec/models/site_spec.rb diff --git a/app/controllers/sites_controller.rb b/app/controllers/sites_controller.rb index a49fe3b..7c39b74 100644 --- a/app/controllers/sites_controller.rb +++ b/app/controllers/sites_controller.rb @@ -4,7 +4,7 @@ class SitesController < ApplicationController # GET /sites def index - @sites = Site.all + @pagy, @sites = pagy Site.includes(:audit) end # GET /sites/1 @@ -18,7 +18,7 @@ def edit; end # POST /sites def create - @site = Site.new(site_params) + @site = Site.find_or_create_by_url(site_params) if @site.save redirect_to @site, notice: t(".notice") else @@ -44,7 +44,7 @@ def destroy private def set_site - @site = params[:id].present? ? Site.friendly.find(params.expect(:id)) : Site.new + @site = params[:id].present? ? Site.friendly.find(params.expect(:id)) : Site.new_with_audit end def redirect_old_slugs diff --git a/app/models/audit.rb b/app/models/audit.rb new file mode 100644 index 0000000..4ce2a28 --- /dev/null +++ b/app/models/audit.rb @@ -0,0 +1,32 @@ +class Audit < ApplicationRecord + MAX_ATTEMPTS = 3 + MAX_RUNTIME = 1.hour.freeze + + belongs_to :site, touch: true + + validates :url, presence: true, url: true + normalizes :url, with: ->(url) { URI.parse(url).normalize.to_s } + + enum :status, ["pending", "running", "passed", "retryable", "failed"].index_by(&:itself), validate: true, default: :pending + + scope :sort_by_newest, -> { order(created_at: :desc) } + scope :sort_by_url, -> { order(Arel.sql("REGEXP_REPLACE(url, '^https?://(www\.)?', '') ASC")) } + scope :due, -> { pending.where("run_at <= now()") } + scope :past, -> { where(status: [:passed, :failed]) } + scope :scheduled, -> { where("run_at > now()") } + scope :retryable, -> { failed.where(attempts: ...MAX_ATTEMPTS) } + scope :to_run, -> { due.or(retryable) } + scope :clean, -> { passed.where(attempts: 0) } + scope :late, -> { pending.where("run_at <= ?", 1.hour.ago) } + scope :retried, -> { passed.where(attempts: 1..) } + scope :stalled, -> { running.where("run_at < ?", MAX_RUNTIME.ago) } + scope :crashed, -> { failed.where(attempts: MAX_ATTEMPTS) } + + delegate :hostname, :path, to: :parsed_url + + def run_at = super || Time.zone.now + def due? = pending? && run_at <= Time.zone.now + def runnable? = due? || retryable? + def parsed_url = @parsed_url ||= URI.parse(url).normalize + def url_without_scheme = @url_without_scheme ||= [hostname, path == "/" ? nil : path].compact.join(nil) +end diff --git a/app/models/site.rb b/app/models/site.rb index 943d340..cefbf84 100644 --- a/app/models/site.rb +++ b/app/models/site.rb @@ -1,7 +1,49 @@ class Site < ApplicationRecord extend FriendlyId - friendly_id :url, use: [:slugged, :history] + has_many :audits, dependent: :destroy + has_one_of_many :audit, -> { order("audits.created_at DESC") }, class_name: "Audit" - attribute :url, :string + friendly_id :url_without_scheme, use: [:slugged, :history] + + delegate :url, :url_without_scheme, to: :audit + + class << self + def find_or_create_by_url(attributes) + url = attributes.to_h.delete(:url) + attributes.to_h.delete(:name) if attributes[:name].blank? + # Ignore http/https duplicates when searching + normalized_url = [url, url.sub(/^https?/, url.start_with?("https") ? "http" : "https")] + joins(:audits).find_by(audits: { url: normalized_url })&.tap { it.update(attributes) } \ + || create_with_audit(url:, **attributes) + end + + def create_with_audit(url: nil, **attributes) + new_with_audit(url:, **attributes).tap(&:save) + end + + def new_with_audit(url: nil, **attributes) + new(attributes).tap { |site| site.audits.build(url:) } + end + end + + def new(attributes = nil) + return super if attributes.nil? + + attributes = attributes.to_h.symbolize_keys + + if url = attributes.delete(:url) + self.class.find_or_create_by(url:, **attributes) + else + super + end + end + + def url=(new_url) + audit = audits.build(url: new_url) + end + + def to_title = url_without_scheme + def audit = super || audits.last || audits.build + def should_generate_new_friendly_id? = new_record? || (audit && slug != url_without_scheme.parameterize) end diff --git a/app/views/sites/index.html.erb b/app/views/sites/index.html.erb index 653546f..25feee6 100644 --- a/app/views/sites/index.html.erb +++ b/app/views/sites/index.html.erb @@ -9,7 +9,7 @@ <% t.with_body do %> <% @sites.each do |site| %> - <%= link_to site.name, site %> + <%= link_to site.url_without_scheme, site %> <%= l site.updated_at, format: :compact %>
diff --git a/db/migrate/20250213152147_create_audits.rb b/db/migrate/20250213152147_create_audits.rb new file mode 100644 index 0000000..44226b0 --- /dev/null +++ b/db/migrate/20250213152147_create_audits.rb @@ -0,0 +1,18 @@ +class CreateAudits < ActiveRecord::Migration[8.0] + def change + create_table :audits do |t| + t.belongs_to :site, null: false, foreign_key: true + t.string :url, null: false + t.string :status, null: false + t.integer :attempts, null: false, default: 0 + t.datetime :run_at, null: false, default: -> { "CURRENT_TIMESTAMP" } + + t.timestamps + + t.index :url + t.index [:status, :run_at], name: "index_audits_on_status_and_run_at" + t.index "REGEXP_REPLACE(url, '^https?://(www\.)?', '')", name: "index_audits_on_normalized_url" + t.index :attempts, where: "status = 'failed' AND attempts > 0", name: "index_audits_on_retryable" + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 91194fe..ebd9aa5 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,10 +10,25 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.0].define(version: 2025_02_13_082907) do +ActiveRecord::Schema[8.0].define(version: 2025_02_13_152147) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" + create_table "audits", force: :cascade do |t| + t.bigint "site_id", null: false + t.string "url", null: false + t.string "status", null: false + t.integer "attempts", default: 0, null: false + t.datetime "run_at", default: -> { "CURRENT_TIMESTAMP" }, null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index "regexp_replace((url)::text, '^https?://(www.)?'::text, ''::text)", name: "index_audits_on_normalized_url" + t.index ["attempts"], name: "index_audits_on_retryable", where: "(((status)::text = 'failed'::text) AND (attempts > 0))" + t.index ["site_id"], name: "index_audits_on_site_id" + t.index ["status", "run_at"], name: "index_audits_on_status_and_run_at" + t.index ["url"], name: "index_audits_on_url" + end + create_table "friendly_id_slugs", force: :cascade do |t| t.string "slug", null: false t.integer "sluggable_id", null: false @@ -32,4 +47,6 @@ t.datetime "updated_at", null: false t.index ["slug"], name: "index_sites_on_slug", unique: true end + + add_foreign_key "audits", "sites" end diff --git a/spec/factories/audits.rb b/spec/factories/audits.rb new file mode 100644 index 0000000..b27baff --- /dev/null +++ b/spec/factories/audits.rb @@ -0,0 +1,22 @@ +FactoryBot.define do + factory :audit do + site { association :site, url:, strategy: :build } + url { "https://example.com" } + + trait :passed do + status { "passed" } + end + + trait :failed do + status { "failed" } + end + + trait :pending do + status { "pending" } + end + + trait :running do + status { "running" } + end + end +end diff --git a/spec/factories/sites.rb b/spec/factories/sites.rb index a45a80a..f3cdd5a 100644 --- a/spec/factories/sites.rb +++ b/spec/factories/sites.rb @@ -1,5 +1,5 @@ FactoryBot.define do factory :site do - sequence(:slug) { |n| "www.example-#{n}.com" } + sequence(:url) { |n| "https://www.example-#{n}.com/" } end end diff --git a/spec/models/audit_spec.rb b/spec/models/audit_spec.rb new file mode 100644 index 0000000..e4225f3 --- /dev/null +++ b/spec/models/audit_spec.rb @@ -0,0 +1,203 @@ +require "rails_helper" + +RSpec.describe Audit do + let(:site) { create(:site) } + subject(:audit) { build(:audit, site: nil) } + + it "has a valid factory" do + audit = build(:audit) + expect(audit).to be_valid + end + + describe "associations" do + it { is_expected.to belong_to(:site).touch(true) } + end + + describe "validations" do + it { is_expected.to allow_value("https://example.com").for(:url) } + it { is_expected.not_to allow_value("not-a-url").for(:url) } + end + + describe "normalization" do + it "normalizes URLs" do + audit.url = "HTTPS://EXAMPLE.COM/path/" + expect(audit.url).to eq("https://example.com/path/") + end + end + + describe "enums" do + it do + should define_enum_for(:status) + .validating + .with_values(["pending", "running", "passed", "retryable", "failed"].index_by(&:itself)) + .backed_by_column_of_type(:string) + .with_default(:pending) + end + end + + describe "scopes" do + before { site.audit.destroy } + it "sort_by_newest returns audits in descending order by creation date" do + oldest = create(:audit, site:, created_at: 3.days.ago) + older = create(:audit, site:, created_at: 2.days.ago) + newer = create(:audit, site:, created_at: 1.day.ago) + + expect(described_class.sort_by_newest).to eq([newer, older, oldest]) + end + + it "sort_by_url orders by URL ignoring protocol and www" do + alpha = create(:audit, site:, url: "https://alpha.com/path") + beta = create(:audit, site:, url: "http://www.beta.com") + gamma = create(:audit, site:, url: "https://www.gamma.com") + + expect(described_class.sort_by_url).to eq([alpha, beta, gamma]) + end + + it "due returns pending audits with run_at in the past" do + past_pending = create(:audit, :pending, site:, run_at: 1.hour.ago) + create(:audit, :pending, site:, run_at: 1.hour.from_now) # future + create(:audit, :passed, site:, run_at: 2.hours.ago) # not pending + + expect(described_class.due).to eq([past_pending]) + end + + it "to_run returns due and retryable audits" do + past_pending = create(:audit, :pending, site:, run_at: 1.hour.ago) + retryable = create(:audit, :failed, site:, attempts: Audit::MAX_ATTEMPTS - 1) + + # Create records that shouldn't be included + create(:audit, :pending, site:, run_at: 1.hour.from_now) # not due + create(:audit, :failed, site:, attempts: Audit::MAX_ATTEMPTS) # not retryable + create(:audit, :passed, site:) # wrong status + + expect(described_class.to_run).to match_array([past_pending, retryable]) + end + + context "with audits" do + let!(:passed_audit) { create(:audit, :passed, site:, created_at: 7.days.ago, url: "https://beta.com") } + let!(:failed_audit) { create(:audit, :failed, site:, created_at: 6.days.ago, url: "https://alpha.com") } + let!(:pending_audit) { create(:audit, :pending, site:, run_at: 1.hour.ago, created_at: 5.days.ago) } + let!(:future_audit) { create(:audit, :pending, site:, run_at: 1.hour.from_now, created_at: 4.days.ago) } + let!(:retried_audit) { create(:audit, :passed, site:, attempts: 2, created_at: 3.days.ago) } + let!(:running_audit) { create(:audit, :running, site:, run_at: 2.hours.ago, created_at: 2.days.ago) } + let!(:crashed_audit) { create(:audit, :failed, site:, attempts: Audit::MAX_ATTEMPTS, created_at: 1.day.ago) } + + it "past returns passed and failed audits" do + expect(described_class.past).to contain_exactly(passed_audit, failed_audit, retried_audit, crashed_audit) + end + + it "scheduled returns audits with future run_at" do + expect(described_class.scheduled).to eq([future_audit]) + end + + it "retryable returns failed audits with attempts less than MAX_ATTEMPTS" do + expect(described_class.retryable).to eq([failed_audit]) + end + + it "clean returns passed audits with no attempts" do + expect(described_class.clean).to eq([passed_audit]) + end + + it "late returns pending audits overdue by an hour" do + expect(described_class.late).to eq([pending_audit]) + end + + it "retried returns passed audits with attempts" do + expect(described_class.retried).to eq([retried_audit]) + end + + it "stalled returns running audits older than MAX_RUNTIME" do + expect(described_class.stalled).to eq([running_audit]) + end + + it "crashed returns failed audits with MAX_ATTEMPTS" do + expect(described_class.crashed).to eq([crashed_audit]) + end + end + end + + describe "instance methods" do + describe "#run_at" do + it "returns the set time when present" do + time = 1.hour.from_now + audit.run_at = time + expect(audit.run_at).to be_within(1.second).of(time) + end + + it "returns current time when not set" do + expect(audit.run_at).to be_within(1.second).of(Time.zone.now) + end + end + + describe "#due?" do + it "returns true for pending audits with past run_at" do + audit.status = "pending" + audit.run_at = 1.minute.ago + expect(audit).to be_due + end + + it "returns false for pending audits with future run_at" do + audit.status = "pending" + audit.run_at = 1.minute.from_now + expect(audit).not_to be_due + end + + it "returns false for non-pending audits" do + audit.status = "running" + audit.run_at = 1.minute.ago + expect(audit).not_to be_due + end + end + + describe "#runnable?" do + it "returns true when due" do + allow(audit).to receive(:due?).and_return(true) + expect(audit).to be_runnable + end + + it "returns true when retryable" do + allow(audit).to receive(:retryable?).and_return(true) + expect(audit).to be_runnable + end + + it "returns false when neither due nor retryable" do + allow(audit).to receive(:due?).and_return(false) + allow(audit).to receive(:retryable?).and_return(false) + expect(audit).not_to be_runnable + end + end + + describe "#parsed_url" do + it "returns a parsed and normalized URI" do + audit.url = "https://example.com/path/" + expect(audit.parsed_url).to be_a(URI::HTTPS) + expect(audit.parsed_url.to_s).to eq(audit.url) + end + + it "memoizes the result" do + audit.url = "https://example.com" + expect(URI).to receive(:parse).twice.and_call_original # Once when fetching url, once in the method + 2.times { audit.parsed_url } + end + end + + describe "#url_without_scheme" do + it "returns hostname for root path" do + audit.url = "https://example.com" + expect(audit.url_without_scheme).to eq("example.com") + end + + it "returns hostname and path for non-root path" do + audit.url = "https://example.com/path" + expect(audit.url_without_scheme).to eq("example.com/path") + end + + it "memoizes the result" do + audit.url = "https://example.com" + first_result = audit.url_without_scheme + allow(audit).to receive(:hostname).and_return("different.com") + expect(audit.url_without_scheme).to eq(first_result) + end + end + end +end diff --git a/spec/models/site_spec.rb b/spec/models/site_spec.rb new file mode 100644 index 0000000..f1b5926 --- /dev/null +++ b/spec/models/site_spec.rb @@ -0,0 +1,116 @@ +require "rails_helper" + +RSpec.describe Site, type: :model do + let(:url) { "https://example.com/" } + + describe "associations" do + it { should have_many(:audits).dependent(:destroy) } + end + + describe "delegations" do + let(:site) { create(:site) } + let!(:audit) { create(:audit, site:, url: "https://example.com") } + + it { should delegate_method(:url).to(:audit) } + it { should delegate_method(:url_without_scheme).to(:audit) } + + it "delegates to the most recent audit" do + new_audit = create(:audit, site:, url: "https://new-example.com") + expect(site.reload.url).to eq(new_audit.url) + end + end + + describe ".find_or_create_by_url" do + let(:http_url) { "http://example.com" } + + context "when site with URL exists" do + let!(:existing_site) { described_class.create_with_audit(url:) } + + it "returns existing site for exact URL match" do + expect(described_class.find_or_create_by_url(url:)).to eq(existing_site) + end + + it "returns existing site when only scheme differs" do + expect(described_class.find_or_create_by_url(url: http_url)).to eq(existing_site) + end + + it "finds site with historical URLs" do + new_url = "https://new-example.com" + existing_site.audits.create!(url: new_url) + + expect(described_class.find_or_create_by_url(url:)).to eq(existing_site) + expect(described_class.find_or_create_by_url(url: new_url)).to eq(existing_site) + end + end + + context "when site does not exist" do + it "creates a new site with audit" do + expect { + site = described_class.find_or_create_by_url(url:) + expect(site).to be_persisted + expect(site.audit.url).to eq(url) + }.to change(described_class, :count).by(1) + .and change(Audit, :count).by(1) + end + end + end + + describe ".new_with_audit" do + it "builds a new site with associated audit" do + site = described_class.new_with_audit(url:) + + expect(site).to be_new_record + expect(site.audits.size).to eq(1) + expect(site.audit).to eq(site.audits.first) + expect(site.audit.url).to eq(url) + end + end + + describe ".create_with_audit" do + it "creates a new site with associated audit" do + expect { + site = described_class.create_with_audit(url:) + expect(site).to be_persisted + expect(site.audits.size).to eq(1) + expect(site.audit).to be_persisted + expect(site.audit.url).to eq(url) + }.to change(described_class, :count).by(1) + .and change(Audit, :count).by(1) + end + end + + describe "#to_title" do + let(:site) { create(:site, url:) } + let!(:audit) { create(:audit, site:) } + + it "returns the URL without scheme from the latest audit" do + expect(site.to_title).to eq(audit.url_without_scheme) + end + + it "updates when new audit is created" do + new_audit = create(:audit, site:, url: "https://new-example.com") + expect(site.reload.to_title).to eq(new_audit.url_without_scheme) + end + end + + describe "friendly_id" do + let(:url) { "https://example.com/path?query=1" } + let(:site) { described_class.create_with_audit(url:) } + + it "generates slug from url_without_scheme" do + expect(site.slug).to be_present + expect(site.slug).to eq(site.audit.url_without_scheme.parameterize) + end + + it "maintains history of slugs" do + old_slug = site.slug + new_url = "https://new-example.com" + + site.audits.create!(url: new_url) + site.save! + + expect(site.reload.slug).not_to eq(old_slug) + expect(described_class.friendly.find(old_slug)).to eq(site) + end + end +end