diff --git a/app/controllers/sites_controller.rb b/app/controllers/sites_controller.rb
index a49fe3b..7c39b74 100644
--- a/app/controllers/sites_controller.rb
+++ b/app/controllers/sites_controller.rb
@@ -4,7 +4,7 @@ class SitesController < ApplicationController
# GET /sites
def index
- @sites = Site.all
+ @pagy, @sites = pagy Site.includes(:audit)
end
# GET /sites/1
@@ -18,7 +18,7 @@ def edit; end
# POST /sites
def create
- @site = Site.new(site_params)
+ @site = Site.find_or_create_by_url(site_params)
if @site.save
redirect_to @site, notice: t(".notice")
else
@@ -44,7 +44,7 @@ def destroy
private
def set_site
- @site = params[:id].present? ? Site.friendly.find(params.expect(:id)) : Site.new
+ @site = params[:id].present? ? Site.friendly.find(params.expect(:id)) : Site.new_with_audit
end
def redirect_old_slugs
diff --git a/app/models/audit.rb b/app/models/audit.rb
new file mode 100644
index 0000000..4ce2a28
--- /dev/null
+++ b/app/models/audit.rb
@@ -0,0 +1,32 @@
+class Audit < ApplicationRecord
+ MAX_ATTEMPTS = 3
+ MAX_RUNTIME = 1.hour.freeze
+
+ belongs_to :site, touch: true
+
+ validates :url, presence: true, url: true
+ normalizes :url, with: ->(url) { URI.parse(url).normalize.to_s }
+
+ enum :status, ["pending", "running", "passed", "retryable", "failed"].index_by(&:itself), validate: true, default: :pending
+
+ scope :sort_by_newest, -> { order(created_at: :desc) }
+ scope :sort_by_url, -> { order(Arel.sql("REGEXP_REPLACE(url, '^https?://(www\.)?', '') ASC")) }
+ scope :due, -> { pending.where("run_at <= now()") }
+ scope :past, -> { where(status: [:passed, :failed]) }
+ scope :scheduled, -> { where("run_at > now()") }
+ scope :retryable, -> { failed.where(attempts: ...MAX_ATTEMPTS) }
+ scope :to_run, -> { due.or(retryable) }
+ scope :clean, -> { passed.where(attempts: 0) }
+ scope :late, -> { pending.where("run_at <= ?", 1.hour.ago) }
+ scope :retried, -> { passed.where(attempts: 1..) }
+ scope :stalled, -> { running.where("run_at < ?", MAX_RUNTIME.ago) }
+ scope :crashed, -> { failed.where(attempts: MAX_ATTEMPTS) }
+
+ delegate :hostname, :path, to: :parsed_url
+
+ def run_at = super || Time.zone.now
+ def due? = pending? && run_at <= Time.zone.now
+ def runnable? = due? || retryable?
+ def parsed_url = @parsed_url ||= URI.parse(url).normalize
+ def url_without_scheme = @url_without_scheme ||= [hostname, path == "/" ? nil : path].compact.join(nil)
+end
diff --git a/app/models/site.rb b/app/models/site.rb
index 943d340..cefbf84 100644
--- a/app/models/site.rb
+++ b/app/models/site.rb
@@ -1,7 +1,49 @@
class Site < ApplicationRecord
extend FriendlyId
- friendly_id :url, use: [:slugged, :history]
+ has_many :audits, dependent: :destroy
+ has_one_of_many :audit, -> { order("audits.created_at DESC") }, class_name: "Audit"
- attribute :url, :string
+ friendly_id :url_without_scheme, use: [:slugged, :history]
+
+ delegate :url, :url_without_scheme, to: :audit
+
+ class << self
+ def find_or_create_by_url(attributes)
+ url = attributes.to_h.delete(:url)
+ attributes.to_h.delete(:name) if attributes[:name].blank?
+ # Ignore http/https duplicates when searching
+ normalized_url = [url, url.sub(/^https?/, url.start_with?("https") ? "http" : "https")]
+ joins(:audits).find_by(audits: { url: normalized_url })&.tap { it.update(attributes) } \
+ || create_with_audit(url:, **attributes)
+ end
+
+ def create_with_audit(url: nil, **attributes)
+ new_with_audit(url:, **attributes).tap(&:save)
+ end
+
+ def new_with_audit(url: nil, **attributes)
+ new(attributes).tap { |site| site.audits.build(url:) }
+ end
+ end
+
+ def new(attributes = nil)
+ return super if attributes.nil?
+
+ attributes = attributes.to_h.symbolize_keys
+
+ if url = attributes.delete(:url)
+ self.class.find_or_create_by(url:, **attributes)
+ else
+ super
+ end
+ end
+
+ def url=(new_url)
+ audit = audits.build(url: new_url)
+ end
+
+ def to_title = url_without_scheme
+ def audit = super || audits.last || audits.build
+ def should_generate_new_friendly_id? = new_record? || (audit && slug != url_without_scheme.parameterize)
end
diff --git a/app/views/sites/index.html.erb b/app/views/sites/index.html.erb
index 653546f..25feee6 100644
--- a/app/views/sites/index.html.erb
+++ b/app/views/sites/index.html.erb
@@ -9,7 +9,7 @@
<% t.with_body do %>
<% @sites.each do |site| %>
- <%= link_to site.name, site %> |
+ <%= link_to site.url_without_scheme, site %> |
<%= l site.updated_at, format: :compact %>
diff --git a/db/migrate/20250213152147_create_audits.rb b/db/migrate/20250213152147_create_audits.rb
new file mode 100644
index 0000000..44226b0
--- /dev/null
+++ b/db/migrate/20250213152147_create_audits.rb
@@ -0,0 +1,18 @@
+class CreateAudits < ActiveRecord::Migration[8.0]
+ def change
+ create_table :audits do |t|
+ t.belongs_to :site, null: false, foreign_key: true
+ t.string :url, null: false
+ t.string :status, null: false
+ t.integer :attempts, null: false, default: 0
+ t.datetime :run_at, null: false, default: -> { "CURRENT_TIMESTAMP" }
+
+ t.timestamps
+
+ t.index :url
+ t.index [:status, :run_at], name: "index_audits_on_status_and_run_at"
+ t.index "REGEXP_REPLACE(url, '^https?://(www\.)?', '')", name: "index_audits_on_normalized_url"
+ t.index :attempts, where: "status = 'failed' AND attempts > 0", name: "index_audits_on_retryable"
+ end
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 91194fe..ebd9aa5 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,10 +10,25 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema[8.0].define(version: 2025_02_13_082907) do
+ActiveRecord::Schema[8.0].define(version: 2025_02_13_152147) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_catalog.plpgsql"
+ create_table "audits", force: :cascade do |t|
+ t.bigint "site_id", null: false
+ t.string "url", null: false
+ t.string "status", null: false
+ t.integer "attempts", default: 0, null: false
+ t.datetime "run_at", default: -> { "CURRENT_TIMESTAMP" }, null: false
+ t.datetime "created_at", null: false
+ t.datetime "updated_at", null: false
+ t.index "regexp_replace((url)::text, '^https?://(www.)?'::text, ''::text)", name: "index_audits_on_normalized_url"
+ t.index ["attempts"], name: "index_audits_on_retryable", where: "(((status)::text = 'failed'::text) AND (attempts > 0))"
+ t.index ["site_id"], name: "index_audits_on_site_id"
+ t.index ["status", "run_at"], name: "index_audits_on_status_and_run_at"
+ t.index ["url"], name: "index_audits_on_url"
+ end
+
create_table "friendly_id_slugs", force: :cascade do |t|
t.string "slug", null: false
t.integer "sluggable_id", null: false
@@ -32,4 +47,6 @@
t.datetime "updated_at", null: false
t.index ["slug"], name: "index_sites_on_slug", unique: true
end
+
+ add_foreign_key "audits", "sites"
end
diff --git a/spec/factories/audits.rb b/spec/factories/audits.rb
new file mode 100644
index 0000000..b27baff
--- /dev/null
+++ b/spec/factories/audits.rb
@@ -0,0 +1,22 @@
+FactoryBot.define do
+ factory :audit do
+ site { association :site, url:, strategy: :build }
+ url { "https://example.com" }
+
+ trait :passed do
+ status { "passed" }
+ end
+
+ trait :failed do
+ status { "failed" }
+ end
+
+ trait :pending do
+ status { "pending" }
+ end
+
+ trait :running do
+ status { "running" }
+ end
+ end
+end
diff --git a/spec/factories/sites.rb b/spec/factories/sites.rb
index a45a80a..f3cdd5a 100644
--- a/spec/factories/sites.rb
+++ b/spec/factories/sites.rb
@@ -1,5 +1,5 @@
FactoryBot.define do
factory :site do
- sequence(:slug) { |n| "www.example-#{n}.com" }
+ sequence(:url) { |n| "https://www.example-#{n}.com/" }
end
end
diff --git a/spec/models/audit_spec.rb b/spec/models/audit_spec.rb
new file mode 100644
index 0000000..e4225f3
--- /dev/null
+++ b/spec/models/audit_spec.rb
@@ -0,0 +1,203 @@
+require "rails_helper"
+
+RSpec.describe Audit do
+ let(:site) { create(:site) }
+ subject(:audit) { build(:audit, site: nil) }
+
+ it "has a valid factory" do
+ audit = build(:audit)
+ expect(audit).to be_valid
+ end
+
+ describe "associations" do
+ it { is_expected.to belong_to(:site).touch(true) }
+ end
+
+ describe "validations" do
+ it { is_expected.to allow_value("https://example.com").for(:url) }
+ it { is_expected.not_to allow_value("not-a-url").for(:url) }
+ end
+
+ describe "normalization" do
+ it "normalizes URLs" do
+ audit.url = "HTTPS://EXAMPLE.COM/path/"
+ expect(audit.url).to eq("https://example.com/path/")
+ end
+ end
+
+ describe "enums" do
+ it do
+ should define_enum_for(:status)
+ .validating
+ .with_values(["pending", "running", "passed", "retryable", "failed"].index_by(&:itself))
+ .backed_by_column_of_type(:string)
+ .with_default(:pending)
+ end
+ end
+
+ describe "scopes" do
+ before { site.audit.destroy }
+ it "sort_by_newest returns audits in descending order by creation date" do
+ oldest = create(:audit, site:, created_at: 3.days.ago)
+ older = create(:audit, site:, created_at: 2.days.ago)
+ newer = create(:audit, site:, created_at: 1.day.ago)
+
+ expect(described_class.sort_by_newest).to eq([newer, older, oldest])
+ end
+
+ it "sort_by_url orders by URL ignoring protocol and www" do
+ alpha = create(:audit, site:, url: "https://alpha.com/path")
+ beta = create(:audit, site:, url: "http://www.beta.com")
+ gamma = create(:audit, site:, url: "https://www.gamma.com")
+
+ expect(described_class.sort_by_url).to eq([alpha, beta, gamma])
+ end
+
+ it "due returns pending audits with run_at in the past" do
+ past_pending = create(:audit, :pending, site:, run_at: 1.hour.ago)
+ create(:audit, :pending, site:, run_at: 1.hour.from_now) # future
+ create(:audit, :passed, site:, run_at: 2.hours.ago) # not pending
+
+ expect(described_class.due).to eq([past_pending])
+ end
+
+ it "to_run returns due and retryable audits" do
+ past_pending = create(:audit, :pending, site:, run_at: 1.hour.ago)
+ retryable = create(:audit, :failed, site:, attempts: Audit::MAX_ATTEMPTS - 1)
+
+ # Create records that shouldn't be included
+ create(:audit, :pending, site:, run_at: 1.hour.from_now) # not due
+ create(:audit, :failed, site:, attempts: Audit::MAX_ATTEMPTS) # not retryable
+ create(:audit, :passed, site:) # wrong status
+
+ expect(described_class.to_run).to match_array([past_pending, retryable])
+ end
+
+ context "with audits" do
+ let!(:passed_audit) { create(:audit, :passed, site:, created_at: 7.days.ago, url: "https://beta.com") }
+ let!(:failed_audit) { create(:audit, :failed, site:, created_at: 6.days.ago, url: "https://alpha.com") }
+ let!(:pending_audit) { create(:audit, :pending, site:, run_at: 1.hour.ago, created_at: 5.days.ago) }
+ let!(:future_audit) { create(:audit, :pending, site:, run_at: 1.hour.from_now, created_at: 4.days.ago) }
+ let!(:retried_audit) { create(:audit, :passed, site:, attempts: 2, created_at: 3.days.ago) }
+ let!(:running_audit) { create(:audit, :running, site:, run_at: 2.hours.ago, created_at: 2.days.ago) }
+ let!(:crashed_audit) { create(:audit, :failed, site:, attempts: Audit::MAX_ATTEMPTS, created_at: 1.day.ago) }
+
+ it "past returns passed and failed audits" do
+ expect(described_class.past).to contain_exactly(passed_audit, failed_audit, retried_audit, crashed_audit)
+ end
+
+ it "scheduled returns audits with future run_at" do
+ expect(described_class.scheduled).to eq([future_audit])
+ end
+
+ it "retryable returns failed audits with attempts less than MAX_ATTEMPTS" do
+ expect(described_class.retryable).to eq([failed_audit])
+ end
+
+ it "clean returns passed audits with no attempts" do
+ expect(described_class.clean).to eq([passed_audit])
+ end
+
+ it "late returns pending audits overdue by an hour" do
+ expect(described_class.late).to eq([pending_audit])
+ end
+
+ it "retried returns passed audits with attempts" do
+ expect(described_class.retried).to eq([retried_audit])
+ end
+
+ it "stalled returns running audits older than MAX_RUNTIME" do
+ expect(described_class.stalled).to eq([running_audit])
+ end
+
+ it "crashed returns failed audits with MAX_ATTEMPTS" do
+ expect(described_class.crashed).to eq([crashed_audit])
+ end
+ end
+ end
+
+ describe "instance methods" do
+ describe "#run_at" do
+ it "returns the set time when present" do
+ time = 1.hour.from_now
+ audit.run_at = time
+ expect(audit.run_at).to be_within(1.second).of(time)
+ end
+
+ it "returns current time when not set" do
+ expect(audit.run_at).to be_within(1.second).of(Time.zone.now)
+ end
+ end
+
+ describe "#due?" do
+ it "returns true for pending audits with past run_at" do
+ audit.status = "pending"
+ audit.run_at = 1.minute.ago
+ expect(audit).to be_due
+ end
+
+ it "returns false for pending audits with future run_at" do
+ audit.status = "pending"
+ audit.run_at = 1.minute.from_now
+ expect(audit).not_to be_due
+ end
+
+ it "returns false for non-pending audits" do
+ audit.status = "running"
+ audit.run_at = 1.minute.ago
+ expect(audit).not_to be_due
+ end
+ end
+
+ describe "#runnable?" do
+ it "returns true when due" do
+ allow(audit).to receive(:due?).and_return(true)
+ expect(audit).to be_runnable
+ end
+
+ it "returns true when retryable" do
+ allow(audit).to receive(:retryable?).and_return(true)
+ expect(audit).to be_runnable
+ end
+
+ it "returns false when neither due nor retryable" do
+ allow(audit).to receive(:due?).and_return(false)
+ allow(audit).to receive(:retryable?).and_return(false)
+ expect(audit).not_to be_runnable
+ end
+ end
+
+ describe "#parsed_url" do
+ it "returns a parsed and normalized URI" do
+ audit.url = "https://example.com/path/"
+ expect(audit.parsed_url).to be_a(URI::HTTPS)
+ expect(audit.parsed_url.to_s).to eq(audit.url)
+ end
+
+ it "memoizes the result" do
+ audit.url = "https://example.com"
+ expect(URI).to receive(:parse).twice.and_call_original # Once when fetching url, once in the method
+ 2.times { audit.parsed_url }
+ end
+ end
+
+ describe "#url_without_scheme" do
+ it "returns hostname for root path" do
+ audit.url = "https://example.com"
+ expect(audit.url_without_scheme).to eq("example.com")
+ end
+
+ it "returns hostname and path for non-root path" do
+ audit.url = "https://example.com/path"
+ expect(audit.url_without_scheme).to eq("example.com/path")
+ end
+
+ it "memoizes the result" do
+ audit.url = "https://example.com"
+ first_result = audit.url_without_scheme
+ allow(audit).to receive(:hostname).and_return("different.com")
+ expect(audit.url_without_scheme).to eq(first_result)
+ end
+ end
+ end
+end
diff --git a/spec/models/site_spec.rb b/spec/models/site_spec.rb
new file mode 100644
index 0000000..f1b5926
--- /dev/null
+++ b/spec/models/site_spec.rb
@@ -0,0 +1,116 @@
+require "rails_helper"
+
+RSpec.describe Site, type: :model do
+ let(:url) { "https://example.com/" }
+
+ describe "associations" do
+ it { should have_many(:audits).dependent(:destroy) }
+ end
+
+ describe "delegations" do
+ let(:site) { create(:site) }
+ let!(:audit) { create(:audit, site:, url: "https://example.com") }
+
+ it { should delegate_method(:url).to(:audit) }
+ it { should delegate_method(:url_without_scheme).to(:audit) }
+
+ it "delegates to the most recent audit" do
+ new_audit = create(:audit, site:, url: "https://new-example.com")
+ expect(site.reload.url).to eq(new_audit.url)
+ end
+ end
+
+ describe ".find_or_create_by_url" do
+ let(:http_url) { "http://example.com" }
+
+ context "when site with URL exists" do
+ let!(:existing_site) { described_class.create_with_audit(url:) }
+
+ it "returns existing site for exact URL match" do
+ expect(described_class.find_or_create_by_url(url:)).to eq(existing_site)
+ end
+
+ it "returns existing site when only scheme differs" do
+ expect(described_class.find_or_create_by_url(url: http_url)).to eq(existing_site)
+ end
+
+ it "finds site with historical URLs" do
+ new_url = "https://new-example.com"
+ existing_site.audits.create!(url: new_url)
+
+ expect(described_class.find_or_create_by_url(url:)).to eq(existing_site)
+ expect(described_class.find_or_create_by_url(url: new_url)).to eq(existing_site)
+ end
+ end
+
+ context "when site does not exist" do
+ it "creates a new site with audit" do
+ expect {
+ site = described_class.find_or_create_by_url(url:)
+ expect(site).to be_persisted
+ expect(site.audit.url).to eq(url)
+ }.to change(described_class, :count).by(1)
+ .and change(Audit, :count).by(1)
+ end
+ end
+ end
+
+ describe ".new_with_audit" do
+ it "builds a new site with associated audit" do
+ site = described_class.new_with_audit(url:)
+
+ expect(site).to be_new_record
+ expect(site.audits.size).to eq(1)
+ expect(site.audit).to eq(site.audits.first)
+ expect(site.audit.url).to eq(url)
+ end
+ end
+
+ describe ".create_with_audit" do
+ it "creates a new site with associated audit" do
+ expect {
+ site = described_class.create_with_audit(url:)
+ expect(site).to be_persisted
+ expect(site.audits.size).to eq(1)
+ expect(site.audit).to be_persisted
+ expect(site.audit.url).to eq(url)
+ }.to change(described_class, :count).by(1)
+ .and change(Audit, :count).by(1)
+ end
+ end
+
+ describe "#to_title" do
+ let(:site) { create(:site, url:) }
+ let!(:audit) { create(:audit, site:) }
+
+ it "returns the URL without scheme from the latest audit" do
+ expect(site.to_title).to eq(audit.url_without_scheme)
+ end
+
+ it "updates when new audit is created" do
+ new_audit = create(:audit, site:, url: "https://new-example.com")
+ expect(site.reload.to_title).to eq(new_audit.url_without_scheme)
+ end
+ end
+
+ describe "friendly_id" do
+ let(:url) { "https://example.com/path?query=1" }
+ let(:site) { described_class.create_with_audit(url:) }
+
+ it "generates slug from url_without_scheme" do
+ expect(site.slug).to be_present
+ expect(site.slug).to eq(site.audit.url_without_scheme.parameterize)
+ end
+
+ it "maintains history of slugs" do
+ old_slug = site.slug
+ new_url = "https://new-example.com"
+
+ site.audits.create!(url: new_url)
+ site.save!
+
+ expect(site.reload.slug).not_to eq(old_slug)
+ expect(described_class.friendly.find(old_slug)).to eq(site)
+ end
+ end
+end
|