-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #12427 from empirical-org/develop
Add LabeledEntry model (#12422)
- Loading branch information
Showing
9 changed files
with
473 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
22 changes: 22 additions & 0 deletions
22
services/QuillLMS/db/migrate/20240925185730_create_labeled_entries.evidence.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# frozen_string_literal: true | ||
|
||
# This migration comes from evidence (originally 20240925184213) | ||
require 'neighbor' | ||
class CreateLabeledEntries < ActiveRecord::Migration[7.1]
  # Creates the evidence_labeled_entries table together with its two indexes:
  # a plain index on prompt_id and a unique index on (prompt_id, entry).
  def change
    table_name = :evidence_labeled_entries

    create_table(table_name) do |t|
      t.boolean :approved
      t.text :entry, null: false
      t.text :label, null: false
      t.text :label_transformed, null: false
      t.jsonb :metadata
      t.integer :prompt_id, null: false
      # `vector` column type; `limit` is the number of dimensions.
      t.vector :embedding, limit: 1536, null: false

      t.timestamps
    end

    add_index(table_name, :prompt_id)
    # At most one row per (prompt, entry text) pair.
    add_index(table_name, %i[prompt_id entry], unique: true)
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
77 changes: 77 additions & 0 deletions
77
services/QuillLMS/engines/evidence/app/models/evidence/labeled_entry.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# frozen_string_literal: true | ||
|
||
# == Schema Information | ||
# | ||
# Table name: evidence_labeled_entries | ||
# | ||
# id :bigint not null, primary key | ||
# approved :boolean | ||
# embedding :vector(1536) not null | ||
# entry :text not null | ||
# label :text not null | ||
# label_transformed :text not null | ||
# metadata :jsonb | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# prompt_id :integer not null | ||
# | ||
# Indexes | ||
# | ||
# index_evidence_labeled_entries_on_prompt_id (prompt_id) | ||
# index_evidence_labeled_entries_on_prompt_id_and_entry (prompt_id,entry) UNIQUE | ||
# | ||
|
||
require 'neighbor' | ||
|
||
module Evidence
  # A piece of text (`entry`) with a `label`, belonging to a prompt, stored
  # alongside an embedding vector of that text so that similar entries can be
  # looked up by vector distance.
  class LabeledEntry < ApplicationRecord
    # Dimension and model are coupled: https://platform.openai.com/docs/guides/embeddings
    DIMENSION = 1536
    MODEL = 'text-embedding-3-small'

    DISTANCE_METRIC = 'cosine'
    COLLAPSED_OPTIMAL_LABEL = 'Optimal'

    belongs_to :prompt

    has_neighbors :embedding

    validates :embedding, presence: true
    validates :label, presence: true
    validates :label_transformed, presence: true
    validates :prompt, presence: true
    validates :entry, presence: true

    # Callbacks run in the order listed. `set_entry` goes first so that the
    # embedding is generated from the same trimmed text that is persisted,
    # rather than from the raw, untrimmed input.
    before_validation :set_entry, :set_embedding, :set_transformed_label

    # Returns the first record from a nearest-neighbor search on `embedding`
    # (using DISTANCE_METRIC), limited to entries that share this record's
    # prompt_id. Returns nil when the search yields nothing.
    def nearest_neighbor
      nearest_neighbors(:embedding, distance: DISTANCE_METRIC)
        .where(prompt_id:)
        .first
    end

    # Returns a hash with the nearest neighbor's `:distance` and `:label`.
    # Both values are nil when there is no neighbor.
    def nearest_label
      neighbor = nearest_neighbor

      { distance: neighbor&.neighbor_distance, label: neighbor&.label }
    end

    # Trims surrounding whitespace from `entry`. Blank values are left as-is
    # so the presence validation can reject them.
    private def set_entry
      self.entry = entry.strip if entry.present?
    end

    # Fetches an embedding for `entry` unless the entry is blank or an
    # embedding is already present.
    # NOTE(review): because of the `embedding.present?` guard, an existing
    # embedding is not refreshed when `entry` is later changed - confirm this
    # is intended.
    private def set_embedding
      return if entry.blank? || embedding.present?

      self.embedding = Evidence::OpenAI::EmbeddingFetcher.run(dimension: DIMENSION, input: entry, model: MODEL)
    end

    # Collapses labels of the form "Optimal_<digits>" into
    # COLLAPSED_OPTIMAL_LABEL; every other label is copied through unchanged.
    private def set_transformed_label
      collapsible = label.present? && label.match?(/\AOptimal_\d+\z/)

      self.label_transformed = collapsible ? COLLAPSED_OPTIMAL_LABEL : label
    end
  end
end
19 changes: 19 additions & 0 deletions
19
services/QuillLMS/engines/evidence/db/migrate/20240925184213_create_labeled_entries.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# frozen_string_literal: true | ||
|
||
require 'neighbor' | ||
class CreateLabeledEntries < ActiveRecord::Migration[7.1]
  # Creates the evidence_labeled_entries table and its indexes.
  #
  # Includes the nullable `approved` flag and the unique (prompt_id, entry)
  # index so that the schema produced by the engine matches the copy of this
  # migration installed into the host application.
  # NOTE(review): if a later engine migration already adds `approved` or the
  # unique index, drop the duplicates here - confirm against db/migrate.
  def change
    create_table :evidence_labeled_entries do |t|
      t.boolean :approved
      t.text :entry, null: false
      t.text :label, null: false
      t.text :label_transformed, null: false
      t.jsonb :metadata
      t.integer :prompt_id, null: false
      # `vector` column type; `limit` is the number of dimensions.
      t.vector :embedding, limit: 1536, null: false

      t.timestamps
    end

    add_index :evidence_labeled_entries, :prompt_id
    # At most one row per (prompt, entry text) pair.
    add_index :evidence_labeled_entries, [:prompt_id, :entry], unique: true
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
33 changes: 33 additions & 0 deletions
33
services/QuillLMS/engines/evidence/spec/factories/evidence/labeled_entries.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# frozen_string_literal: true | ||
|
||
# == Schema Information | ||
# | ||
# Table name: evidence_labeled_entries | ||
# | ||
# id :bigint not null, primary key | ||
# approved :boolean | ||
# embedding :vector(1536) not null | ||
# entry :text not null | ||
# label :text not null | ||
# label_transformed :text not null | ||
# metadata :jsonb | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# prompt_id :integer not null | ||
# | ||
# Indexes | ||
# | ||
# index_evidence_labeled_entries_on_prompt_id (prompt_id) | ||
# index_evidence_labeled_entries_on_prompt_id_and_entry (prompt_id,entry) UNIQUE | ||
# | ||
|
||
FactoryBot.define do
  # Factory for Evidence::LabeledEntry: random sentence as the entry, a random
  # "Label_<0..10>" label, and an associated :evidence_prompt.
  factory :evidence_labeled_entry, class: 'Evidence::LabeledEntry' do
    entry do
      Faker::Lorem.sentence
    end

    # Random floats in -1.0..1.0, one per embedding dimension.
    embedding do
      Array.new(Evidence::LabeledEntry::DIMENSION) { rand(-1.0..1.0) }
    end

    label do
      "Label_#{rand(0..10)}"
    end

    # Defaults to the same value as `label`.
    label_transformed do
      label
    end

    association :prompt, factory: :evidence_prompt
  end
end
Oops, something went wrong.