Skip to content

Commit

Permalink
ai_search_column fill mechanism
Browse files Browse the repository at this point in the history
  • Loading branch information
martinheppner committed Oct 31, 2024
1 parent a6817d2 commit 84b1e49
Showing 1 changed file with 27 additions and 21 deletions.
48 changes: 27 additions & 21 deletions src/jobs/sync.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,35 @@ export async function fixTours(){
await knex.raw(`UPDATE tour SET search_column = to_tsvector( 'french', full_text ) WHERE text_lang ='fr';`);

// set ai_search_column
let ids = [];
// We reuse the vectors, of those tours, where full_text was not changed
await knex.raw(`UPDATE tour
SET ai_search_column = temp_tour_full_text.ai_search_column
FROM temp_tour_full_text
WHERE tour.ai_search_column IS NULL
AND tour.id=temp_tour_full_text.id
AND tour.full_text=temp_tour_full_text.full_text;`);

let rounds = 0;
let limit_rows = 5;
try {
const id_result = await knex.raw(`SELECT id FROM tour WHERE ai_search_column IS NULL;`);
ids = id_result.rows;
let count_query = knex.raw(`SELECT COUNT(*) AS row_count FROM tour WHERE ai_search_column IS NULL;`);
let sql_count_call = await count_query;
rounds = Math.ceil(sql_count_call.rows[0].row_count / limit_rows);
} catch (error) {
console.error("Error getting empty ai_search_column:", error);
console.log("Error retrieving count:", error);
}
if (ids.length > 0) {
try {
for (const id of ids) {
await knex.raw(`UPDATE tour SET ai_search_column=get_embedding(full_text) WHERE id=${id.id} IS NULL;`);

if (rounds > 0) {
for (let i=1; i<=rounds; i++) {
try {
await knex.raw(`UPDATE tour
SET ai_search_column = get_embedding(full_text)
FROM (SELECT id FROM tour WHERE ai_search_column IS NULL LIMIT ${limit_rows}) AS b
WHERE tour.id=b.id`);
}
catch (error) {
console.error("Error updating ai_search_column:", error);
}
} catch (error) {
console.error("Error updating ai_search_column:", error);
}
}

Expand Down Expand Up @@ -870,8 +885,8 @@ export async function syncTours(){
await knex.raw(`UPDATE kpi SET VALUE=0 WHERE name='total_tours';`);

// This is to store away the vectors. If full_text is not changed, we do not have to recalculate them.
await knex.raw(`DROP TABLE IF EXISTS temp_tour_full_text;`);
await knex.raw(`CREATE TABLE temp_tour_full_text AS SELECT id, full_text, ai_search_column FROM tour WHERE ai_search_column IS NOT NULL;`);
// await knex.raw(`DROP TABLE IF EXISTS temp_tour_full_text;`);
// await knex.raw(`CREATE TABLE temp_tour_full_text AS SELECT id, full_text, ai_search_column FROM tour WHERE ai_search_column IS NOT NULL;`);

// Table tours will be rebuild from scratch
await knex.raw(`TRUNCATE tour;`);
Expand Down Expand Up @@ -937,15 +952,6 @@ export async function syncTours(){
bulk_insert_tours(result[0]);
}
}


// Finally we reuse the vectors, of those tours, where full_text was not changed
await knex.raw(`UPDATE tour
SET ai_search_column = temp_tour_full_text.ai_search_column
FROM temp_tour_full_text
WHERE tour.ai_search_column IS NULL
AND tour.id=temp_tour_full_text.id
AND tour.full_text=temp_tour_full_text.full_text;`);
}


Expand Down

0 comments on commit 84b1e49

Please sign in to comment.