From 3c3d4f8accfc27317e13ba7f1f61b4d0adf8e4ac Mon Sep 17 00:00:00 2001 From: Gyubong Lee Date: Fri, 7 Feb 2025 16:11:50 +0900 Subject: [PATCH 1/3] fix(BA-584): Remove foreign key constraint from `EndpointRow.image` column (#3599) Backported-from: main (25.1) Backported-to: 24.03 Backport-of: 3599 --- changes/3599.fix.md | 1 + ...e4_remove_fk_constraint_from_endpoints_.py | 37 +++++++++++++++ src/ai/backend/manager/models/endpoint.py | 46 +++++++++++++++---- src/ai/backend/manager/models/image.py | 20 +++++--- 4 files changed, 89 insertions(+), 15 deletions(-) create mode 100644 changes/3599.fix.md create mode 100644 src/ai/backend/manager/models/alembic/versions/ecc9f6322be4_remove_fk_constraint_from_endpoints_.py diff --git a/changes/3599.fix.md b/changes/3599.fix.md new file mode 100644 index 00000000000..953b26912dd --- /dev/null +++ b/changes/3599.fix.md @@ -0,0 +1 @@ +Remove foreign key constraint from `EndpointRow.image` column. diff --git a/src/ai/backend/manager/models/alembic/versions/ecc9f6322be4_remove_fk_constraint_from_endpoints_.py b/src/ai/backend/manager/models/alembic/versions/ecc9f6322be4_remove_fk_constraint_from_endpoints_.py new file mode 100644 index 00000000000..a0556cf86c9 --- /dev/null +++ b/src/ai/backend/manager/models/alembic/versions/ecc9f6322be4_remove_fk_constraint_from_endpoints_.py @@ -0,0 +1,37 @@ +"""Remove foreign key constraint from endpoints.image column + +Revision ID: ecc9f6322be4 +Revises: f6ca2f2d04c1 +Create Date: 2025-02-07 00:58:05.211395 + +""" + +from alembic import op + +from ai.backend.manager.models.base import GUID + +# revision identifiers, used by Alembic. +revision = "ecc9f6322be4" +down_revision = "f6ca2f2d04c1" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.drop_constraint("fk_endpoints_image_images", "endpoints", type_="foreignkey") + op.alter_column("endpoints", "image", existing_type=GUID, nullable=True) + op.create_check_constraint( + constraint_name="ck_image_required_unless_destroyed", + table_name="endpoints", + condition="lifecycle_stage = 'destroyed' OR image IS NOT NULL", + ) + + +def downgrade() -> None: + op.create_foreign_key( + "fk_endpoints_image_images", "endpoints", "images", ["image"], ["id"], ondelete="RESTRICT" + ) + op.alter_column("endpoints", "image", existing_type=GUID, nullable=False) + op.drop_constraint( + constraint_name="ck_image_required_unless_destroyed", table_name="endpoints", type_="check" + ) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index b42fb046826..5689f7e35e0 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -13,9 +13,12 @@ import yarl from graphene.types.datetime import DateTime as GQLDateTime from graphql import Undefined +from redis.asyncio import Redis +from redis.asyncio.client import Pipeline +from sqlalchemy import CheckConstraint from sqlalchemy.dialects import postgresql as pgsql from sqlalchemy.ext.asyncio import AsyncConnection, AsyncSession -from sqlalchemy.orm import relationship, selectinload +from sqlalchemy.orm import foreign, relationship, selectinload from sqlalchemy.orm.exc import NoResultFound from ai.backend.common.config import model_definition_iv @@ -102,6 +105,16 @@ class EndpointLifecycle(Enum): class EndpointRow(Base): __tablename__ = "endpoints" + __table_args__ = ( + CheckConstraint( + sa.or_( + sa.column("lifecycle_stage") == EndpointLifecycle.DESTROYED.value, + sa.column("image").isnot(None), + ), + name="ck_image_required_unless_destroyed", + ), + ) + id = EndpointIDColumn() name = sa.Column("name", sa.String(length=512), nullable=False) created_user = sa.Column( @@ -111,12 +124,8 @@ class EndpointRow(Base): "session_owner", GUID, sa.ForeignKey("users.uuid", ondelete="RESTRICT"), nullable=False ) # minus session count means this endpoint is requested for removal - desired_session_count = sa.Column( - "desired_session_count", sa.Integer, nullable=False, default=0, server_default="0" - ) - image = sa.Column( - "image", GUID, sa.ForeignKey("images.id", ondelete="RESTRICT"), nullable=False - ) + replicas = sa.Column("replicas", sa.Integer, nullable=False, default=0, server_default="0") + image = sa.Column("image", GUID) model = sa.Column( "model", GUID, @@ -206,7 +215,16 @@ class EndpointRow(Base): routings = relationship("RoutingRow", back_populates="endpoint_row") tokens = relationship("EndpointTokenRow", back_populates="endpoint_row") - image_row = relationship("ImageRow", back_populates="endpoints") + endpoint_auto_scaling_rules = relationship( + "EndpointAutoScalingRuleRow", back_populates="endpoint_row" + ) + image_row = relationship( + "ImageRow", + primaryjoin=lambda: foreign(EndpointRow.image) == ImageRow.id, + foreign_keys=[image], + back_populates="endpoints", + ) + model_row = relationship("VFolderRow", back_populates="endpoints") created_user_row = relationship( "UserRow", back_populates="created_endpoints", foreign_keys="EndpointRow.created_user" @@ -1248,7 +1266,17 @@ def _get_vfolder_id(id_input: str) -> uuid.UUID: raise InvalidAPIParameters( "Model mount destination must be /models for non-custom runtimes" ) - # from AgentRegistry.handle_route_creation() + + async with graph_ctx.db.begin_session() as db_session: + image_row = await ImageRow.resolve( + db_session, + [ + ImageIdentifier( + endpoint_row.image_row.name, endpoint_row.image_row.architecture + ), + ], + ) + await graph_ctx.registry.create_session( "", endpoint_row.image_row.name, diff --git a/src/ai/backend/manager/models/image.py b/src/ai/backend/manager/models/image.py index 42ab8e6513a..636c68cd455 100644 --- a/src/ai/backend/manager/models/image.py +++ b/src/ai/backend/manager/models/image.py @@ -24,11 +24,8 @@ import graphene import sqlalchemy as sa import trafaret as t -from graphql import Undefined -from redis.asyncio import Redis -from redis.asyncio.client import Pipeline -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import load_only, relationship, selectinload +from sqlalchemy.ext.asyncio import AsyncConnection, AsyncSession +from sqlalchemy.orm import foreign, joinedload, load_only, relationship, selectinload from ai.backend.common import redis_helper from ai.backend.common.docker import ImageRef @@ -179,6 +176,13 @@ class ImageType(enum.Enum): SERVICE = "service" +# Defined for avoiding circular import +def _get_image_endpoint_join_condition(): + from ai.backend.manager.models.endpoint import EndpointRow + + return ImageRow.id == foreign(EndpointRow.image) + + class ImageRow(Base): __tablename__ = "images" id = IDColumn("id") @@ -221,7 +225,11 @@ class ImageRow(Base): ) aliases: relationship # sessions = relationship("SessionRow", back_populates="image_row") - endpoints = relationship("EndpointRow", back_populates="image_row") + endpoints = relationship( + "EndpointRow", + primaryjoin=_get_image_endpoint_join_condition, + back_populates="image_row", + ) def __init__( self, From 5f7dd0a4e7364513c098de2e88d9d32c1f744439 Mon Sep 17 00:00:00 2001 From: jopemachine Date: Fri, 7 Feb 2025 08:43:03 +0000 Subject: [PATCH 2/3] fix: Broken CI --- ...e4_remove_fk_constraint_from_endpoints_.py | 4 ++-- src/ai/backend/manager/models/endpoint.py | 21 ++++--------------- src/ai/backend/manager/models/image.py | 4 ++-- 3 files changed, 8 insertions(+), 21 deletions(-) diff --git a/src/ai/backend/manager/models/alembic/versions/ecc9f6322be4_remove_fk_constraint_from_endpoints_.py b/src/ai/backend/manager/models/alembic/versions/ecc9f6322be4_remove_fk_constraint_from_endpoints_.py index a0556cf86c9..5b42bea05c8 100644 --- a/src/ai/backend/manager/models/alembic/versions/ecc9f6322be4_remove_fk_constraint_from_endpoints_.py +++ b/src/ai/backend/manager/models/alembic/versions/ecc9f6322be4_remove_fk_constraint_from_endpoints_.py @@ -1,7 +1,7 @@ """Remove foreign key constraint from endpoints.image column Revision ID: ecc9f6322be4 -Revises: f6ca2f2d04c1 +Revises: ef9a7960d234 Create Date: 2025-02-07 00:58:05.211395 """ @@ -12,7 +12,7 @@ # revision identifiers, used by Alembic. revision = "ecc9f6322be4" -down_revision = "f6ca2f2d04c1" +down_revision = "ef9a7960d234" branch_labels = None depends_on = None diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 5689f7e35e0..1111197523c 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -13,8 +13,6 @@ import yarl from graphene.types.datetime import DateTime as GQLDateTime from graphql import Undefined -from redis.asyncio import Redis -from redis.asyncio.client import Pipeline from sqlalchemy import CheckConstraint from sqlalchemy.dialects import postgresql as pgsql from sqlalchemy.ext.asyncio import AsyncConnection, AsyncSession @@ -124,7 +122,9 @@ class EndpointRow(Base): "session_owner", GUID, sa.ForeignKey("users.uuid", ondelete="RESTRICT"), nullable=False ) # minus session count means this endpoint is requested for removal - replicas = sa.Column("replicas", sa.Integer, nullable=False, default=0, server_default="0") + desired_session_count = sa.Column( + "desired_session_count", sa.Integer, nullable=False, default=0, server_default="0" + ) image = sa.Column("image", GUID) model = sa.Column( "model", @@ -215,9 +215,6 @@ class EndpointRow(Base): routings = relationship("RoutingRow", back_populates="endpoint_row") tokens = relationship("EndpointTokenRow", back_populates="endpoint_row") - endpoint_auto_scaling_rules = relationship( - "EndpointAutoScalingRuleRow", back_populates="endpoint_row" - ) image_row = relationship( "ImageRow", primaryjoin=lambda: foreign(EndpointRow.image) == ImageRow.id, @@ -1266,17 +1263,7 @@ def _get_vfolder_id(id_input: str) -> uuid.UUID: raise InvalidAPIParameters( "Model mount destination must be /models for non-custom runtimes" ) - - async with graph_ctx.db.begin_session() as db_session: - image_row = await ImageRow.resolve( - db_session, - [ - ImageIdentifier( - endpoint_row.image_row.name, endpoint_row.image_row.architecture - ), - ], - ) - + # from AgentRegistry.handle_route_creation() await graph_ctx.registry.create_session( "", endpoint_row.image_row.name, diff --git a/src/ai/backend/manager/models/image.py b/src/ai/backend/manager/models/image.py index 636c68cd455..5bf17d84acc 100644 --- a/src/ai/backend/manager/models/image.py +++ b/src/ai/backend/manager/models/image.py @@ -24,8 +24,8 @@ import graphene import sqlalchemy as sa import trafaret as t -from sqlalchemy.ext.asyncio import AsyncConnection, AsyncSession -from sqlalchemy.orm import foreign, joinedload, load_only, relationship, selectinload +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import foreign, load_only, relationship, selectinload from ai.backend.common import redis_helper from ai.backend.common.docker import ImageRef From fc589c0a77a43dafa354578c27a8257767b3bbdc Mon Sep 17 00:00:00 2001 From: jopemachine Date: Fri, 7 Feb 2025 08:46:37 +0000 Subject: [PATCH 3/3] fix: Broken CI --- src/ai/backend/manager/models/image.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ai/backend/manager/models/image.py b/src/ai/backend/manager/models/image.py index 5bf17d84acc..f231c99093c 100644 --- a/src/ai/backend/manager/models/image.py +++ b/src/ai/backend/manager/models/image.py @@ -24,6 +24,9 @@ import graphene import sqlalchemy as sa import trafaret as t +from graphql import Undefined +from redis.asyncio import Redis +from redis.asyncio.client import Pipeline from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import foreign, load_only, relationship, selectinload