Commit
implement ssd-1b inference
aniketmaurya committed Nov 30, 2023
1 parent 2b7a636 commit 3d3f0e4
Showing 7 changed files with 76 additions and 11 deletions.
1 change: 1 addition & 0 deletions src/fastserve/__init__.py
@@ -1,3 +1,4 @@
 """A Python Project"""
+from .base_fastserve import BaseRequest, FastServe
 
 __version__ = "0.0.1"
2 changes: 1 addition & 1 deletion src/fastserve/__main__.py
@@ -1,6 +1,6 @@
 import uvicorn
 
-from .fastserve import FastServe
+from .base_fastserve import FastServe
 
 serve = FastServe()
 serve.run_server()
18 changes: 10 additions & 8 deletions src/fastserve/fastserve.py → src/fastserve/base_fastserve.py
@@ -12,24 +12,26 @@ class BaseRequest(BaseModel):
 
 
 class FastServe:
-    def __init__(self, batch_size=2, timeout=0.5) -> None:
-        self.batch_processing: BatchProcessor = None
+    def __init__(self, batch_size=2, timeout=0.5, input_schema=BaseRequest) -> None:
+        self.input_schema = input_schema
+        self.batch_processing = BatchProcessor(
+            func=self.handle, bs=batch_size, timeout=timeout
+        )
 
         @asynccontextmanager
         async def lifespan(app: FastAPI):
-            self.batch_processing = BatchProcessor(
-                func=self.handle, bs=batch_size, timeout=timeout
-            )
             yield
             self.batch_processing.cancel()
 
         self._app = FastAPI(lifespan=lifespan)
 
-    def serve(
+    def _serve(
         self,
     ):
+        INPUT_SCHEMA = self.input_schema
+
         @self._app.post(path="/endpoint")
-        def api(request: BaseRequest):
+        def api(request: INPUT_SCHEMA):
             wait_obj = self.batch_processing.process(request)
             return wait_obj.get()
 
@@ -40,7 +42,7 @@ def handle(self, batch: List[BaseRequest]):
     def run_server(
         self,
     ):
-        self.serve()
+        self._serve()
         import uvicorn
 
         uvicorn.run(self._app)
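
For reference, a minimal usage sketch of the refactored interface (hypothetical TextRequest/EchoServe names; assumes, as the SSD handler added below does, that handle returns one result per batched request):

from typing import List

from pydantic import BaseModel

from fastserve import FastServe


class TextRequest(BaseModel):
    text: str


class EchoServe(FastServe):
    def __init__(self):
        # input_schema now becomes the request model of the /endpoint route
        super().__init__(batch_size=2, timeout=0.5, input_schema=TextRequest)

    def handle(self, batch: List[TextRequest]) -> List[str]:
        # invoked by BatchProcessor with up to batch_size queued requests
        return [r.text for r in batch]


EchoServe().run_server()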
5 changes: 3 additions & 2 deletions src/fastserve/batching.py
@@ -2,12 +2,11 @@
 import time
 import uuid
 from dataclasses import dataclass, field
+from logging import INFO, Logger
 from queue import Empty, Queue
 from threading import Event, Thread
 from typing import Any, Callable, Dict, List
 
-from logging import Logger, INFO
-
 logger = Logger(__name__, level=INFO)

@@ -132,5 +131,7 @@ def process(self, item: Any):
         return waited_obj
 
     def cancel(self):
+        logger.info("Terminating Batch Processor...")
        self._cancel_signal.set()
        self._thread.join()
+        logger.info("Batch Processor terminated!")
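
A hedged sketch of the BatchProcessor contract relied on above (assumes func receives the queued items as a list and returns one result per item, matching how FastServe.handle is wired in):

from fastserve.batching import BatchProcessor


def square_all(batch):
    # runs on the worker thread with at most bs items per call
    return [x * x for x in batch]


processor = BatchProcessor(func=square_all, bs=4, timeout=0.5)
waited = processor.process(3)  # enqueue a single item
print(waited.get())            # blocks until its batch is flushed -> 9
processor.cancel()             # set the cancel event and join the worker thread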
1 change: 1 addition & 0 deletions src/fastserve/models/__init__.py
@@ -0,0 +1 @@
+from .ssd import FastServeSSD
16 changes: 16 additions & 0 deletions src/fastserve/models/__main__.py
@@ -0,0 +1,16 @@
+import argparse
+
+from .ssd import FastServeSSD
+
+parser = argparse.ArgumentParser(description="Serve models with FastServe")
+parser.add_argument("--model", type=str, required=True, help="Name of the model")
+
+args = parser.parse_args()
+
+app = None
+if args.model == "ssd-1b":
+    app = FastServeSSD(device="mps")
+else:
+    raise Exception(f"FastServe.models doesn't implement model={args.model}")
+
+app.run_server()
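
With this entrypoint the server can be started as a module (note the committed default pins device="mps"; any other --model value raises):

python -m fastserve.models --model ssd-1b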
44 changes: 44 additions & 0 deletions src/fastserve/models/ssd.py
@@ -0,0 +1,44 @@
+import io
+from typing import List
+
+import torch
+from diffusers import StableDiffusionXLPipeline
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+
+from fastserve import BaseRequest, FastServe
+
+
+class PromptRequest(BaseModel):
+    prompt: str  # e.g. "An astronaut riding a green horse"
+    negative_prompt: str = "ugly, blurry, poor quality"
+
+
+class FastServeSSD(FastServe):
+    def __init__(self, batch_size=2, timeout=0.5, device="cuda") -> None:
+        super().__init__(batch_size, timeout)
+        self.input_schema = PromptRequest
+        self.pipe = StableDiffusionXLPipeline.from_pretrained(
+            "segmind/SSD-1B",
+            torch_dtype=torch.float16,
+            use_safetensors=True,
+            variant="fp16",
+        )
+        self.pipe.to(device)
+
+    def handle(self, batch: List[PromptRequest]) -> List[StreamingResponse]:
+        prompts = [b.prompt for b in batch]
+        negative_prompts = [b.negative_prompt for b in batch]
+        pil_images = self.pipe(
+            prompt=prompts, negative_prompt=negative_prompts, num_inference_steps=1
+        ).images
+        image_bytes_list = []
+        for pil_image in pil_images:
+            image_bytes = io.BytesIO()
+            pil_image.save(image_bytes, format="JPEG")
+            image_bytes.seek(0)  # rewind so StreamingResponse reads from the start
+            image_bytes_list.append(image_bytes)
+        return [
+            StreamingResponse(image_bytes, media_type="image/jpeg")
+            for image_bytes in image_bytes_list
+        ]
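
Once the server is up, a client-side sketch for fetching one image (assumes uvicorn's default http://127.0.0.1:8000 and the /endpoint route registered by _serve; out.jpg is an arbitrary output path):

import requests

resp = requests.post(
    "http://127.0.0.1:8000/endpoint",
    json={"prompt": "An astronaut riding a green horse"},
)
with open("out.jpg", "wb") as f:
    f.write(resp.content)  # the endpoint streams back JPEG bytes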
