Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start a serving sub-project. #397

Merged
merged 10 commits into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,8 @@ on:

jobs:
black:
strategy:
matrix:
version: [3.11]
os: [ubuntu-latest]
runs-on: ${{matrix.os}}
name: Python Formatting With Black
runs-on: ubuntu-latest
steps:
- name: Checking out repository
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
Expand Down
26 changes: 20 additions & 6 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Turbine Core Test
name: Turbine Unit Tests

on:
workflow_dispatch:
Expand All @@ -17,6 +17,7 @@ concurrency:

jobs:
test:
name: "Test"
strategy:
matrix:
version: [3.11]
Expand All @@ -40,10 +41,23 @@ jobs:
# wheels saves multiple minutes and a lot of bandwidth on runner setup.
pip install --index-url https://download.pytorch.org/whl/cpu \
-r core/pytorch-cpu-requirements.txt \
-r core/torchvision-requirements.txt
pip install --upgrade -r core/requirements.txt
pip install -e core[testing]
-r core/torchvision-requirements.txt
pip install --upgrade \
-r core/requirements.txt \
-r mypy-requirements.txt
pip install -e core[testing] -e serving[testing]

- name: Run tests
- name: Run core tests
if: ${{ !cancelled() }}
run: |
pytest -n 4 core/tests/
pytest -n 4 core/

- name: Run serving tests
if: ${{ !cancelled() }}
run: |
pytest -n 4 serving/

- name: MyPy Type Checking
if: ${{ !cancelled() }}
run: |
mypy serving/
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ can specify pytorch-cpu and install via:
```
pip install --index-url https://download.pytorch.org/whl/cpu \
-r core/pytorch-cpu-requirements.txt \
-r core torchvision-requirements.txt
-r core/torchvision-requirements.txt
pip install shark-turbine
```

Expand Down
2 changes: 2 additions & 0 deletions core/iree-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
iree-compiler==20240129.785
stellaraccident marked this conversation as resolved.
Show resolved Hide resolved
iree-runtime==20240129.785
4 changes: 4 additions & 0 deletions core/misc-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
numpy>=1.26.3
onnx>=1.15.0
pytest>=8.0.0
pytest-xdist>=3.5.0
4 changes: 1 addition & 3 deletions core/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,4 @@

-r pytorch-cpu-requirements.txt
-r torchvision-requirements.txt

iree-compiler==20240129.785
iree-runtime==20240129.785
-r iree-requirements.txt
13 changes: 7 additions & 6 deletions core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def load_requirement_pins(requirements_file: str):
requirement_pins.update(dict(pin_pairs))


load_requirement_pins("requirements.txt")
load_requirement_pins("iree-requirements.txt")
load_requirement_pins("misc-requirements.txt")
load_requirement_pins("pytorch-cpu-requirements.txt")


Expand Down Expand Up @@ -97,7 +98,7 @@ def initialize_options(self):
],
},
install_requires=[
"numpy",
f"numpy{get_version_spec('numpy')}",
f"iree-compiler{get_version_spec('iree-compiler')}",
f"iree-runtime{get_version_spec('iree-runtime')}",
# Use the [torch-cpu-nightly] spec to get a more recent/specific version.
Expand All @@ -106,12 +107,12 @@ def initialize_options(self):
extras_require={
"torch-cpu-nightly": [f"torch{get_version_spec('torch')}"],
"onnx": [
"onnx>=1.15.0",
f"onnx{get_version_spec('onnx')}",
],
"testing": [
"onnx==1.15.0",
"pytest",
"pytest-xdist",
f"onnx{get_version_spec('onnx')}",
f"pytest{get_version_spec('pytest')}",
f"pytest-xdist{get_version_spec('pytest-xdist')}",
],
},
cmdclass={"build": BuildCommand},
Expand Down
3 changes: 3 additions & 0 deletions mypy-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Typing packages needed for full mypy execution at the project level.
mypy==1.8.0
types-requests
12 changes: 12 additions & 0 deletions serving/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Turbine Serving Infrastructure

This sub-project contains components and infrastructure for serving various
forms of Turbine compiled models. Instead of coming with models, it defines
ABIs that compiled models should adhere to in order to be served. It then
allows them to be delivered as web endpoints via popular APIs.

As emulation can be the sincerest form of flattery, this project derives
substantial inspiration from vllm and the OpenAI APIs, emulating and
interoperating with them where possible. It is intended to be the
lightest-weight possible reference implementation for serving models with an
opinionated compiled form, built elsewhere in the project.
5 changes: 5 additions & 0 deletions serving/mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[mypy]

explicit_package_bases = True
mypy_path = $MYPY_CONFIG_FILE_DIR
packages = turbine_serving.llm
3 changes: 3 additions & 0 deletions serving/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
2 changes: 2 additions & 0 deletions serving/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
fastapi>=0.109.2
uvicorn>=0.27.0
6 changes: 6 additions & 0 deletions serving/setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[tool:pytest]
testpaths =
./tests
filterwarnings =
# TODO: Remove once flatbuffer 'imp' usage resolved.
ignore::DeprecationWarning
109 changes: 109 additions & 0 deletions serving/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright 2024 Advanced Micro Devices, Inc
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import json
import os
import distutils.command.build
from pathlib import Path

from setuptools import find_namespace_packages, setup # type: ignore

# Directory anchors: the folder holding this setup script and the repository
# root one level above it.
THIS_DIR = Path(__file__).resolve().parent
REPO_DIR = THIS_DIR.parent
VERSION_INFO_FILE = REPO_DIR / "version_info.json"


# Read the repository-level README.md once at import time.
with (REPO_DIR / "README.md").open("rt") as f:
    README = f.read()


def load_version_info():
    """Return the decoded JSON content of ``VERSION_INFO_FILE``."""
    return json.loads(VERSION_INFO_FILE.read_text())


# The distribution version is taken from the "package-version" key of the
# version info JSON.
version_info = load_version_info()
PACKAGE_VERSION = version_info["package-version"]

# Package discovery is restricted to turbine_serving and everything below it.
packages = find_namespace_packages(include=["turbine_serving", "turbine_serving.*"])

print("Found packages:", packages)

# Lookup version pins from requirements files.
# Maps a requirement name to the version that follows its "==" or ">=".
requirement_pins = {}


def load_requirement_pins(requirements_file: Path):
    """Record the versions named in ``requirements_file`` in ``requirement_pins``.

    Both ``name==version`` and ``name>=version`` lines are recognized, since
    the requirements files consumed by this script use either form. Blank
    lines, ``#`` comments and pip option lines (such as ``-r other.txt``) are
    skipped, and a trailing ``; marker`` is not part of the stored version.
    Lines without one of the two operators contribute nothing.

    Args:
        requirements_file: Path of the requirements file to parse.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(requirements_file, "rt") as f:
        for raw_line in f:
            # Strip comments first so an "==" inside a comment is never
            # mistaken for a pin.
            line = raw_line.split("#", 1)[0].strip()
            if not line or line.startswith("-"):
                continue
            for operator in ("==", ">="):
                name, found, version = line.partition(operator)
                if found:
                    # Anything after ";" is an environment marker.
                    requirement_pins[name.strip()] = version.split(";", 1)[0].strip()
                    break


# Pins are gathered from this project's own requirements first, then from the
# two requirements files kept under core/.
for _pins_file in (
    THIS_DIR / "requirements.txt",
    REPO_DIR / "core" / "iree-requirements.txt",
    REPO_DIR / "core" / "misc-requirements.txt",
):
    load_requirement_pins(_pins_file)


def get_version_spec(dep: str):
    """Return ``">=<version>"`` for a pinned ``dep``, or ``""`` if it has no pin."""
    try:
        pinned_version = requirement_pins[dep]
    except KeyError:
        return ""
    return f">={pinned_version}"


class BuildCommand(distutils.command.build.build):
    """Build command that stages its output under ``_python_build``.

    The default ``build`` directory collides with typical CMake incantations,
    which can produce all kinds of hilarity (like including the contents of
    the build/lib directory).
    """

    def initialize_options(self):
        super().initialize_options()
        # Redirect the build tree away from the default "build" directory.
        self.build_base = "_python_build"


# Runtime dependencies; each gets whatever version floor was found for it in
# the requirements files.
_install_requires = [
    f"{dep}{get_version_spec(dep)}"
    for dep in ("fastapi", "iree-compiler", "iree-runtime", "uvicorn")
]

# Extra dependencies installed by the [testing] extra.
_testing_requires = [
    f"{dep}{get_version_spec(dep)}" for dep in ("pytest", "pytest-xdist")
]

setup(
    name="turbine-serving",
    version=f"{PACKAGE_VERSION}",
    author="SHARK Authors",
    author_email="[email protected]",
    description="SHARK Turbine Machine Learning Deployment Tools",
    long_description=README,
    long_description_content_type="text/markdown",
    url="https://github.com/nod-ai/SHARK-Turbine",
    license="Apache-2.0",
    classifiers=[
        "Development Status :: 3 - Alpha",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3",
    ],
    packages=packages,
    # Ship the PEP 561 marker so the package's inline types are usable.
    package_data={"turbine_serving": ["py.typed"]},
    install_requires=_install_requires,
    extras_require={"testing": _testing_requires},
    cmdclass={"build": BuildCommand},
)
63 changes: 63 additions & 0 deletions serving/tests/api_server_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2024 Advanced Micro Devices, Inc
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os
import pytest
import requests
import subprocess
import sys
import time


class ServerRunner:
    """Runs the API server in a subprocess and waits until it reports healthy.

    The module ``turbine_serving.llm.entrypoints.api_server`` is launched with
    the current interpreter, and its stdout/stderr are forwarded to this
    process's streams. The subprocess is terminated when the runner object is
    finalized.
    """

    def __init__(self, args):
        """Start the server and block until its health endpoint responds.

        Args:
            args: Extra command line arguments appended to the server command.

        Raises:
            RuntimeError: If the server exits or does not become ready in time.
        """
        self.url = "http://localhost:8000"
        env = os.environ.copy()
        env["PYTHONUNBUFFERED"] = "1"
        self.process = subprocess.Popen(
            [
                sys.executable,
                "-m",
                "turbine_serving.llm.entrypoints.api_server",
                *args,
            ],
            env=env,
            stdout=sys.stdout,
            stderr=sys.stderr,
        )
        self._wait_for_ready()

    def _wait_for_ready(self):
        """Poll ``/health`` every 0.25s until it answers with status 200.

        Raises:
            RuntimeError: If the server process has exited, or if 30 seconds
                pass without a successful health check. The most recent
                request failure, if any, is attached as the cause.
        """
        deadline = time.monotonic() + 30
        while True:
            # Reset each round so the reported cause is always the latest
            # failure (or None when the server answered with a non-200 status).
            last_error = None
            try:
                # A per-request timeout keeps a hung connection from blocking
                # past the overall deadline.
                response = requests.get(f"{self.url}/health", timeout=5)
                if response.status_code == 200:
                    return
            except requests.RequestException as e:
                last_error = e
            if self.process.poll() is not None:
                raise RuntimeError("API server process terminated") from last_error
            if time.monotonic() > deadline:
                raise RuntimeError("Timeout waiting for server start") from last_error
            time.sleep(0.25)

    def __del__(self):
        # __init__ may have failed before self.process was assigned, in which
        # case there is nothing to clean up.
        process = getattr(self, "process", None)
        if process is not None:
            process.terminate()
            process.wait()


@pytest.fixture(scope="session")
def server():
    """Session-wide API server started with no extra command line arguments.

    Yields:
        The ``ServerRunner`` wrapping the running server subprocess.
    """
    runner = ServerRunner([])
    try:
        yield runner
    finally:
        # Stop the subprocess explicitly at teardown instead of relying on
        # ServerRunner.__del__, which is not guaranteed to run.
        runner.process.terminate()
        runner.process.wait()


def test_basic(server: ServerRunner):
    """Smoke test: requesting the ``server`` fixture is the whole check."""
Empty file.
Empty file.
50 changes: 50 additions & 0 deletions serving/turbine_serving/llm/entrypoints/api_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2024 Advanced Micro Devices, Inc
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from typing import Sequence

import argparse

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, Response
import sys
import uvicorn

# Application object on which the routes below are registered.
app = FastAPI()


@app.get("/health")
async def health() -> Response:
    # Health probe: an empty response with status 200.
    ok_response = Response(status_code=200)
    return ok_response

raikonenfnu marked this conversation as resolved.
Show resolved Hide resolved

def main(clargs: Sequence[str]):
    """Parse ``clargs`` and serve ``app`` with uvicorn.

    Args:
        clargs: Command line arguments, without the program name.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--host", type=str, default=None)
    cli.add_argument("--port", type=int, default=8000)
    cli.add_argument(
        "--root-path",
        type=str,
        default=None,
        help="Root path to use for installing behind path based proxy.",
    )
    cli.add_argument(
        "--timeout-keep-alive",
        type=int,
        default=5,
        help="Keep alive timeout",
    )
    options = cli.parse_args(clargs)

    # The root path is set on the application itself before uvicorn starts.
    app.root_path = options.root_path
    uvicorn.run(
        app,
        host=options.host,
        port=options.port,
        log_level="debug",
        timeout_keep_alive=options.timeout_keep_alive,
    )


if __name__ == "__main__":
    main(sys.argv[1:])
1 change: 1 addition & 0 deletions serving/turbine_serving/py.typed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Marker file for PEP 561 inline type checking.
Loading