Skip to content

Commit

Permalink
Merge pull request #1 from m3dev/feature/env_init
Browse files Browse the repository at this point in the history
Env setup
  • Loading branch information
hirosassa authored Mar 13, 2023
2 parents 04a7e11 + 90d9c14 commit c86f5b5
Show file tree
Hide file tree
Showing 11 changed files with 2,691 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Build the package with Poetry and upload it to PyPI whenever a tag is pushed.
name: Publish

on:
  push:
    tags: '*'

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.x'
    # This step only installs tooling; the build itself happens in the
    # "Build and publish" step below (`poetry publish --build`).
    - name: Install build tooling
      run: |
        python -m pip install --upgrade pip
        python -m pip install poetry poetry-dynamic-versioning twine
    - name: Build and publish
      env:
        TWINE_USERNAME: __token__
        TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
      # The credentials are quoted so that the shell passes each one to
      # Poetry as exactly one argument, whatever characters it contains.
      run: |
        poetry publish --build --username "$TWINE_USERNAME" --password "$TWINE_PASSWORD"
29 changes: 29 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run the test suite through tox on pushes to master and on every pull request.
name: Test

on:
  push:
    branches: [ master ]
  pull_request:

jobs:
  tests:
    runs-on: ${{ matrix.platform }}
    strategy:
      max-parallel: 4
      # One job per (platform, python-version) combination.
      matrix:
        platform: [ubuntu-latest]
        python-version: ["3.8", "3.9", "3.10", "3.11"]

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
    # NOTE(review): tox itself is not installed explicitly here; presumably it
    # comes in as a dependency of tox-gh-actions and/or via `poetry install`
    # -- confirm against pyproject.toml.
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install tox-gh-actions poetry
        poetry install
    - name: Test with tox
      run: poetry run tox
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# kannon

Kannon is a wrapper for the [gokart](https://github.com/m3dev/gokart) library that allows gokart tasks to be easily executed in a distributed and parallel manner on multiple [Kubernetes](https://kubernetes.io/) jobs.

# Thanks

Kannon is a wrapper for gokart. Thanks to gokart and dependent projects!

- [gokart](https://github.com/m3dev/gokart)
2 changes: 2 additions & 0 deletions kannon/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from kannon.task import TaskOnBullet
from kannon.master import Kannon
49 changes: 49 additions & 0 deletions kannon/kube_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import enum
from time import sleep
from kubernetes import client
from datetime import datetime
import random

import logging

logger = logging.getLogger(__name__)


@enum.unique
class JobStatus(enum.Enum):
    """Coarse state of a Kubernetes job as reported by ``get_job_status``."""

    # The job has not reached a terminal state yet.
    RUNNING = 0
    # The job finished successfully.
    SUCCEEDED = 1
    # The job finished unsuccessfully.
    FAILED = 2


def create_job(
    api_instance: client.BatchV1Api, job: client.V1Job, namespace: str
) -> None:
    """Submit ``job`` to the cluster and log the status returned by the API.

    Args:
        api_instance: Batch API client used to send the request.
        job: Job object to create.
        namespace: Namespace in which the job is created.
    """
    created = api_instance.create_namespaced_job(namespace=namespace, body=job)
    logger.debug(f"Job created. status={created.status}")

def get_job_status(
    api_instance: client.BatchV1Api, job_name: str, namespace: str
) -> JobStatus:
    """Read a job's status from the API and map it to a ``JobStatus``.

    The terminal state is taken from the job's conditions: a condition of
    type ``Complete`` or ``Failed`` whose status is ``"True"``. The
    ``status.failed`` counter alone is deliberately not treated as a failure,
    because it also counts pods that failed while the job is still being
    retried (``backoff_limit`` > 0); the job as a whole has only failed once
    the ``Failed`` condition is set.

    Args:
        api_instance: Batch API client used to read the job status.
        job_name: Name of the job to inspect.
        namespace: Namespace the job lives in.

    Returns:
        ``JobStatus.SUCCEEDED`` or ``JobStatus.FAILED`` once the job has
        reached that terminal state, ``JobStatus.RUNNING`` otherwise.
    """
    status = api_instance.read_namespaced_job_status(
        name=job_name, namespace=namespace
    ).status

    # `conditions` is None until the controller has recorded one.
    for condition in status.conditions or []:
        if condition.status != "True":
            continue
        if condition.type == "Complete":
            return JobStatus.SUCCEEDED
        if condition.type == "Failed":
            return JobStatus.FAILED

    # Fallback: a non-zero succeeded-pod count is still reported as success
    # even if no `Complete` condition is visible in this response.
    if status.succeeded:
        return JobStatus.SUCCEEDED
    return JobStatus.RUNNING


def gen_job_name(job_prefix: str) -> str:
    """Build a job name of the form ``<prefix>-<NNN>-<YYYYmmddHHMMSS>``.

    ``NNN`` is a zero-padded random integer in [0, 255] and the last part is
    the current local time. Underscores in the prefix are replaced by hyphens
    and the prefix is lower-cased. The result is at most 50 characters long;
    when it would be longer, the *prefix* is shortened so that the
    random/timestamp suffix, which is what distinguishes one job from
    another, is always kept intact.

    Args:
        job_prefix: Human-readable prefix of the job name.

    Returns:
        The generated job name. If the (shortened) prefix is empty, the
        suffix alone is returned so the name does not start with a hyphen.
    """
    max_length = 50
    suffix = f"{random.randint(0, 255):03d}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
    prefix = job_prefix.replace("_", "-").lower()

    # Reserve room for the suffix and the hyphen that joins it to the prefix.
    prefix_budget = max_length - len(suffix) - 1
    if len(prefix) > prefix_budget:
        # Drop hyphens exposed by the cut so the name has no "--" at the join.
        prefix = prefix[:prefix_budget].rstrip("-")

    # TODO: validate job_name more precisely
    return f"{prefix}-{suffix}" if prefix else suffix
203 changes: 203 additions & 0 deletions kannon/master.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
from collections import deque
import os
from time import sleep
from typing import Deque, Dict, List, Set
import logging

import gokart
from kubernetes import client

from .task import TaskOnBullet
from .kube_util import create_job, JobStatus, gen_job_name, get_job_status


logger = logging.getLogger(__name__)


class Kannon:
    """Run a gokart task tree, sending ``TaskOnBullet`` tasks to Kubernetes jobs.

    ``build`` walks the dependency tree of a root task in post-order and
    processes the tasks one by one: ``TaskOnBullet`` instances are submitted
    as child Kubernetes jobs, every other ``gokart.TaskOnKart`` is built in
    the current process with ``gokart.build``.
    """

    def __init__(
        self,
        api_instance: client.BatchV1Api,
        namespace: str,
        image_name: str,
        container_name: str,
        service_account_name: str,
        job_prefix: str,
        path_child_script: str,
        env_to_inherit: List[str],
        backoff_limit: int = 0,
    ) -> None:
        """Validate and store the settings used to create child jobs.

        Args:
            api_instance: Batch API client used to create jobs and read
                their status.
            namespace: Namespace in which child jobs are created.
            image_name: Container image used by child jobs.
            container_name: Name given to the child job's container.
            service_account_name: Service account set on the child pod spec.
            job_prefix: Prefix of the generated child job names.
            path_child_script: Path of the script that the child container
                runs with ``python``; it must exist on the local filesystem
                when this object is constructed.
            env_to_inherit: Names of environment variables whose current
                values are copied into the child container.
            backoff_limit: Value passed to the job spec's ``backoff_limit``.

        Raises:
            FileNotFoundError: If ``path_child_script`` does not exist.
            ValueError: If ``backoff_limit`` is negative.
        """
        # validation
        if not os.path.exists(path_child_script):
            raise FileNotFoundError(f"Child script {path_child_script} does not exist.")
        if backoff_limit < 0:
            raise ValueError("backoff_limit should be >= 0")
        self.api_instance = api_instance
        self.namespace = namespace
        self.image_name = image_name
        self.container_name = container_name
        self.service_account_name = service_account_name
        self.job_prefix = job_prefix
        self.path_child_script = path_child_script
        self.env_to_inherit = env_to_inherit
        self.backoff_limit = backoff_limit

        # Maps the unique id of every task launched as a child job to the
        # name of that job, so the job can be polled later.
        self.task_id_to_job_name: Dict[str, str] = dict()

    def build(self, root_task: gokart.TaskOnKart):
        """Process ``root_task`` and all of its dependencies.

        Tasks are taken from a queue built in post-order. A task that is
        already complete or already launched is dropped; a task whose
        dependencies are not ready yet is pushed back to the end of the
        queue; otherwise it is launched (child job) or built (this process).

        Args:
            root_task: Root of the task tree to process.

        Raises:
            RuntimeError: If a task built in this process fails, or a child
                job that a pending task depends on has failed.
            TypeError: If a queued object is not a ``gokart.TaskOnKart``.
        """
        # push tasks into queue
        logger.info("Creating task queue...")
        task_queue = self._create_task_queue(root_task)

        # consume task queue
        launched_task_ids: Set[str] = set()
        logger.info("Consuming task queue...")
        while task_queue:
            task = task_queue.popleft()
            task_info = self._gen_task_info(task)
            if task.complete():
                logger.info(f"Task {task_info} is already done.")
                continue
            task_id = task.make_unique_id()
            if task_id in launched_task_ids:
                logger.info(f"Task {task_info} is already running.")
                continue

            logger.info(f"Checking if task {task_info} is executable...")
            # TODO: enable user to specify duration to sleep for each task
            sleep(1.0)
            if not self._is_executable(task):
                # Dependencies are not ready yet: retry after the other tasks.
                task_queue.append(task)
                continue
            # execute task
            if isinstance(task, TaskOnBullet):
                logger.info(f"Trying to run task {task_info} on child job...")
                self._exec_bullet_task(task)
            elif isinstance(task, gokart.TaskOnKart):
                logger.info(f"Executing task {task_info} on master job...")
                self._exec_gokart_task(task)
                logger.info(f"Completed task {task_info} on master job.")
            else:
                raise TypeError(f"Invalid task type: {type(task)}")
            launched_task_ids.add(task_id)

        # NOTE(review): a child job is only polled while some queued task
        # depends on it, so a job launched for a task without a pending
        # parent (e.g. a TaskOnBullet root) may still be running here.
        logger.info("All tasks completed!")

    @staticmethod
    def _get_children(task: gokart.TaskOnKart) -> List[gokart.TaskOnKart]:
        """Return the direct dependencies of ``task`` as a flat list.

        Handles ``requires()`` returning a dict (its values are used), a
        single task, or an iterable of tasks. Nested containers are not
        flattened.
        """
        children = task.requires()
        if isinstance(children, dict):
            return list(children.values())
        if isinstance(children, gokart.TaskOnKart):
            # A single task is not iterable; wrap it.
            return [children]
        return list(children)

    def _create_task_queue(
        self, root_task: gokart.TaskOnKart
    ) -> Deque[gokart.TaskOnKart]:
        """Return the tasks of the tree under ``root_task`` in post-order.

        A task reachable through several parents is enqueued once per path;
        ``build`` skips the repeats because they are complete or launched by
        the time they are dequeued again.
        """
        task_queue: Deque[gokart.TaskOnKart] = deque()

        def _rec_enqueue_task(task: gokart.TaskOnKart) -> None:
            """Traversal task tree in post-order to push tasks into task queue."""
            # run children
            for child in self._get_children(task):
                _rec_enqueue_task(child)

            task_queue.append(task)
            logger.info(f"Task {self._gen_task_info(task)} is pushed to task queue")

        _rec_enqueue_task(root_task)
        return task_queue

    def _exec_gokart_task(self, task: gokart.TaskOnKart) -> None:
        """Build ``task`` in the current process.

        Raises:
            RuntimeError: If ``gokart.build`` raises; the original exception
                is attached as the cause.
        """
        # Run on master job
        try:
            gokart.build(task)
        except Exception as e:
            raise RuntimeError(
                f"Task {self._gen_task_info(task)} on job master has failed."
            ) from e

    def _exec_bullet_task(self, task: TaskOnBullet) -> None:
        """Create a child job for ``task`` and remember the job's name.

        This only submits the job; it does not wait for it to finish.
        """
        # Run on child job
        serialized_task = gokart.TaskInstanceParameter().serialize(task)
        job_name = gen_job_name(f"{self.job_prefix}-{task.get_task_family()}")
        job = self._create_job_object(
            job_name=job_name,
            serialized_task=serialized_task,
        )
        create_job(self.api_instance, job, self.namespace)
        logger.info(
            f"Created child job {job_name} with task {self._gen_task_info(task)}"
        )
        self.task_id_to_job_name[task.make_unique_id()] = job_name

    @staticmethod
    def _gen_task_info(task: gokart.TaskOnKart) -> str:
        """Return ``<task family>_<unique id>`` for use in log messages."""
        return f"{task.get_task_family()}_{task.make_unique_id()}"

    def _create_job_object(self, serialized_task: str, job_name: str) -> client.V1Job:
        """Build the job object that runs the child script for one task.

        Args:
            serialized_task: Serialized task passed to the child script via
                ``--serialized-task``.
            job_name: Name of the job to create.

        Returns:
            A ``batch/v1`` job with one container, ``restart_policy="Never"``
            and the configured ``backoff_limit``.

        Raises:
            ValueError: If a variable listed in ``env_to_inherit`` is not set
                in the current environment.
        """
        # TODO: use python -c to avoid dependency to execute_task.py
        # NOTE(review): the single quotes are part of the argument value
        # itself; presumably the child script strips them -- confirm before
        # changing this.
        cmd = [
            "python",
            self.path_child_script,
            "--serialized-task",
            f"'{serialized_task}'",
        ]
        child_envs = []
        for env_name in self.env_to_inherit:
            if env_name not in os.environ:
                raise ValueError(f"Envvar {env_name} does not exist.")
            child_envs.append({"name": env_name, "value": os.environ[env_name]})
        container = client.V1Container(
            name=self.container_name,
            image=self.image_name,
            command=cmd,
            env=child_envs,
        )
        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels={"app": "kannon"}),
            spec=client.V1PodSpec(
                restart_policy="Never",
                containers=[container],
                service_account_name=self.service_account_name,
            ),
        )
        spec = client.V1JobSpec(template=template, backoff_limit=self.backoff_limit)
        job = client.V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=client.V1ObjectMeta(
                name=job_name,
                namespace=self.namespace,
            ),
            spec=spec,
        )

        return job

    def _is_executable(self, task: gokart.TaskOnKart) -> bool:
        """Return True when every direct dependency of ``task`` is ready.

        A dependency is ready when it is complete and, if it was launched as
        a child job, that job is no longer running.

        Raises:
            RuntimeError: If the child job of a dependency has failed.
        """
        for child in self._get_children(task):
            job_name = self.task_id_to_job_name.get(child.make_unique_id())
            if job_name is not None:
                # Check the job before `complete()`: a failed job normally
                # leaves its task incomplete forever, so testing completeness
                # first would hide the failure and keep `build` waiting.
                job_status = get_job_status(
                    self.api_instance,
                    job_name,
                    self.namespace,
                )
                if job_status == JobStatus.FAILED:
                    raise RuntimeError(
                        f"Task {self._gen_task_info(child)} on job {job_name} has failed."
                    )
                if job_status == JobStatus.RUNNING:
                    return False
            if not child.complete():
                return False
        return True
5 changes: 5 additions & 0 deletions kannon/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import gokart


class TaskOnBullet(gokart.TaskOnKart):
    """Marker task type for tasks that ``Kannon`` runs on a child job.

    It adds nothing to ``gokart.TaskOnKart``; ``Kannon.build`` only checks
    ``isinstance(task, TaskOnBullet)`` to decide where a task is executed.
    """
Loading

0 comments on commit c86f5b5

Please sign in to comment.