Skip to content

Commit

Permalink
Merge branch 'master' of github.com:flyteorg/flytekit into map-container
Browse files Browse the repository at this point in the history
  • Loading branch information
pingsutw committed Mar 29, 2023
2 parents a48e7f0 + 12be8d8 commit fee1b22
Show file tree
Hide file tree
Showing 127 changed files with 3,454 additions and 3,386 deletions.
47 changes: 35 additions & 12 deletions .github/workflows/pythonbuild.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10", "3.11"]
exclude:
# Ignore this test because we failed to install docker-py
# docker-py will install pywin32==227, whereas pywin only added support for python 3.10 in version 301.
Expand Down Expand Up @@ -53,7 +53,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10", "3.11"]
plugin-names:
# Please maintain an alphabetical order in the following list
- flytekit-aws-athena
Expand All @@ -72,6 +72,7 @@ jobs:
- flytekit-kf-mpi
- flytekit-kf-pytorch
- flytekit-kf-tensorflow
- flytekit-mlflow
- flytekit-modin
- flytekit-onnx-pytorch
- flytekit-onnx-scikitlearn
Expand All @@ -88,13 +89,16 @@ jobs:
- flytekit-vaex
- flytekit-whylogs
exclude:
# flytekit-modin depends on ray which does not have a 3.10 wheel yet.
# Issue tracked in https://github.com/ray-project/ray/issues/19116.
- python-version: 3.10
# flytekit-modin depends on ray which does not have a 3.11 wheel yet.
# Issue tracked in https://github.com/ray-project/ray/issues/27881
- python-version: 3.11
plugin-names: "flytekit-modin"
# Great-expectations does not support python 3.10 yet
# https://github.com/great-expectations/great_expectations/blob/develop/setup.py#L87-L89
- python-version: 3.10
- python-version: 3.11
plugin-names: "flytekit-ray"
# Great-expectations does not support python 3.11 due to sqlachemy>=2.0.0
# not being supported yet:
# https://github.com/great-expectations/great_expectations/issues/7020
- python-version: 3.11
plugin-names: "flytekit-greatexpectations"
# onnxruntime does not support python 3.10 yet
# https://github.com/microsoft/onnxruntime/issues/9782
Expand All @@ -104,10 +108,29 @@ jobs:
plugin-names: "flytekit-onnx-scikitlearn"
- python-version: 3.10
plugin-names: "flytekit-onnx-tensorflow"
# whylogs-sketching library does not have a 3.10 build yet
# Issue tracked: https://github.com/whylabs/whylogs/issues/697
- python-version: 3.10
- python-version: 3.11
plugin-names: "flytekit-onnx-pytorch"
- python-version: 3.11
plugin-names: "flytekit-onnx-scikitlearn"
- python-version: 3.11
plugin-names: "flytekit-onnx-tensorflow"
# numba, a dependency of mlflow, doesn't support python 3.11
# https://github.com/numba/numba/issues/8304
- python-version: 3.11
plugin-names: "flytekit-mlflow"
# pyspark 3.4.0 will support python 3.11, which isn't available yet
# Issue tracked in: https://github.com/apache/spark/pull/38987
- python-version: 3.11
plugin-names: "flytekit-spark"
# vaex currently doesn't support python 3.11
- python-version: 3.11
plugin-names: "flytekit-vaex"
# whylogs does support python 3.11 dataclass restrictions
# See: https://github.com/flyteorg/flytekit/actions/runs/4493746408/jobs/7905368664
- python-version: 3.11
plugin-names: "flytekit-whylogs"


steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -134,7 +157,7 @@ jobs:
cd plugins/${{ matrix.plugin-names }}
# onnx plugins does not support protobuf>4 yet (in fact it is tensorflow that
# does not support that yet). More details in https://github.com/onnx/onnx/issues/4239.
if [[ ${{ matrix.plugin-names }} == *"onnx"* || ${{ matrix.plugin-names }} == "flytekit-whylogs" ]]; then
if [[ ${{ matrix.plugin-names }} == *"onnx"* || ${{ matrix.plugin-names }} == "flytekit-whylogs" || ${{ matrix.plugin-names }} == "flytekit-mlflow" ]]; then
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python coverage run -m pytest tests
else
coverage run -m pytest tests
Expand Down
29 changes: 23 additions & 6 deletions .github/workflows/pythonpublish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,28 @@ jobs:
run: |
make -C plugins build_all_plugins
make -C plugins publish_all_plugins
# Added sleep because PYPI take some time in publish
- name: Sleep for 180 seconds
uses: jakejarvis/wait-action@master
with:
time: '180s'
- name: Sleep until pypi is available
id: pypiwait
run: |
# from refs/tags/v1.2.3 get 1.2.3 and make sure it's not an empty string
VERSION=$(echo $GITHUB_REF | sed 's#.*/v##')
if [ -z "$VERSION" ]
then
echo "No tagged version found, exiting"
exit 1
fi
LINK="https://pypi.org/project/flytekit/${VERSION}"
for i in {1..60}; do
if curl -L -I -s -f ${LINK} >/dev/null; then
echo "Found pypi"
exit 0
else
echo "Did not find - Retrying in 10 seconds..."
sleep 10
fi
done
exit 1
shell: bash
outputs:
version: ${{ steps.bump.outputs.version }}

Expand All @@ -60,7 +77,7 @@ jobs:
needs: deploy
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- uses: actions/checkout@v2
with:
Expand Down
2 changes: 0 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ RUN apt-get update && apt-get install build-essential -y
RUN pip install -U flytekit==$VERSION \
flytekitplugins-pod==$VERSION \
flytekitplugins-deck-standard==$VERSION \
flytekitplugins-data-fsspec[aws]==$VERSION \
flytekitplugins-data-fsspec[gcp]==$VERSION \
scikit-learn

RUN useradd -u 1000 flytekit
Expand Down
32 changes: 32 additions & 0 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# This Dockerfile is here to help with end-to-end testing
# From flytekit
# $ docker build -f Dockerfile.dev --build-arg PYTHON_VERSION=3.10 -t localhost:30000/flytekittest:someversion .
# $ docker push localhost:30000/flytekittest:someversion
# From your test user code
# $ pyflyte run --image localhost:30000/flytekittest:someversion

ARG PYTHON_VERSION
FROM python:${PYTHON_VERSION}-slim-buster

MAINTAINER Flyte Team <[email protected]>
LABEL org.opencontainers.image.source https://github.com/flyteorg/flytekit

WORKDIR /root

ARG VERSION

RUN apt-get update && apt-get install build-essential vim -y

COPY . /flytekit

# Pod tasks should be exposed in the default image
RUN pip install -e /flytekit
RUN pip install -e /flytekit/plugins/flytekit-k8s-pod
RUN pip install -e /flytekit/plugins/flytekit-deck-standard
RUN pip install scikit-learn

ENV PYTHONPATH "/flytekit:/flytekit/plugins/flytekit-k8s-pod:/flytekit/plugins/flytekit-deck-standard:"

RUN useradd -u 1000 flytekit
RUN chown flytekit: /root
USER flytekit
10 changes: 8 additions & 2 deletions dev-requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,16 @@ IPython
keyrings.alt

# Only install tensorflow if not running on an arm Mac.
tensorflow==2.8.1; platform_machine!='arm64' or platform_system!='Darwin'
tensorflow==2.8.1; python_version<'3.11' and (platform_machine!='arm64' or platform_system!='Darwin')
# Tensorflow release candidate supports python 3.11
tensorflow==2.12.0rc1; python_version>='3.11' and (platform_machine!='arm64' or platform_system!='Darwin')

# Newer versions of torch bring in nvidia dependencies that are not present in windows, so
# we put this constraint while we do not have per-environment requirements files
torch<=1.12.1
torch<=1.12.1; python_version<'3.11'
# pytorch 2 supports python 3.11
torch<=2.0.0; python_version>='3.11' or platform_system!='Windows'

scikit-learn
types-protobuf
types-croniter
Expand Down
4 changes: 2 additions & 2 deletions doc-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ flask==2.2.3
# via mlflow
flatbuffers==23.1.21
# via tensorflow
flyteidl==1.3.7
flyteidl==1.3.12
# via flytekit
fonttools==4.38.0
# via matplotlib
Expand All @@ -216,7 +216,7 @@ frozenlist==1.3.3
# via
# aiosignal
# ray
fsspec==2023.1.0
fsspec==2023.3.0
# via
# -r doc-requirements.in
# dask
Expand Down
2 changes: 0 additions & 2 deletions flytekit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@
from flytekit.core.condition import conditional
from flytekit.core.container_task import ContainerTask
from flytekit.core.context_manager import ExecutionParameters, FlyteContext, FlyteContextManager
from flytekit.core.data_persistence import DataPersistence, DataPersistencePlugins
from flytekit.core.dynamic_workflow_task import dynamic
from flytekit.core.gate import approve, sleep, wait_for_input
from flytekit.core.hash import HashMethod
Expand All @@ -223,7 +222,6 @@
from flytekit.core.workflow import WorkflowFailurePolicy, reference_workflow, workflow
from flytekit.deck import Deck
from flytekit.extras import pytorch, sklearn, tensorflow
from flytekit.extras.persistence import GCSPersistence, HttpPersistence, S3Persistence
from flytekit.loggers import logger
from flytekit.models.common import Annotations, AuthRole, Labels
from flytekit.models.core.execution import WorkflowExecutionPhase
Expand Down
4 changes: 3 additions & 1 deletion flytekit/clients/auth/auth_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,11 @@ def _credentials_from_response(self, auth_token_resp) -> Credentials:
raise ValueError('Expected "access_token" in response from oauth server')
if "refresh_token" in response_body:
refresh_token = response_body["refresh_token"]
if "expires_in" in response_body:
expires_in = response_body["expires_in"]
access_token = response_body["access_token"]

return Credentials(access_token, refresh_token, self._endpoint)
return Credentials(access_token, refresh_token, self._endpoint, expires_in=expires_in)

def _request_access_token(self, auth_code) -> Credentials:
if self._state != auth_code.state:
Expand Down
108 changes: 57 additions & 51 deletions flytekit/clients/auth/authenticator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import base64
import logging
import subprocess
import typing
from abc import abstractmethod
from dataclasses import dataclass

import requests

from . import token_client
from .auth_client import AuthorizationClient
from .exceptions import AccessTokenNotFoundError, AuthenticationError
from .keyring import Credentials, KeyringStore
Expand All @@ -22,6 +20,7 @@ class ClientConfig:
authorization_endpoint: str
redirect_uri: str
client_id: str
device_authorization_endpoint: typing.Optional[str] = None
scopes: typing.List[str] = None
header_key: str = "authorization"

Expand Down Expand Up @@ -155,67 +154,25 @@ class ClientCredentialsAuthenticator(Authenticator):
This Authenticator uses ClientId and ClientSecret to authenticate
"""

_utf_8 = "utf-8"

def __init__(
self,
endpoint: str,
client_id: str,
client_secret: str,
cfg_store: ClientConfigStore,
header_key: str = None,
header_key: typing.Optional[str] = None,
scopes: typing.Optional[typing.List[str]] = None,
):
if not client_id or not client_secret:
raise ValueError("Client ID and Client SECRET both are required.")
cfg = cfg_store.get_client_config()
self._token_endpoint = cfg.token_endpoint
self._scopes = cfg.scopes
# Use scopes from `flytekit.configuration.PlatformConfig` if passed
self._scopes = scopes or cfg.scopes
self._client_id = client_id
self._client_secret = client_secret
super().__init__(endpoint, cfg.header_key or header_key)

@staticmethod
def get_token(token_endpoint: str, authorization_header: str, scopes: typing.List[str]) -> typing.Tuple[str, int]:
"""
:rtype: (Text,Int) The first element is the access token retrieved from the IDP, the second is the expiration
in seconds
"""
headers = {
"Authorization": authorization_header,
"Cache-Control": "no-cache",
"Accept": "application/json",
"Content-Type": "application/x-www-form-urlencoded",
}
body = {
"grant_type": "client_credentials",
}
if scopes is not None:
body["scope"] = ",".join(scopes)
response = requests.post(token_endpoint, data=body, headers=headers)
if response.status_code != 200:
logging.error("Non-200 ({}) received from IDP: {}".format(response.status_code, response.text))
raise AuthenticationError("Non-200 received from IDP")

response = response.json()
return response["access_token"], response["expires_in"]

@staticmethod
def get_basic_authorization_header(client_id: str, client_secret: str) -> str:
"""
This function transforms the client id and the client secret into a header that conforms with http basic auth.
It joins the id and the secret with a : then base64 encodes it, then adds the appropriate text
:param client_id: str
:param client_secret: str
:rtype: str
"""
concated = "{}:{}".format(client_id, client_secret)
return "Basic {}".format(
base64.b64encode(concated.encode(ClientCredentialsAuthenticator._utf_8)).decode(
ClientCredentialsAuthenticator._utf_8
)
)

def refresh_credentials(self):
"""
This function is used by the _handle_rpc_error() decorator, depending on the AUTH_MODE config object. This handler
Expand All @@ -229,7 +186,56 @@ def refresh_credentials(self):

# Note that unlike the Pkce flow, the client ID does not come from Admin.
logging.debug(f"Basic authorization flow with client id {self._client_id} scope {scopes}")
authorization_header = self.get_basic_authorization_header(self._client_id, self._client_secret)
token, expires_in = self.get_token(token_endpoint, authorization_header, scopes)
authorization_header = token_client.get_basic_authorization_header(self._client_id, self._client_secret)
token, expires_in = token_client.get_token(token_endpoint, scopes, authorization_header)
logging.info("Retrieved new token, expires in {}".format(expires_in))
self._creds = Credentials(token)


class DeviceCodeAuthenticator(Authenticator):
"""
This Authenticator implements the Device Code authorization flow useful for headless user authentication.
Examples described
- https://developer.okta.com/docs/guides/device-authorization-grant/main/
- https://auth0.com/docs/get-started/authentication-and-authorization-flow/device-authorization-flow#device-flow
"""

def __init__(
self,
endpoint: str,
cfg_store: ClientConfigStore,
header_key: typing.Optional[str] = None,
audience: typing.Optional[str] = None,
):
self._audience = audience
cfg = cfg_store.get_client_config()
self._client_id = cfg.client_id
self._device_auth_endpoint = cfg.device_authorization_endpoint
self._scope = cfg.scopes
self._token_endpoint = cfg.token_endpoint
if self._device_auth_endpoint is None:
raise AuthenticationError(
"Device Authentication is not available on the Flyte backend / authentication server"
)
super().__init__(
endpoint=endpoint, header_key=header_key or cfg.header_key, credentials=KeyringStore.retrieve(endpoint)
)

def refresh_credentials(self):
resp = token_client.get_device_code(self._device_auth_endpoint, self._client_id, self._audience, self._scope)
print(
f"""
To Authenticate navigate in a browser to the following URL: {resp.verification_uri} and enter code: {resp.user_code}
OR copy paste the following URL: {resp.verification_uri_complete}
"""
)
try:
# Currently the refresh token is not retreived. We may want to add support for refreshTokens so that
# access tokens can be refreshed for once authenticated machines
token, expires_in = token_client.poll_token_endpoint(resp, self._token_endpoint, client_id=self._client_id)
self._creds = Credentials(access_token=token, expires_in=expires_in, for_endpoint=self._endpoint)
KeyringStore.store(self._creds)
except Exception:
KeyringStore.delete(self._endpoint)
raise
Loading

0 comments on commit fee1b22

Please sign in to comment.