Merge branch 'master' of github.com:flyteorg/flytekit into map-container

flyteorg · Mar 29, 2023 · fee1b22 · fee1b22
2 parents a48e7f0 + 12be8d8
commit fee1b22
Show file tree

Hide file tree

Showing 127 changed files with 3,454 additions and 3,386 deletions.
diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml
@@ -16,7 +16,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest]
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
         exclude:
           # Ignore this test because we failed to install docker-py
           # docker-py will install pywin32==227, whereas pywin only added support for python 3.10 in version 301.
@@ -53,7 +53,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
         plugin-names:
           # Please maintain an alphabetical order in the following list
           - flytekit-aws-athena
@@ -72,6 +72,7 @@ jobs:
           - flytekit-kf-mpi
           - flytekit-kf-pytorch
           - flytekit-kf-tensorflow
+          - flytekit-mlflow
           - flytekit-modin
           - flytekit-onnx-pytorch
           - flytekit-onnx-scikitlearn
@@ -88,13 +89,16 @@ jobs:
           - flytekit-vaex
           - flytekit-whylogs
         exclude:
-          # flytekit-modin depends on ray which does not have a 3.10 wheel yet.
-          # Issue tracked in https://github.com/ray-project/ray/issues/19116.
-          - python-version: 3.10
+          # flytekit-modin depends on ray which does not have a 3.11 wheel yet.
+          # Issue tracked in https://github.com/ray-project/ray/issues/27881
+          - python-version: 3.11
             plugin-names: "flytekit-modin"
-          # Great-expectations does not support python 3.10 yet
-          # https://github.com/great-expectations/great_expectations/blob/develop/setup.py#L87-L89
-          - python-version: 3.10
+          - python-version: 3.11
+            plugin-names: "flytekit-ray"
+          # Great-expectations does not support python 3.11 due to sqlachemy>=2.0.0
+          # not being supported yet:
+          # https://github.com/great-expectations/great_expectations/issues/7020
+          - python-version: 3.11
             plugin-names: "flytekit-greatexpectations"
           # onnxruntime does not support python 3.10 yet
           # https://github.com/microsoft/onnxruntime/issues/9782
@@ -104,10 +108,29 @@ jobs:
             plugin-names: "flytekit-onnx-scikitlearn"
           - python-version: 3.10
             plugin-names: "flytekit-onnx-tensorflow"
-          # whylogs-sketching library does not have a 3.10 build yet
-          # Issue tracked: https://github.com/whylabs/whylogs/issues/697
-          - python-version: 3.10
+          - python-version: 3.11
+            plugin-names: "flytekit-onnx-pytorch"
+          - python-version: 3.11
+            plugin-names: "flytekit-onnx-scikitlearn"
+          - python-version: 3.11
+            plugin-names: "flytekit-onnx-tensorflow"
+          # numba, a dependency of mlflow, doesn't support python 3.11
+          # https://github.com/numba/numba/issues/8304
+          - python-version: 3.11
+            plugin-names: "flytekit-mlflow"
+          # pyspark 3.4.0 will support python 3.11, which isn't available yet
+          # Issue tracked in: https://github.com/apache/spark/pull/38987
+          - python-version: 3.11
+            plugin-names: "flytekit-spark"
+          # vaex currently doesn't support python 3.11
+          - python-version: 3.11
+            plugin-names: "flytekit-vaex"
+          # whylogs does support python 3.11 dataclass restrictions
+          # See: https://github.com/flyteorg/flytekit/actions/runs/4493746408/jobs/7905368664
+          - python-version: 3.11
             plugin-names: "flytekit-whylogs"
+
+
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
@@ -134,7 +157,7 @@ jobs:
           cd plugins/${{ matrix.plugin-names }}
           # onnx plugins does not support protobuf>4 yet (in fact it is tensorflow that
           # does not support that yet). More details in https://github.com/onnx/onnx/issues/4239.
-          if [[ ${{ matrix.plugin-names }} == *"onnx"* || ${{ matrix.plugin-names }} == "flytekit-whylogs" ]]; then
+          if [[ ${{ matrix.plugin-names }} == *"onnx"* || ${{ matrix.plugin-names }} == "flytekit-whylogs" || ${{ matrix.plugin-names }} == "flytekit-mlflow" ]]; then
              PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python coverage run -m pytest tests
           else
              coverage run -m pytest tests

diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml
@@ -47,11 +47,28 @@ jobs:
         run: |
           make -C plugins build_all_plugins
           make -C plugins publish_all_plugins
-      # Added sleep because PYPI take some time in publish
-      - name: Sleep for 180 seconds
-        uses: jakejarvis/wait-action@master
-        with:
-          time: '180s'
+      - name: Sleep until pypi is available
+        id: pypiwait
+        run: |
+          # from refs/tags/v1.2.3 get 1.2.3 and make sure it's not an empty string
+          VERSION=$(echo $GITHUB_REF | sed 's#.*/v##')
+          if [ -z "$VERSION" ]
+          then
+            echo "No tagged version found, exiting"
+            exit 1
+          fi
+          LINK="https://pypi.org/project/flytekit/${VERSION}"
+          for i in {1..60}; do
+            if curl -L -I -s -f ${LINK} >/dev/null; then
+              echo "Found pypi"
+              exit 0
+            else
+              echo "Did not find - Retrying in 10 seconds..."
+              sleep 10
+            fi
+          done
+          exit 1
+        shell: bash
     outputs:
       version: ${{ steps.bump.outputs.version }}
 
@@ -60,7 +77,7 @@ jobs:
     needs: deploy
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - uses: actions/checkout@v2
         with:

diff --git a/Dockerfile b/Dockerfile
@@ -16,8 +16,6 @@ RUN apt-get update && apt-get install build-essential -y
 RUN pip install -U flytekit==$VERSION \
 	flytekitplugins-pod==$VERSION \
 	flytekitplugins-deck-standard==$VERSION \
-	flytekitplugins-data-fsspec[aws]==$VERSION \
-	flytekitplugins-data-fsspec[gcp]==$VERSION \
 	scikit-learn
 
 RUN useradd -u 1000 flytekit

diff --git a/Dockerfile.dev b/Dockerfile.dev
@@ -0,0 +1,32 @@
+# This Dockerfile is here to help with end-to-end testing
+# From flytekit
+# $ docker build -f Dockerfile.dev --build-arg PYTHON_VERSION=3.10 -t localhost:30000/flytekittest:someversion .
+# $ docker push localhost:30000/flytekittest:someversion
+# From your test user code
+# $ pyflyte run --image localhost:30000/flytekittest:someversion
+
+ARG PYTHON_VERSION
+FROM python:${PYTHON_VERSION}-slim-buster
+
+MAINTAINER Flyte Team <[email protected]>
+LABEL org.opencontainers.image.source https://github.com/flyteorg/flytekit
+
+WORKDIR /root
+
+ARG VERSION
+
+RUN apt-get update && apt-get install build-essential vim -y
+
+COPY . /flytekit
+
+# Pod tasks should be exposed in the default image
+RUN pip install -e /flytekit
+RUN pip install -e /flytekit/plugins/flytekit-k8s-pod
+RUN pip install -e /flytekit/plugins/flytekit-deck-standard
+RUN pip install scikit-learn
+
+ENV PYTHONPATH "/flytekit:/flytekit/plugins/flytekit-k8s-pod:/flytekit/plugins/flytekit-deck-standard:"
+
+RUN useradd -u 1000 flytekit
+RUN chown flytekit: /root
+USER flytekit
diff --git a/dev-requirements.in b/dev-requirements.in
@@ -15,10 +15,16 @@ IPython
 keyrings.alt
 
 # Only install tensorflow if not running on an arm Mac.
-tensorflow==2.8.1; platform_machine!='arm64' or platform_system!='Darwin'
+tensorflow==2.8.1; python_version<'3.11' and (platform_machine!='arm64' or platform_system!='Darwin')
+# Tensorflow release candidate supports python 3.11
+tensorflow==2.12.0rc1; python_version>='3.11' and (platform_machine!='arm64' or platform_system!='Darwin')
+
 # Newer versions of torch bring in nvidia dependencies that are not present in windows, so
 # we put this constraint while we do not have per-environment requirements files
-torch<=1.12.1
+torch<=1.12.1; python_version<'3.11'
+# pytorch 2 supports python 3.11
+torch<=2.0.0; python_version>='3.11' or platform_system!='Windows'
+
 scikit-learn
 types-protobuf
 types-croniter

diff --git a/doc-requirements.txt b/doc-requirements.txt
@@ -204,7 +204,7 @@ flask==2.2.3
     # via mlflow
 flatbuffers==23.1.21
     # via tensorflow
-flyteidl==1.3.7
+flyteidl==1.3.12
     # via flytekit
 fonttools==4.38.0
     # via matplotlib
@@ -216,7 +216,7 @@ frozenlist==1.3.3
     # via
     #   aiosignal
     #   ray
-fsspec==2023.1.0
+fsspec==2023.3.0
     # via
     #   -r doc-requirements.in
     #   dask

diff --git a/flytekit/__init__.py b/flytekit/__init__.py
@@ -205,7 +205,6 @@
 from flytekit.core.condition import conditional
 from flytekit.core.container_task import ContainerTask
 from flytekit.core.context_manager import ExecutionParameters, FlyteContext, FlyteContextManager
-from flytekit.core.data_persistence import DataPersistence, DataPersistencePlugins
 from flytekit.core.dynamic_workflow_task import dynamic
 from flytekit.core.gate import approve, sleep, wait_for_input
 from flytekit.core.hash import HashMethod
@@ -223,7 +222,6 @@
 from flytekit.core.workflow import WorkflowFailurePolicy, reference_workflow, workflow
 from flytekit.deck import Deck
 from flytekit.extras import pytorch, sklearn, tensorflow
-from flytekit.extras.persistence import GCSPersistence, HttpPersistence, S3Persistence
 from flytekit.loggers import logger
 from flytekit.models.common import Annotations, AuthRole, Labels
 from flytekit.models.core.execution import WorkflowExecutionPhase

diff --git a/flytekit/clients/auth/auth_client.py b/flytekit/clients/auth/auth_client.py
@@ -269,9 +269,11 @@ def _credentials_from_response(self, auth_token_resp) -> Credentials:
             raise ValueError('Expected "access_token" in response from oauth server')
         if "refresh_token" in response_body:
             refresh_token = response_body["refresh_token"]
+        if "expires_in" in response_body:
+            expires_in = response_body["expires_in"]
         access_token = response_body["access_token"]
 
-        return Credentials(access_token, refresh_token, self._endpoint)
+        return Credentials(access_token, refresh_token, self._endpoint, expires_in=expires_in)
 
     def _request_access_token(self, auth_code) -> Credentials:
         if self._state != auth_code.state:

diff --git a/flytekit/clients/auth/authenticator.py b/flytekit/clients/auth/authenticator.py
@@ -1,12 +1,10 @@
-import base64
 import logging
 import subprocess
 import typing
 from abc import abstractmethod
 from dataclasses import dataclass
 
-import requests
-
+from . import token_client
 from .auth_client import AuthorizationClient
 from .exceptions import AccessTokenNotFoundError, AuthenticationError
 from .keyring import Credentials, KeyringStore
@@ -22,6 +20,7 @@ class ClientConfig:
     authorization_endpoint: str
     redirect_uri: str
     client_id: str
+    device_authorization_endpoint: typing.Optional[str] = None
     scopes: typing.List[str] = None
     header_key: str = "authorization"
 
@@ -155,67 +154,25 @@ class ClientCredentialsAuthenticator(Authenticator):
     This Authenticator uses ClientId and ClientSecret to authenticate
     """
 
-    _utf_8 = "utf-8"
-
     def __init__(
         self,
         endpoint: str,
         client_id: str,
         client_secret: str,
         cfg_store: ClientConfigStore,
-        header_key: str = None,
+        header_key: typing.Optional[str] = None,
+        scopes: typing.Optional[typing.List[str]] = None,
     ):
         if not client_id or not client_secret:
             raise ValueError("Client ID and Client SECRET both are required.")
         cfg = cfg_store.get_client_config()
         self._token_endpoint = cfg.token_endpoint
-        self._scopes = cfg.scopes
+        # Use scopes from `flytekit.configuration.PlatformConfig` if passed
+        self._scopes = scopes or cfg.scopes
         self._client_id = client_id
         self._client_secret = client_secret
         super().__init__(endpoint, cfg.header_key or header_key)
 
-    @staticmethod
-    def get_token(token_endpoint: str, authorization_header: str, scopes: typing.List[str]) -> typing.Tuple[str, int]:
-        """
-        :rtype: (Text,Int) The first element is the access token retrieved from the IDP, the second is the expiration
-                in seconds
-        """
-        headers = {
-            "Authorization": authorization_header,
-            "Cache-Control": "no-cache",
-            "Accept": "application/json",
-            "Content-Type": "application/x-www-form-urlencoded",
-        }
-        body = {
-            "grant_type": "client_credentials",
-        }
-        if scopes is not None:
-            body["scope"] = ",".join(scopes)
-        response = requests.post(token_endpoint, data=body, headers=headers)
-        if response.status_code != 200:
-            logging.error("Non-200 ({}) received from IDP: {}".format(response.status_code, response.text))
-            raise AuthenticationError("Non-200 received from IDP")
-
-        response = response.json()
-        return response["access_token"], response["expires_in"]
-
-    @staticmethod
-    def get_basic_authorization_header(client_id: str, client_secret: str) -> str:
-        """
-        This function transforms the client id and the client secret into a header that conforms with http basic auth.
-        It joins the id and the secret with a : then base64 encodes it, then adds the appropriate text
-
-        :param client_id: str
-        :param client_secret: str
-        :rtype: str
-        """
-        concated = "{}:{}".format(client_id, client_secret)
-        return "Basic {}".format(
-            base64.b64encode(concated.encode(ClientCredentialsAuthenticator._utf_8)).decode(
-                ClientCredentialsAuthenticator._utf_8
-            )
-        )
-
     def refresh_credentials(self):
         """
         This function is used by the _handle_rpc_error() decorator, depending on the AUTH_MODE config object. This handler
@@ -229,7 +186,56 @@ def refresh_credentials(self):
 
         # Note that unlike the Pkce flow, the client ID does not come from Admin.
         logging.debug(f"Basic authorization flow with client id {self._client_id} scope {scopes}")
-        authorization_header = self.get_basic_authorization_header(self._client_id, self._client_secret)
-        token, expires_in = self.get_token(token_endpoint, authorization_header, scopes)
+        authorization_header = token_client.get_basic_authorization_header(self._client_id, self._client_secret)
+        token, expires_in = token_client.get_token(token_endpoint, scopes, authorization_header)
         logging.info("Retrieved new token, expires in {}".format(expires_in))
         self._creds = Credentials(token)
+
+
+class DeviceCodeAuthenticator(Authenticator):
+    """
+    This Authenticator implements the Device Code authorization flow useful for headless user authentication.
+
+    Examples described
+    - https://developer.okta.com/docs/guides/device-authorization-grant/main/
+    - https://auth0.com/docs/get-started/authentication-and-authorization-flow/device-authorization-flow#device-flow
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        cfg_store: ClientConfigStore,
+        header_key: typing.Optional[str] = None,
+        audience: typing.Optional[str] = None,
+    ):
+        self._audience = audience
+        cfg = cfg_store.get_client_config()
+        self._client_id = cfg.client_id
+        self._device_auth_endpoint = cfg.device_authorization_endpoint
+        self._scope = cfg.scopes
+        self._token_endpoint = cfg.token_endpoint
+        if self._device_auth_endpoint is None:
+            raise AuthenticationError(
+                "Device Authentication is not available on the Flyte backend / authentication server"
+            )
+        super().__init__(
+            endpoint=endpoint, header_key=header_key or cfg.header_key, credentials=KeyringStore.retrieve(endpoint)
+        )
+
+    def refresh_credentials(self):
+        resp = token_client.get_device_code(self._device_auth_endpoint, self._client_id, self._audience, self._scope)
+        print(
+            f"""
+To Authenticate navigate in a browser to the following URL: {resp.verification_uri} and enter code: {resp.user_code}
+OR copy paste the following URL: {resp.verification_uri_complete}
+        """
+        )
+        try:
+            # Currently the refresh token is not retreived. We may want to add support for refreshTokens so that
+            # access tokens can be refreshed for once authenticated machines
+            token, expires_in = token_client.poll_token_endpoint(resp, self._token_endpoint, client_id=self._client_id)
+            self._creds = Credentials(access_token=token, expires_in=expires_in, for_endpoint=self._endpoint)
+            KeyringStore.store(self._creds)
+        except Exception:
+            KeyringStore.delete(self._endpoint)
+            raise