From 540ba580991e339130941d7369baeffd71d5be7c Mon Sep 17 00:00:00 2001 From: testinnplayin Date: Mon, 21 Sep 2020 09:22:35 +0200 Subject: [PATCH] feat(gsheets2): passed secrets as param (#209) * feat(gsheets2): passed secrets as param * v0.41.2 --- doc/connectors/google_sheets_2.md | 12 ++- setup.py | 2 +- tests/google_sheets_2/test_google_sheets_2.py | 80 +++++++++---------- tests/test_connector.py | 36 ++++++++- toucan_connectors/__init__.py | 2 +- .../google_sheets_2_connector.py | 48 +++++------ toucan_connectors/toucan_connector.py | 21 +++-- 7 files changed, 121 insertions(+), 80 deletions(-) diff --git a/doc/connectors/google_sheets_2.md b/doc/connectors/google_sheets_2.md index ee4c438a0..aa934ed60 100644 --- a/doc/connectors/google_sheets_2.md +++ b/doc/connectors/google_sheets_2.md @@ -4,16 +4,13 @@ * `type`: `"GoogleSheets2"` * `name`: str, required -* `auth_flow`: str +* `_auth_flow`: str * `auth_flow_id`: str -* `baseroute`: str -* `secrets`: dict +* `_baseroute`: str -The `auth_flow` property marks this as being a connector that uses the connector_oauth_manager for the oauth dance. +The `_auth_flow` property marks this as being a connector that requires initiating the oauth dance and prevents it from being in the schema. -The `baseroute` is fixed and is 'https://sheets.googleapis.com/v4/spreadsheets/'. - -The `secrets` dictionary contains the `access_token` and a `refresh_token` (if there is one). Though `secrets` is optional during the initial creation of the connector, it is necessary for when the user wants to make requests to the connector. If there is no `access_token`, an Exception is thrown. +The `_baseroute` is fixed and is 'https://sheets.googleapis.com/v4/spreadsheets/'. This is also hidden from rendering. The `auth_flow_id` property is like an identifier that is used to identify the secrets associated with the connector. @@ -22,6 +19,7 @@ The `auth_flow_id` property is like an identifier that is used to identify the s DATA_PROVIDERS: [ type: 'GoogleSheets' name: '' + auth_flow_id: '' , ... ] diff --git a/setup.py b/setup.py index 0c884855b..2753e2616 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ def get_static_file_paths(): setup( name='toucan_connectors', - version='0.41.1', + version='0.41.2', description='Toucan Toco Connectors', long_description=(HERE / 'README.md').read_text(encoding='utf-8'), long_description_content_type='text/markdown', diff --git a/tests/google_sheets_2/test_google_sheets_2.py b/tests/google_sheets_2/test_google_sheets_2.py index 4032eb1d0..be3164f54 100644 --- a/tests/google_sheets_2/test_google_sheets_2.py +++ b/tests/google_sheets_2/test_google_sheets_2.py @@ -1,3 +1,4 @@ +from functools import partial from unittest.mock import Mock import pytest @@ -8,6 +9,7 @@ from toucan_connectors.google_sheets_2.google_sheets_2_connector import ( GoogleSheets2Connector, GoogleSheets2DataSource, + NoCredentialsError, ) import_path = 'toucan_connectors.google_sheets_2.google_sheets_2_connector' @@ -18,12 +20,6 @@ def con(): return GoogleSheets2Connector(name='test_name') -@fixture -def con_with_secrets(con): - con.set_secrets({'access_token': 'foo', 'refresh_token': None}) - return con - - @fixture def ds(): return GoogleSheets2DataSource( @@ -43,6 +39,14 @@ def ds_without_sheet(): ) +@fixture +def fake_kwargs(): + def fake_fetch_secrets(small_app_id, connector_type, auth_flow_id): + return {'access_token': 'myaccesstoken'} + + return {'secrets': partial(fake_fetch_secrets, 'laputa', 'GoogleSheets2')} + + FAKE_SHEET = { 'metadata': '...', 'values': [['country', 'city'], ['France', 'Paris'], ['England', 'London']], @@ -85,13 +89,14 @@ def get_columns_in_schema(schema): return None -def test_get_form_with_secrets(mocker, con_with_secrets, ds): +def test_get_form_with_secrets(mocker, con, ds, fake_kwargs): """It should return a list of spreadsheet titles.""" mocker.patch.object(GoogleSheets2Connector, '_run_fetch', return_value=FAKE_SHEET_LIST_RESPONSE) result = ds.get_form( - connector=con_with_secrets, + connector=con, current_config={'spreadsheet_id': '1SMnhnmBm-Tup3SfhS03McCf6S4pS2xqjI6CAXSSBpHU'}, + **fake_kwargs, ) expected_results = ['Foo', 'Bar', 'Baz'] assert get_columns_in_schema(result) == expected_results @@ -107,50 +112,41 @@ def test_get_form_no_secrets(mocker, con, ds): assert not get_columns_in_schema(result) -def test_set_secrets(mocker, con): - """It should set secrets on the connector.""" - spy = mocker.spy(GoogleSheets2Connector, 'set_secrets') - fake_secrets = { - 'access_token': 'myaccesstoken', - 'refresh_token': None, - } - con.set_secrets(fake_secrets) - - assert con.secrets == fake_secrets - spy.assert_called_once_with(con, fake_secrets) - - -def test_spreadsheet_success(mocker, con_with_secrets, ds): +def test_spreadsheet_success(mocker, con, ds, fake_kwargs): """It should return a spreadsheet.""" mocker.patch.object(GoogleSheets2Connector, '_run_fetch', return_value=FAKE_SHEET) - df = con_with_secrets.get_df(ds) + df = con.get_df(ds, **fake_kwargs) assert df.shape == (2, 2) assert df.columns.tolist() == ['country', 'city'] ds.header_row = 1 - df = con_with_secrets.get_df(ds) + df = con.get_df(ds, **fake_kwargs) assert df.shape == (1, 2) assert df.columns.tolist() == ['France', 'Paris'] def test_spreadsheet_no_secrets(mocker, con, ds): - """It should raise an exception if there no secrets passed or no access token.""" + """It should raise an exception if there are no secrets returned or any document in database.""" mocker.patch.object(GoogleSheets2Connector, '_run_fetch', return_value=FAKE_SHEET) - - with pytest.raises(Exception) as err: - con.get_df(ds) + bogus_fake_kwargs = {'secrets': None} + with pytest.raises(NoCredentialsError) as err: + con.get_df(ds, **bogus_fake_kwargs) assert str(err.value) == 'No credentials' - con.set_secrets({'refresh_token': None}) + # Function that returns an empty dict, as if when no document is found in database + def fake_fetch_secrets(small_app_id, connector_type, auth_flow_id): + return {} + + empty_secrets_kwargs = {'secrets': partial(fake_fetch_secrets, 'laputa', 'GoogleSheets2')} - with pytest.raises(KeyError): - con.get_df(ds) + with pytest.raises(NoCredentialsError): + con.get_df(ds, **empty_secrets_kwargs) -def test_set_columns(mocker, con_with_secrets, ds): +def test_set_columns(mocker, con, ds, fake_kwargs): """It should return a well-formed column set.""" fake_results = { 'metadata': '...', @@ -158,7 +154,7 @@ def test_set_columns(mocker, con_with_secrets, ds): } mocker.patch.object(GoogleSheets2Connector, '_run_fetch', return_value=fake_results) - df = con_with_secrets.get_df(ds) + df = con.get_df(ds, **fake_kwargs) assert df.to_dict() == { 'Animateur': {1: 'pika', 2: 'bulbi'}, 1: {1: '', 2: ''}, @@ -178,7 +174,7 @@ def test__run_fetch(mocker, con): assert result == FAKE_SHEET -def test_spreadsheet_without_sheet(mocker, con_with_secrets, ds_without_sheet): +def test_spreadsheet_without_sheet(mocker, con, ds_without_sheet, fake_kwargs): """ It should retrieve the first sheet of the spreadsheet if no sheet has been indicated """ @@ -192,7 +188,7 @@ def mock_api_responses(uri: str, _token): fetch_mock: Mock = mocker.patch.object( GoogleSheets2Connector, '_run_fetch', side_effect=mock_api_responses ) - df = con_with_secrets.get_df(ds_without_sheet) + df = con.get_df(ds_without_sheet, **fake_kwargs) assert fetch_mock.call_count == 2 assert ( @@ -215,27 +211,27 @@ def test_get_status_no_secrets(mocker, con): assert con.get_status().status is False -def test_get_status_success(mocker, con_with_secrets): +def test_get_status_success(mocker, con, fake_kwargs): """ - It should fail if no secrets are provided + It should fail if no secrets are provided. """ fetch_mock: Mock = mocker.patch.object( GoogleSheets2Connector, '_run_fetch', return_value={'email': 'foo@bar.baz'} ) - connector_status = con_with_secrets.get_status() + connector_status = con.get_status(**fake_kwargs) assert connector_status.status is True assert 'foo@bar.baz' in connector_status.message fetch_mock.assert_called_once_with( - 'https://www.googleapis.com/oauth2/v2/userinfo?alt=json', 'foo' + 'https://www.googleapis.com/oauth2/v2/userinfo?alt=json', 'myaccesstoken' ) -def test_get_status_api_down(mocker, con_with_secrets): +def test_get_status_api_down(mocker, con, fake_kwargs): """ - It should fail if no secrets are provided + It should fail if the third-party api is down. """ mocker.patch.object(GoogleSheets2Connector, '_run_fetch', side_effect=HttpError) - assert con_with_secrets.get_status().status is False + assert con.get_status(**fake_kwargs).status is False diff --git a/tests/test_connector.py b/tests/test_connector.py index bcfddb163..c8e646fde 100644 --- a/tests/test_connector.py +++ b/tests/test_connector.py @@ -6,7 +6,12 @@ from pydantic import create_model from toucan_connectors.common import ConnectorStatus -from toucan_connectors.toucan_connector import ToucanConnector, ToucanDataSource, strlist_to_enum +from toucan_connectors.toucan_connector import ( + RetryPolicy, + ToucanConnector, + ToucanDataSource, + strlist_to_enum, +) class DataSource(ToucanDataSource): @@ -99,6 +104,35 @@ def _retrieve_data(self, datasource): assert res.total_count == 5 +def test_get_slice_w_secrets(mocker): + """It should pass secrets on in kwargs if an auth flow connector.""" + + class AuthFlowDataConnector(ToucanConnector): + type = 'MyAuthFlow' + data_source_model = 'asd' + _auth_flow = 'oauth2' + + def _retrieve_data(self, datasource, **kwargs): + return pd.DataFrame({'foo': ['bar', 'baz']}) + + spy = mocker.spy(AuthFlowDataConnector, '_retrieve_data') + fake_kwargs = {'secrets': 'secretsecrets'} + connector = AuthFlowDataConnector(name='my_connector') + res = connector.get_slice({}, **fake_kwargs) + assert res.total_count == 2 + spy.assert_called_once_with( + AuthFlowDataConnector( + name='my_connector', + retry_policy=RetryPolicy( + max_attempts=1, max_delay=0.0, wait_time=0.0, retry_on=(), logger=None + ), + type='MyAuthFlow', + ), + {}, + **fake_kwargs, + ) + + def test_explain(): class DataConnector(ToucanConnector): type = 'MyDB' diff --git a/toucan_connectors/__init__.py b/toucan_connectors/__init__.py index 98c483bba..15fbf9f1d 100644 --- a/toucan_connectors/__init__.py +++ b/toucan_connectors/__init__.py @@ -194,7 +194,7 @@ def html_base64_image_src(image_path: str) -> str: with suppress(AttributeError): connector_infos['bearer_integration'] = connector_cls.bearer_integration with suppress(AttributeError): - connector_infos['auth_flow'] = connector_cls.auth_flow + connector_infos['_auth_flow'] = connector_cls._auth_flow # check if connector implements `get_status`, # which is hence different from `ToucanConnector.get_status` connector_infos['hasStatusCheck'] = ( diff --git a/toucan_connectors/google_sheets_2/google_sheets_2_connector.py b/toucan_connectors/google_sheets_2/google_sheets_2_connector.py index 36c3955d1..50ca9a304 100644 --- a/toucan_connectors/google_sheets_2/google_sheets_2_connector.py +++ b/toucan_connectors/google_sheets_2/google_sheets_2_connector.py @@ -13,6 +13,10 @@ from toucan_connectors.toucan_connector import ToucanConnector, ToucanDataSource, strlist_to_enum +class NoCredentialsError(Exception): + """Raised when no secrets avaiable.""" + + class GoogleSheets2DataSource(ToucanDataSource): """ Google Spreadsheet 2 data source class. @@ -37,14 +41,15 @@ class GoogleSheets2DataSource(ToucanDataSource): ) @classmethod - def get_form(cls, connector: 'GoogleSheets2Connector', current_config): + def get_form(cls, connector: 'GoogleSheets2Connector', current_config, **kwargs): """Retrieve a form filled with suggestions of available sheets.""" # Always add the suggestions for the available sheets constraints = {} with suppress(Exception): partial_endpoint = current_config['spreadsheet_id'] - final_url = f'{connector.baseroute}{partial_endpoint}' - data = connector._run_fetch(final_url, connector.secrets['access_token']) + final_url = f'{connector._baseroute}{partial_endpoint}' + secrets = kwargs.get('secrets')(connector.auth_flow_id) + data = connector._run_fetch(final_url, secrets['access_token']) available_sheets = [str(x['properties']['title']) for x in data['sheets']] constraints['sheet'] = strlist_to_enum('sheet', available_sheets) @@ -59,15 +64,12 @@ class GoogleSheets2Connector(ToucanConnector): data_source_model: GoogleSheets2DataSource - auth_flow = 'oauth2' + _auth_flow = 'oauth2' auth_flow_id: Optional[str] - # The following should be hidden properties - - baseroute = 'https://sheets.googleapis.com/v4/spreadsheets/' - - secrets: Optional[Secrets] + # TODO: turn into a class property + _baseroute = 'https://sheets.googleapis.com/v4/spreadsheets/' async def _authentified_fetch(self, url, access_token): """Build the final request along with headers.""" @@ -76,38 +78,36 @@ async def _authentified_fetch(self, url, access_token): async with ClientSession(headers=headers) as session: return await fetch(url, session) - def set_secrets(self, secrets: Secrets): - """Set the secrets from inside the main service.""" - self.secrets = secrets - def _run_fetch(self, url, access_token): """Run loop.""" loop = get_loop() future = asyncio.ensure_future(self._authentified_fetch(url, access_token)) return loop.run_until_complete(future) - def _retrieve_data(self, data_source: GoogleSheets2DataSource) -> pd.DataFrame: + def _retrieve_data(self, data_source: GoogleSheets2DataSource, **kwargs) -> pd.DataFrame: """ Point of entry for data retrieval in the connector Requires: - Datasource + - Secrets """ - if not self.secrets: - raise Exception('No credentials') - - access_token = self.secrets['access_token'] + try: + secrets = kwargs.get('secrets')(self.auth_flow_id) + access_token = secrets['access_token'] + except Exception: + raise NoCredentialsError('No credentials') if data_source.sheet is None: # Get spreadsheet informations and retrieve all the available sheets # https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/get - data = self._run_fetch(f'{self.baseroute}{data_source.spreadsheet_id}', access_token) + data = self._run_fetch(f'{self._baseroute}{data_source.spreadsheet_id}', access_token) available_sheets = [str(x['properties']['title']) for x in data['sheets']] data_source.sheet = available_sheets[0] # https://developers.google.com/sheets/api/samples/reading read_sheet_endpoint = f'{data_source.spreadsheet_id}/values/{data_source.sheet}' - full_url = f'{self.baseroute}{read_sheet_endpoint}' + full_url = f'{self._baseroute}{read_sheet_endpoint}' data = self._run_fetch(full_url, access_token)['values'] df = pd.DataFrame(data) @@ -126,16 +126,18 @@ def _retrieve_data(self, data_source: GoogleSheets2DataSource) -> pd.DataFrame: return df - def get_status(self) -> ConnectorStatus: + def get_status(self, **kwargs) -> ConnectorStatus: """ Test the Google Sheets connexion. If successful, returns a message with the email of the connected user account. """ - if not self.secrets or 'access_token' not in self.secrets: + try: + secrets = kwargs.get('secrets')(self.auth_flow_id) + access_token = secrets['access_token'] + except Exception: return ConnectorStatus(status=False, error='Credentials are missing') - access_token = self.secrets['access_token'] try: user_info = self._run_fetch( 'https://www.googleapis.com/oauth2/v2/userinfo?alt=json', access_token diff --git a/toucan_connectors/toucan_connector.py b/toucan_connectors/toucan_connector.py index 028ca48b8..211bfbf53 100644 --- a/toucan_connectors/toucan_connector.py +++ b/toucan_connectors/toucan_connector.py @@ -204,8 +204,6 @@ def __init_subclass__(cls): raise TypeError(f'{cls.__name__} has no {e} attribute.') if 'bearer_integration' in cls.__fields__: cls.bearer_integration = cls.__fields__['bearer_integration'].default - if 'auth_flow' in cls.__fields__: - cls.auth_flow = cls.__fields__['auth_flow'].default def bearer_oauth_get_endpoint( self, @@ -227,7 +225,7 @@ def retry_decorator(self): return RetryPolicy(**kwargs) @abstractmethod - def _retrieve_data(self, data_source: ToucanDataSource): + def _retrieve_data(self, data_source: ToucanDataSource, **kwargs): """Main method to retrieve a pandas dataframe""" @decorate_func_with_retry @@ -235,12 +233,19 @@ def get_df( self, data_source: ToucanDataSource, permissions: Optional[dict] = None, + **kwargs, ) -> pd.DataFrame: """ Method to retrieve the data as a pandas dataframe filtered by permissions """ - res = self._retrieve_data(data_source) + # This conditional prevents passing secrets in kwargs to connectors that can't use them + # inspect module's signature object can't be used here in Python 3.6 because it is not + # generated the same way as in 3.7 and 3.8; this can be eventually replaced + if hasattr(self, '_auth_flow'): + res = self._retrieve_data(data_source, **kwargs) + else: + res = self._retrieve_data(data_source) if permissions is not None: permissions_query = PandasConditionTranslator.translate(permissions) @@ -254,6 +259,7 @@ def get_slice( permissions: Optional[dict] = None, offset: int = 0, limit: Optional[int] = None, + **kwargs, ) -> DataSlice: """ Method to retrieve a part of the data as a pandas dataframe @@ -263,7 +269,7 @@ def get_slice( - limit is the number of rows to retrieve Exemple: if offset = 5 and limit = 10 then 10 results are expected from 6th row """ - df = self.get_df(data_source, permissions) + df = self.get_df(data_source, permissions, **kwargs) if limit is not None: return DataSlice(df[offset : offset + limit], len(df)) else: @@ -306,3 +312,8 @@ def get_status(self) -> ConnectorStatus: } """ return ConnectorStatus() + + def set_get_secrets(self, get_secrets): + print(self) + if self.auth_flow_id: + self._get_secrets = get_secrets(self.auth_flow_id)