From 1656630522446aa579412eec636ddd9e5b8b21b9 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Tue, 17 Oct 2023 11:45:52 +0200 Subject: [PATCH 01/82] Change collections to collections.abc --- polytope_server/frontend/common/flask_decorators.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/polytope_server/frontend/common/flask_decorators.py b/polytope_server/frontend/common/flask_decorators.py index 2a5280e..f94d68f 100644 --- a/polytope_server/frontend/common/flask_decorators.py +++ b/polytope_server/frontend/common/flask_decorators.py @@ -18,7 +18,7 @@ # does it submit to any jurisdiction. # -import collections +import collections.abc import json from flask import Response @@ -31,13 +31,13 @@ def RequestSucceeded(response): - if not isinstance(response, collections.Mapping): + if not isinstance(response, collections.abc.Mapping): response = {"message": response} return Response(response=json.dumps(response), status=200, mimetype="application/json") def RequestAccepted(response): - if not isinstance(response, collections.Mapping): + if not isinstance(response, collections.abc.Mapping): response = {"message": response} if response["message"] == "": response["message"] = "Request {}".format(response["status"]) From 76239174f0a06aa6a5d56db1ca15a2fa6530d7c9 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Wed, 18 Oct 2023 10:34:46 +0200 Subject: [PATCH 02/82] Feature/s3 secure (#2) Add config option staging.s3.secure to support secure communication with S3. --------- Co-authored-by: Christian Kanesan --- polytope_server/common/staging/s3_staging.py | 5 +-- tests/unit/test_s3_staging.py | 37 ++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 tests/unit/test_s3_staging.py diff --git a/polytope_server/common/staging/s3_staging.py b/polytope_server/common/staging/s3_staging.py index bac873c..c2234e4 100644 --- a/polytope_server/common/staging/s3_staging.py +++ b/polytope_server/common/staging/s3_staging.py @@ -47,15 +47,16 @@ def __init__(self, config): access_key = config.get("access_key", "") secret_key = config.get("secret_key", "") self.bucket = config.get("bucket", "default") + secure = config.get("secure", False) == True self.url = config.get("url", None) internal_url = "{}:{}".format(self.host, self.port) self.client = Minio( internal_url, access_key=access_key, secret_key=secret_key, - secure=False, + secure=secure, ) - self.internal_url = "http://" + internal_url + self.internal_url = ("https://" if secure else "http://") + internal_url try: self.client.make_bucket(self.bucket) diff --git a/tests/unit/test_s3_staging.py b/tests/unit/test_s3_staging.py new file mode 100644 index 0000000..7bd7d4c --- /dev/null +++ b/tests/unit/test_s3_staging.py @@ -0,0 +1,37 @@ +from unittest import mock +from polytope_server.common.staging.s3_staging import S3Staging + + +@mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) +def test_s3_staging_secure_false(mock_minio: mock.Mock): + s3Staging = S3Staging(config={"secure": False}) + + verify_secure_flag_and_internal_url(mock_minio, s3Staging, False) + + +@mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) +def test_s3_staging_secure_any_value_false(mock_minio: mock.Mock): + s3Staging = S3Staging(config={"secure": "sdafsdfs"}) + + verify_secure_flag_and_internal_url(mock_minio, s3Staging, False) + + +@mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) +def test_s3_staging_secure_default(mock_minio: mock.Mock): + 
s3Staging = S3Staging(config={}) + + verify_secure_flag_and_internal_url(mock_minio, s3Staging, False) + + +@mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) +def test_s3_staging_secure_true(mock_minio: mock.Mock): + s3Staging = S3Staging(config={"secure": True}) + + verify_secure_flag_and_internal_url(mock_minio, s3Staging, True) + + +def verify_secure_flag_and_internal_url(mock_minio: mock.Mock, s3Staging: S3Staging, secure: bool): + mock_minio.assert_called_once() + _, kwargs = mock_minio.call_args + assert kwargs["secure"] == secure + assert s3Staging.get_internal_url("test").startswith("https" if secure else "http") From 9740fdd92e2baec96caa0cf2ea0946c348845f12 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Mon, 30 Oct 2023 11:11:06 +0100 Subject: [PATCH 03/82] Allow list access --- polytope_server/common/staging/s3_staging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polytope_server/common/staging/s3_staging.py b/polytope_server/common/staging/s3_staging.py index c2234e4..026396c 100644 --- a/polytope_server/common/staging/s3_staging.py +++ b/polytope_server/common/staging/s3_staging.py @@ -195,7 +195,7 @@ def get_type(self): def bucket_policy(self): """ Grants read access to individual objects - user has access to all objects, but would need to know the UUID. - Denies read access to the bucket (cannot list objects) - important, so users cannot see all UUIDs! + Grants read access to the bucket (was originally denieds). Denies read access to the bucket location (quite meaningless for MinIO) """ policy = { @@ -210,7 +210,7 @@ def bucket_policy(self): }, { "Sid": "", - "Effect": "Deny", + "Effect": "Allow", "Principal": {"AWS": "*"}, "Action": "s3:ListBucket", "Resource": "arn:aws:s3:::{}".format(self.bucket), From ab1222d5ca2e92f2d0780c9ff82b7b3bed2efbad Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 31 Oct 2023 09:19:34 +0100 Subject: [PATCH 04/82] Allow GetBucketLocation --- polytope_server/common/staging/s3_staging.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/polytope_server/common/staging/s3_staging.py b/polytope_server/common/staging/s3_staging.py index 026396c..034cb07 100644 --- a/polytope_server/common/staging/s3_staging.py +++ b/polytope_server/common/staging/s3_staging.py @@ -215,6 +215,13 @@ def bucket_policy(self): "Action": "s3:ListBucket", "Resource": "arn:aws:s3:::{}".format(self.bucket), }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:GetBucketLocation", + "Resource": "arn:aws:s3:::{}".format(self.bucket), + }, { "Sid": "", "Effect": "Allow", From 1a1f2bd451e6d5b57c42bce9df88e8d08852188a Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 31 Oct 2023 09:36:22 +0100 Subject: [PATCH 05/82] Allow GetBucketLocation --- polytope_server/common/staging/s3_staging.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/polytope_server/common/staging/s3_staging.py b/polytope_server/common/staging/s3_staging.py index 034cb07..fc52d8a 100644 --- a/polytope_server/common/staging/s3_staging.py +++ b/polytope_server/common/staging/s3_staging.py @@ -203,7 +203,7 @@ def bucket_policy(self): "Statement": [ { "Sid": "", - "Effect": "Deny", + "Effect": "Allow", "Principal": {"AWS": "*"}, "Action": "s3:GetBucketLocation", "Resource": "arn:aws:s3:::{}".format(self.bucket), @@ -215,13 +215,6 @@ def bucket_policy(self): "Action": "s3:ListBucket", "Resource": "arn:aws:s3:::{}".format(self.bucket), }, - { - "Sid": "", - "Effect": "Allow", - 
"Principal": {"AWS": "*"}, - "Action": "s3:GetBucketLocation", - "Resource": "arn:aws:s3:::{}".format(self.bucket), - }, { "Sid": "", "Effect": "Allow", From 12ebc6a8b980a5580a8a3668a6c073586299fff6 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 31 Oct 2023 15:19:53 +0100 Subject: [PATCH 06/82] Update comment --- polytope_server/common/staging/s3_staging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/common/staging/s3_staging.py b/polytope_server/common/staging/s3_staging.py index fc52d8a..02b96cb 100644 --- a/polytope_server/common/staging/s3_staging.py +++ b/polytope_server/common/staging/s3_staging.py @@ -196,7 +196,7 @@ def bucket_policy(self): """ Grants read access to individual objects - user has access to all objects, but would need to know the UUID. Grants read access to the bucket (was originally denieds). - Denies read access to the bucket location (quite meaningless for MinIO) + Grants read access to the bucket location """ policy = { "Version": "2012-10-17", From a0bcdf82e07d351be8e9ee47630d62b0df5680f5 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 7 Nov 2023 16:36:54 +0100 Subject: [PATCH 07/82] Add region and rollback bucket policies --- polytope_server/common/staging/s3_staging.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/polytope_server/common/staging/s3_staging.py b/polytope_server/common/staging/s3_staging.py index 02b96cb..2a72f00 100644 --- a/polytope_server/common/staging/s3_staging.py +++ b/polytope_server/common/staging/s3_staging.py @@ -59,7 +59,7 @@ def __init__(self, config): self.internal_url = ("https://" if secure else "http://") + internal_url try: - self.client.make_bucket(self.bucket) + self.client.make_bucket(self.bucket, self.client._region) except BucketAlreadyOwnedByYou: pass @@ -195,22 +195,22 @@ def get_type(self): def bucket_policy(self): """ Grants read access to individual objects - user has access to all objects, but would need to know the UUID. - Grants read access to the bucket (was originally denieds). - Grants read access to the bucket location + Denies read access to the bucket (cannot list objects) - important, so users cannot see all UUIDs! 
+ Denies read access to the bucket location (quite meaningless for MinIO) """ policy = { "Version": "2012-10-17", "Statement": [ { "Sid": "", - "Effect": "Allow", + "Effect": "Deny", "Principal": {"AWS": "*"}, "Action": "s3:GetBucketLocation", "Resource": "arn:aws:s3:::{}".format(self.bucket), }, { "Sid": "", - "Effect": "Allow", + "Effect": "Deny", "Principal": {"AWS": "*"}, "Action": "s3:ListBucket", "Resource": "arn:aws:s3:::{}".format(self.bucket), From 32421383d8d6140c671d8f42c3ad5ef477267723 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Thu, 9 Nov 2023 12:04:23 +0100 Subject: [PATCH 08/82] Add SQS queue --- polytope_server/common/queue/queue.py | 2 +- polytope_server/common/queue/sqs_queue.py | 99 +++++++++++++++++++++++ 2 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 polytope_server/common/queue/sqs_queue.py diff --git a/polytope_server/common/queue/queue.py b/polytope_server/common/queue/queue.py index 8e8ed85..9cb1e22 100644 --- a/polytope_server/common/queue/queue.py +++ b/polytope_server/common/queue/queue.py @@ -80,7 +80,7 @@ def collect_metric_info( """Collect dictionary of metrics""" -queue_dict = {"rabbitmq": "RabbitmqQueue"} +queue_dict = {"rabbitmq": "RabbitmqQueue", "sqs": "SQSQueue"} def create_queue(queue_config): diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py new file mode 100644 index 0000000..fba3a24 --- /dev/null +++ b/polytope_server/common/queue/sqs_queue.py @@ -0,0 +1,99 @@ +import json +import logging +from . import queue +import boto3 + +class SQSQueue(queue.Queue): + def __init__(self, config): + + host = config.get("host", "localhost") + queue_name = config.get("name", "default") + self.username = config.get("user", "guest") + self.password = config.get("password", "guest") + self.keep_alive_interval = config.get("keep_alive_interval", 60) + self.queue_url = "{}/{}".format(host, queue_name) + + logging.getLogger("sqs").setLevel("WARNING") + + self.client = boto3.client('sqs') + self.check_connection() + + def enqueue(self, message): + self.client.send_message( + QueueUrl=self.queue_url, + MessageBody=json.dumps(message.body).encode("utf-8") + ) + + def dequeue(self): + response = self.client.receive_message( + QueueUrl=self.queue_url, + VisibilityTimeout=120, #If processing takes more seconds, message will be read twice + MaxNumberOfMessages=1, + ) + if not response['Messages']: + return None + if len(response['Messages']) > 1: + raise ValueError("Received {} messages, should have received 1".format(len(response['Messages']))) + + body = response['Messages'][0]['Body'] + receipt_handle = response['Messages'][0]['ReceiptHandle'] + + return queue.Message(json.loads(body.decode("utf-8")), context=receipt_handle) + + + def ack(self, message): + self.client.delete_message( + QueueUrl=self.queue_url, + ReceiptHandle=message.context + ) + + def nack(self, message): + self.client.change_message_visibility( + QueueUrl=self.queue_url, + ReceiptHandle=message.context, + VisibilityTimeout=0 + ) + + def keep_alive(self): + return self.check_connection() + + def check_connection(self): + """Check the queue connection""" + response = self.client.get_queue_attributes( + QueueUrl=self.queue_url, + AttributeNames=['CreatedTimestamp'] + ) + #Tries to parse response + return 'Attributes' in response & 'CreatedTimestamp' in response['Attributes'] + + def close_connection(self): + self.client.close() + + def count(self): + response = self.client.get_queue_attributes( + QueueUrl=self.queue_url, + 
AttributeNames=[ + 'ApproximateNumberOfMessages' + ] + ) + num_messages = response['Attributes']['ApproximateNumberOfMessages'] + + return int(num_messages) + + def get_type(self): + return "sqs" + + def collect_metric_info(self): + response = self.client.get_queue_attributes( + QueueUrl=self.queue_url, + AttributeNames=[ + 'ApproximateNumberOfMessages', + 'ApproximateNumberOfMessages', + 'ApproximateNumberOfMessagesNotVisible', + ] + ) + return response['Attributes'] + + + + From 5c0c00470cf5e1230568c475084968a58d487ff8 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Thu, 9 Nov 2023 13:19:57 +0100 Subject: [PATCH 09/82] Add visibility_timeout to config --- polytope_server/common/queue/sqs_queue.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index fba3a24..e3466e7 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -11,6 +11,7 @@ def __init__(self, config): self.username = config.get("user", "guest") self.password = config.get("password", "guest") self.keep_alive_interval = config.get("keep_alive_interval", 60) + self.visibility_timeout = config.get("visibility_timeout", 120) self.queue_url = "{}/{}".format(host, queue_name) logging.getLogger("sqs").setLevel("WARNING") @@ -27,7 +28,7 @@ def enqueue(self, message): def dequeue(self): response = self.client.receive_message( QueueUrl=self.queue_url, - VisibilityTimeout=120, #If processing takes more seconds, message will be read twice + VisibilityTimeout=self.visibility_timeout, #If processing takes more seconds, message will be read twice MaxNumberOfMessages=1, ) if not response['Messages']: @@ -88,7 +89,7 @@ def collect_metric_info(self): QueueUrl=self.queue_url, AttributeNames=[ 'ApproximateNumberOfMessages', - 'ApproximateNumberOfMessages', + 'ApproximateNumberOfMessagesDelayed', 'ApproximateNumberOfMessagesNotVisible', ] ) From 28cb2d94f1cc1ac2cc14007c7f356898f57cca36 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Thu, 9 Nov 2023 15:57:16 +0100 Subject: [PATCH 10/82] Add boto3 credentials --- polytope_server/common/queue/sqs_queue.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index e3466e7..e9037f2 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -2,21 +2,25 @@ import logging from . 
import queue import boto3 +import os class SQSQueue(queue.Queue): def __init__(self, config): host = config.get("host", "localhost") queue_name = config.get("name", "default") - self.username = config.get("user", "guest") - self.password = config.get("password", "guest") + region = config.get("region", "eu-central-2") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) self.queue_url = "{}/{}".format(host, queue_name) logging.getLogger("sqs").setLevel("WARNING") - - self.client = boto3.client('sqs') + session = boto3.Session( + aws_access_key_id=os.getenv('POLYTOPE_S3_ACCESS_KEY'), + aws_secret_access_key=os.getenv('POLYTOPE_S3_SECRET_KEY'), + region_name=region + ) + self.client = session.client('sqs') self.check_connection() def enqueue(self, message): From fb04abfe1ed0144befb5d330d490e8b27533a89e Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Fri, 10 Nov 2023 09:22:11 +0100 Subject: [PATCH 11/82] Add boto3 to reqs --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e461a9d..c496d3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,4 +29,5 @@ deepmerge==0.1.0 flask-swagger-ui==3.25.0 ldap3==2.7 docker==4.2.0 -python-keycloak==0.24.0 \ No newline at end of file +python-keycloak==0.24.0 +boto3==1.28.80 From d27f60b69a6140396694e04f9640e2ab85fa5d6a Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Fri, 10 Nov 2023 11:10:28 +0100 Subject: [PATCH 12/82] Update logic operator --- polytope_server/common/queue/sqs_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index e9037f2..b22138f 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -69,7 +69,7 @@ def check_connection(self): AttributeNames=['CreatedTimestamp'] ) #Tries to parse response - return 'Attributes' in response & 'CreatedTimestamp' in response['Attributes'] + return 'Attributes' in response and 'CreatedTimestamp' in response['Attributes'] def close_connection(self): self.client.close() From 1eb7c4c4e48e282f0c5449119fde2ad6caff3ffc Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Fri, 10 Nov 2023 16:22:37 +0100 Subject: [PATCH 13/82] Remove enqueue encoding --- polytope_server/common/queue/sqs_queue.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index b22138f..ace49e5 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -26,7 +26,7 @@ def __init__(self, config): def enqueue(self, message): self.client.send_message( QueueUrl=self.queue_url, - MessageBody=json.dumps(message.body).encode("utf-8") + MessageBody=json.dumps(message.body) ) def dequeue(self): @@ -60,10 +60,11 @@ def nack(self, message): ) def keep_alive(self): - return self.check_connection() + #Implemented for compatibility, disabled because each request to SQS is billed + pass + # return self.check_connection() def check_connection(self): - """Check the queue connection""" response = self.client.get_queue_attributes( QueueUrl=self.queue_url, AttributeNames=['CreatedTimestamp'] From c668eba2b638f495f34c532b54db1d1b3912e50e Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Mon, 13 Nov 2023 09:42:18 +0100 Subject: [PATCH 14/82] Update dequeue func --- 
polytope_server/common/queue/sqs_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index ace49e5..c9ea2fd 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -43,7 +43,7 @@ def dequeue(self): body = response['Messages'][0]['Body'] receipt_handle = response['Messages'][0]['ReceiptHandle'] - return queue.Message(json.loads(body.decode("utf-8")), context=receipt_handle) + return queue.Message(json.loads(body), context=receipt_handle) def ack(self, message): From a16c79d29cb5699ec6b4d2c3e9b86be1af6b8e34 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Mon, 13 Nov 2023 15:24:26 +0100 Subject: [PATCH 15/82] Format --- polytope_server/common/queue/sqs_queue.py | 77 +++++++++-------------- 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index c9ea2fd..9e01ac2 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -4,86 +4,71 @@ import boto3 import os + class SQSQueue(queue.Queue): def __init__(self, config): - host = config.get("host", "localhost") queue_name = config.get("name", "default") region = config.get("region", "eu-central-2") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) self.queue_url = "{}/{}".format(host, queue_name) - + logging.getLogger("sqs").setLevel("WARNING") session = boto3.Session( - aws_access_key_id=os.getenv('POLYTOPE_S3_ACCESS_KEY'), - aws_secret_access_key=os.getenv('POLYTOPE_S3_SECRET_KEY'), - region_name=region + aws_access_key_id=os.getenv("POLYTOPE_S3_ACCESS_KEY"), + aws_secret_access_key=os.getenv("POLYTOPE_S3_SECRET_KEY"), + region_name=region, ) - self.client = session.client('sqs') + self.client = session.client("sqs") self.check_connection() def enqueue(self, message): - self.client.send_message( - QueueUrl=self.queue_url, - MessageBody=json.dumps(message.body) - ) + self.client.send_message(QueueUrl=self.queue_url, MessageBody=json.dumps(message.body)) def dequeue(self): response = self.client.receive_message( QueueUrl=self.queue_url, - VisibilityTimeout=self.visibility_timeout, #If processing takes more seconds, message will be read twice + VisibilityTimeout=self.visibility_timeout, # If processing takes more seconds, message will be read twice MaxNumberOfMessages=1, - ) - if not response['Messages']: + ) + if not response["Messages"]: return None - if len(response['Messages']) > 1: - raise ValueError("Received {} messages, should have received 1".format(len(response['Messages']))) + if len(response["Messages"]) > 1: + raise ValueError("Received {} messages, should have received 1".format(len(response["Messages"]))) - body = response['Messages'][0]['Body'] - receipt_handle = response['Messages'][0]['ReceiptHandle'] - - return queue.Message(json.loads(body), context=receipt_handle) + body = response["Messages"][0]["Body"] + receipt_handle = response["Messages"][0]["ReceiptHandle"] + return queue.Message(json.loads(body), context=receipt_handle) def ack(self, message): - self.client.delete_message( - QueueUrl=self.queue_url, - ReceiptHandle=message.context - ) + self.client.delete_message(QueueUrl=self.queue_url, ReceiptHandle=message.context) def nack(self, message): self.client.change_message_visibility( - QueueUrl=self.queue_url, - ReceiptHandle=message.context, 
- VisibilityTimeout=0 + QueueUrl=self.queue_url, ReceiptHandle=message.context, VisibilityTimeout=0 ) def keep_alive(self): - #Implemented for compatibility, disabled because each request to SQS is billed + # Implemented for compatibility, disabled because each request to SQS is billed pass # return self.check_connection() def check_connection(self): - response = self.client.get_queue_attributes( - QueueUrl=self.queue_url, - AttributeNames=['CreatedTimestamp'] - ) - #Tries to parse response - return 'Attributes' in response and 'CreatedTimestamp' in response['Attributes'] + response = self.client.get_queue_attributes(QueueUrl=self.queue_url, AttributeNames=["CreatedTimestamp"]) + # Tries to parse response + return "Attributes" in response and "CreatedTimestamp" in response["Attributes"] def close_connection(self): self.client.close() def count(self): response = self.client.get_queue_attributes( - QueueUrl=self.queue_url, - AttributeNames=[ - 'ApproximateNumberOfMessages' - ] + QueueUrl=self.queue_url, AttributeNames=["ApproximateNumberOfMessages"] ) - num_messages = response['Attributes']['ApproximateNumberOfMessages'] - + num_messages = response["Attributes"]["ApproximateNumberOfMessages"] + return int(num_messages) def get_type(self): @@ -93,13 +78,9 @@ def collect_metric_info(self): response = self.client.get_queue_attributes( QueueUrl=self.queue_url, AttributeNames=[ - 'ApproximateNumberOfMessages', - 'ApproximateNumberOfMessagesDelayed', - 'ApproximateNumberOfMessagesNotVisible', - ] + "ApproximateNumberOfMessages", + "ApproximateNumberOfMessagesDelayed", + "ApproximateNumberOfMessagesNotVisible", + ], ) - return response['Attributes'] - - - - + return response["Attributes"] From 91ac6201748ab5d9d8a1b8a444699fd30bb6edf9 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Mon, 13 Nov 2023 15:24:48 +0100 Subject: [PATCH 16/82] Remove message number check --- polytope_server/common/queue/sqs_queue.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index 9e01ac2..145698c 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -34,8 +34,6 @@ def dequeue(self): ) if not response["Messages"]: return None - if len(response["Messages"]) > 1: - raise ValueError("Received {} messages, should have received 1".format(len(response["Messages"]))) body = response["Messages"][0]["Body"] receipt_handle = response["Messages"][0]["ReceiptHandle"] From 4e226d901b4912fc62550ad0e618b0d84e58f19c Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 14 Nov 2023 09:28:15 +0100 Subject: [PATCH 17/82] Update metric collection --- .../metric_collector/queue_metric_collector.py | 12 ++++++++++++ polytope_server/common/queue/sqs_queue.py | 13 ++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/polytope_server/common/metric_collector/queue_metric_collector.py b/polytope_server/common/metric_collector/queue_metric_collector.py index 828a774..38ae5c5 100644 --- a/polytope_server/common/metric_collector/queue_metric_collector.py +++ b/polytope_server/common/metric_collector/queue_metric_collector.py @@ -46,3 +46,15 @@ def total_queued(self): channel = connection.channel() q = channel.queue_declare(queue=self.queue_name, durable=True, passive=True) return q.method.message_count + + +class SQSQueueMetricCollector(QueueMetricCollector): + def __init__(self, host): + self.host = host + self.message_counts = None + + def total_queued(self): + num_messages 
= 0 + for key in self.message_counts: + num_messages += self.message_counts[key] + return num_messages diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index 145698c..b43ebaf 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -3,12 +3,13 @@ from . import queue import boto3 import os +from ..metric_collector import SQSQueueMetricCollector class SQSQueue(queue.Queue): def __init__(self, config): - host = config.get("host", "localhost") - queue_name = config.get("name", "default") + host = config.get("host") + queue_name = config.get("name") region = config.get("region", "eu-central-2") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) @@ -22,6 +23,7 @@ def __init__(self, config): ) self.client = session.client("sqs") self.check_connection() + self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url) def enqueue(self, message): self.client.send_message(QueueUrl=self.queue_url, MessageBody=json.dumps(message.body)) @@ -35,6 +37,10 @@ def dequeue(self): if not response["Messages"]: return None + for item in response["Messages"][1:]: + self.client.change_message_visibility( + QueueUrl=self.queue_url, ReceiptHandle=item["ReceiptHandle"], VisibilityTimeout=0 + ) body = response["Messages"][0]["Body"] receipt_handle = response["Messages"][0]["ReceiptHandle"] @@ -81,4 +87,5 @@ def collect_metric_info(self): "ApproximateNumberOfMessagesNotVisible", ], ) - return response["Attributes"] + self.queue_metric_collector.message_counts = response["Attributes"] + return self.queue_metric_collector.collect().serialize() From 02fa16fef1cb01663f8c5fc93ab8deb47087e72f Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 14 Nov 2023 10:15:52 +0100 Subject: [PATCH 18/82] Update region and credentials --- polytope_server/common/queue/sqs_queue.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index b43ebaf..af87a76 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -3,6 +3,7 @@ from . 
import queue import boto3 import os +import re from ..metric_collector import SQSQueueMetricCollector @@ -10,18 +11,15 @@ class SQSQueue(queue.Queue): def __init__(self, config): host = config.get("host") queue_name = config.get("name") - region = config.get("region", "eu-central-2") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) + + region = self.__parse_region(host) self.queue_url = "{}/{}".format(host, queue_name) logging.getLogger("sqs").setLevel("WARNING") - session = boto3.Session( - aws_access_key_id=os.getenv("POLYTOPE_S3_ACCESS_KEY"), - aws_secret_access_key=os.getenv("POLYTOPE_S3_SECRET_KEY"), - region_name=region, - ) - self.client = session.client("sqs") + + self.client = boto3.client("sqs", region_name=region) self.check_connection() self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url) @@ -89,3 +87,7 @@ def collect_metric_info(self): ) self.queue_metric_collector.message_counts = response["Attributes"] return self.queue_metric_collector.collect().serialize() + + def __parse_region(self, host): + pattern = "https:\/\/sqs.(.*).amazonaws.com\/\d+" + return re.findall(pattern, host)[0] From c74521ffcc95db43ecb21b3e0a151dd412c7a0d6 Mon Sep 17 00:00:00 2001 From: Milos Belic <119611649+milosbeliczuhlke@users.noreply.github.com> Date: Tue, 14 Nov 2023 11:07:46 +0100 Subject: [PATCH 19/82] Update polytope_server/common/metric_collector/queue_metric_collector.py Co-authored-by: Christian Kanesan --- .../common/metric_collector/queue_metric_collector.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/polytope_server/common/metric_collector/queue_metric_collector.py b/polytope_server/common/metric_collector/queue_metric_collector.py index 38ae5c5..0c48650 100644 --- a/polytope_server/common/metric_collector/queue_metric_collector.py +++ b/polytope_server/common/metric_collector/queue_metric_collector.py @@ -54,7 +54,4 @@ def __init__(self, host): self.message_counts = None def total_queued(self): - num_messages = 0 - for key in self.message_counts: - num_messages += self.message_counts[key] - return num_messages + return sum(self.message_counts.values()) From 47dcf7395dfb9765fe1769f06e799a8625ca06da Mon Sep 17 00:00:00 2001 From: Milos Belic <119611649+milosbeliczuhlke@users.noreply.github.com> Date: Tue, 14 Nov 2023 11:09:38 +0100 Subject: [PATCH 20/82] Update polytope_server/common/queue/sqs_queue.py Co-authored-by: Christian Kanesan --- polytope_server/common/queue/sqs_queue.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index af87a76..893e17b 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -35,12 +35,13 @@ def dequeue(self): if not response["Messages"]: return None - for item in response["Messages"][1:]: + msg, *remainder = response["Messages"] + for item in remainder: self.client.change_message_visibility( QueueUrl=self.queue_url, ReceiptHandle=item["ReceiptHandle"], VisibilityTimeout=0 ) - body = response["Messages"][0]["Body"] - receipt_handle = response["Messages"][0]["ReceiptHandle"] + body = msg["Body"] + receipt_handle = msg["ReceiptHandle"] return queue.Message(json.loads(body), context=receipt_handle) From 926bc52ac88e953e8789a7e05c2853449e89d6a9 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 14 Nov 2023 11:15:22 +0100 Subject: [PATCH 21/82] Add get_queue_url func --- 
polytope_server/common/queue/sqs_queue.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index af87a76..b6875e6 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -2,24 +2,20 @@ import logging from . import queue import boto3 -import os -import re from ..metric_collector import SQSQueueMetricCollector class SQSQueue(queue.Queue): def __init__(self, config): - host = config.get("host") - queue_name = config.get("name") + queue_name = config.get("queue_name") + region = config.get("region") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) - region = self.__parse_region(host) - self.queue_url = "{}/{}".format(host, queue_name) - logging.getLogger("sqs").setLevel("WARNING") self.client = boto3.client("sqs", region_name=region) + self.queue_url = self.client.get_queue_url(QueueName=queue_name).get("QueueUrl") self.check_connection() self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url) @@ -87,7 +83,3 @@ def collect_metric_info(self): ) self.queue_metric_collector.message_counts = response["Attributes"] return self.queue_metric_collector.collect().serialize() - - def __parse_region(self, host): - pattern = "https:\/\/sqs.(.*).amazonaws.com\/\d+" - return re.findall(pattern, host)[0] From 86b51dca69f3ef72e451efac46b314b49543c516 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 14 Nov 2023 11:40:38 +0100 Subject: [PATCH 22/82] Add message_group_id --- polytope_server/common/queue/sqs_queue.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index 721653f..9f396a9 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -11,7 +11,7 @@ def __init__(self, config): region = config.get("region") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) - + self.message_group_id = config.get("message_group_id", "polytope") logging.getLogger("sqs").setLevel("WARNING") self.client = boto3.client("sqs", region_name=region) @@ -20,7 +20,9 @@ def __init__(self, config): self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url) def enqueue(self, message): - self.client.send_message(QueueUrl=self.queue_url, MessageBody=json.dumps(message.body)) + self.client.send_message( + QueueUrl=self.queue_url, MessageBody=json.dumps(message.body), MessageGroupId=self.message_group_id + ) def dequeue(self): response = self.client.receive_message( From 6ffe330d210438590d451aac2e25c76fe4d74d12 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Tue, 14 Nov 2023 12:50:23 +0100 Subject: [PATCH 23/82] Add support for authenticaiton to mongodb --- .../mongoapikey_authentication.py | 9 ++-- .../authentication/mongodb_authentication.py | 9 ++-- .../authorization/mongodb_authorization.py | 10 ++-- polytope_server/common/caching/caching.py | 10 ++-- .../common/identity/mongodb_identity.py | 9 ++-- .../keygenerator/mongodb_keygenerator.py | 11 +++-- .../metric_store/mongodb_metric_store.py | 11 +++-- .../common/mongo_client_factory.py | 22 +++++++++ .../request_store/mongodb_request_store.py | 12 +++-- pyproject.toml | 5 +- tests/unit/test_mongo_client_factory.py | 48 +++++++++++++++++++ tests/unit/test_s3_staging.py | 1 
+ 12 files changed, 128 insertions(+), 29 deletions(-) create mode 100644 polytope_server/common/mongo_client_factory.py create mode 100644 tests/unit/test_mongo_client_factory.py diff --git a/polytope_server/common/authentication/mongoapikey_authentication.py b/polytope_server/common/authentication/mongoapikey_authentication.py index c5bf439..c2fbd77 100644 --- a/polytope_server/common/authentication/mongoapikey_authentication.py +++ b/polytope_server/common/authentication/mongoapikey_authentication.py @@ -20,8 +20,7 @@ from datetime import datetime -import pymongo - +from .. import mongo_client_factory from ..auth import User from ..exceptions import ForbiddenRequest from ..metric_collector import MongoStorageMetricCollector @@ -45,9 +44,13 @@ def __init__(self, name, realm, config): host = config.get("host", "localhost") port = config.get("port", "27017") collection = config.get("collection", "keys") + username = config.get("username") + password = config.get("password") + tls = config.get("tls", False) == True + tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(host, port) - self.mongo_client = pymongo.MongoClient(endpoint, journal=True, connect=False) + self.mongo_client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) self.database = self.mongo_client.keys self.keys = self.database[collection] assert realm == "polytope" diff --git a/polytope_server/common/authentication/mongodb_authentication.py b/polytope_server/common/authentication/mongodb_authentication.py index ff1f579..0fb3d93 100644 --- a/polytope_server/common/authentication/mongodb_authentication.py +++ b/polytope_server/common/authentication/mongodb_authentication.py @@ -22,8 +22,7 @@ import binascii import hashlib -import pymongo - +from .. import mongo_client_factory from ..auth import User from ..exceptions import ForbiddenRequest from ..metric_collector import MongoStorageMetricCollector @@ -37,9 +36,13 @@ def __init__(self, name, realm, config): host = config.get("host", "localhost") port = config.get("port", "27017") collection = config.get("collection", "users") + username = config.get("username") + password = config.get("password") + tls = config.get("tls", False) == True + tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(host, port) - self.mongo_client = pymongo.MongoClient(endpoint, journal=True, connect=False) + self.mongo_client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) self.database = self.mongo_client.authentication self.users = self.database[collection] diff --git a/polytope_server/common/authorization/mongodb_authorization.py b/polytope_server/common/authorization/mongodb_authorization.py index 59fe716..280d9af 100644 --- a/polytope_server/common/authorization/mongodb_authorization.py +++ b/polytope_server/common/authorization/mongodb_authorization.py @@ -18,8 +18,7 @@ # does it submit to any jurisdiction. # -import pymongo - +from .. import mongo_client_factory from ..auth import User from ..metric_collector import MongoStorageMetricCollector from . 
import authorization @@ -32,9 +31,13 @@ def __init__(self, name, realm, config): self.host = config.get("host", "localhost") self.port = config.get("port", "27017") self.collection = config.get("collection", "users") + username = config.get("username") + password = config.get("password") + tls = config.get("tls", False) == True + tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(self.host, self.port) - self.mongo_client = pymongo.MongoClient(endpoint, journal=True, connect=False) + self.mongo_client = mongo_client_factory.create_client(self.host, self.port, username, password, tls, tlsCAFile) self.database = self.mongo_client.authentication self.users = self.database[self.collection] @@ -45,7 +48,6 @@ def __init__(self, name, realm, config): super().__init__(name, realm, config) def get_roles(self, user: User) -> list: - if user.realm != self.realm(): raise ValueError( "Trying to authorize a user in the wrong realm, expected {}, got {}".format(self.realm(), user.realm) diff --git a/polytope_server/common/caching/caching.py b/polytope_server/common/caching/caching.py index 652ba07..4a0d976 100644 --- a/polytope_server/common/caching/caching.py +++ b/polytope_server/common/caching/caching.py @@ -29,9 +29,9 @@ from typing import Dict, Union import pymemcache -import pymongo import redis +from .. import mongo_client_factory from ..metric import MetricType from ..metric_collector import ( DictStorageMetricCollector, @@ -197,9 +197,13 @@ def __init__(self, cache_config): super().__init__(cache_config) host = cache_config.get("host", "localhost") port = cache_config.get("port", 27017) + username = cache_config.get("username") + password = cache_config.get("password") + tls = cache_config.get("tls", False) == True + tlsCAFile = cache_config.get("tlsCAFile", None) endpoint = "{}:{}".format(host, port) collection = cache_config.get("collection", "cache") - self.client = pymongo.MongoClient(host + ":" + str(port), journal=False, connect=False) + self.client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) self.database = self.client.cache self.collection = self.database[collection] self.collection.create_index("expire_at", expireAfterSeconds=0) @@ -220,7 +224,6 @@ def get(self, key): return obj["data"] def set(self, key, object, lifetime): - if lifetime == 0 or lifetime is None: expiry = datetime.datetime.max else: @@ -324,7 +327,6 @@ def __call__(self, f): @functools.wraps(f) def wrapper(*args, **kwargs): - cache.cancelled = False if self.cache is None: diff --git a/polytope_server/common/identity/mongodb_identity.py b/polytope_server/common/identity/mongodb_identity.py index d6c68be..89cee2f 100644 --- a/polytope_server/common/identity/mongodb_identity.py +++ b/polytope_server/common/identity/mongodb_identity.py @@ -18,8 +18,7 @@ # does it submit to any jurisdiction. # -import pymongo - +from .. 
import mongo_client_factory from ..authentication.mongodb_authentication import MongoAuthentication from ..exceptions import Conflict, NotFound from ..metric_collector import MetricCollector, MongoStorageMetricCollector @@ -32,9 +31,13 @@ def __init__(self, config): self.host = config.get("host", "localhost") self.port = config.get("port", "27017") self.collection = config.get("collection", "users") + username = config.get("username") + password = config.get("password") + tls = config.get("tls", False) == True + tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(self.host, self.port) - self.mongo_client = pymongo.MongoClient(endpoint, journal=True, connect=False) + self.mongo_client = mongo_client_factory.create_client(self.host, self.port, username, password, tls, tlsCAFile) self.database = self.mongo_client.authentication self.users = self.database[self.collection] self.realm = config.get("realm") diff --git a/polytope_server/common/keygenerator/mongodb_keygenerator.py b/polytope_server/common/keygenerator/mongodb_keygenerator.py index 471f49b..7854d94 100644 --- a/polytope_server/common/keygenerator/mongodb_keygenerator.py +++ b/polytope_server/common/keygenerator/mongodb_keygenerator.py @@ -22,8 +22,7 @@ import uuid from datetime import datetime, timedelta -import pymongo - +from .. import mongo_client_factory from ..auth import User from ..exceptions import ForbiddenRequest from ..metric_collector import MongoStorageMetricCollector @@ -37,8 +36,14 @@ def __init__(self, config): host = config.get("host", "localhost") port = config.get("port", "27017") collection = config.get("collection", "keys") + username = config.get("username") + password = config.get("password") + tls = config.get("tls", False) == True + tlsCAFile = config.get("tlsCAFile", None) + endpoint = "{}:{}".format(host, port) - self.mongo_client = pymongo.MongoClient(endpoint, journal=True, connect=False) + + self.mongo_client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) self.database = self.mongo_client.keys self.keys = self.database[collection] self.realms = config.get("allowed_realms") diff --git a/polytope_server/common/metric_store/mongodb_metric_store.py b/polytope_server/common/metric_store/mongodb_metric_store.py index 9dbcdaf..3fe468c 100644 --- a/polytope_server/common/metric_store/mongodb_metric_store.py +++ b/polytope_server/common/metric_store/mongodb_metric_store.py @@ -22,6 +22,7 @@ import pymongo +from .. 
import mongo_client_factory from ..metric import ( CacheInfo, Metric, @@ -42,10 +43,15 @@ def __init__(self, config=None): port = config.get("port", "27017") metric_collection = config.get("collection", "metrics") + username = config.get("username") + password = config.get("password") + tls = config.get("tls", False) == True + tlsCAFile = config.get("tlsCAFile", None) + endpoint = "{}:{}".format(host, port) - self.mongo_client = pymongo.MongoClient(endpoint, journal=True, connect=False) - self.database = self.mongo_client.metric_store + self.mongo_client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) + self.database = self.mongo_client.metric_storeg self.store = self.database[metric_collection] self.metric_type_class_map = { @@ -85,7 +91,6 @@ def get_metric(self, uuid): return None def get_metrics(self, ascending=None, descending=None, limit=None, **kwargs): - all_slots = [] found_type = None diff --git a/polytope_server/common/mongo_client_factory.py b/polytope_server/common/mongo_client_factory.py new file mode 100644 index 0000000..eb1d5a7 --- /dev/null +++ b/polytope_server/common/mongo_client_factory.py @@ -0,0 +1,22 @@ +import typing +import urllib.parse + +import pymongo + + +def create_client( + host: str, + port: str, + username: typing.Optional[str] = None, + password: typing.Optional[str] = None, + tls: bool = False, + tlsCAFile: typing.Optional[str] = None, +) -> pymongo.MongoClient: + endpoint = f"mongodb://{host}:{port}" + + if username and password: + encoded_username = urllib.parse.quote_plus(username) + encoded_password = urllib.parse.quote_plus(password) + endpoint = f"mongodb://{encoded_username}:{encoded_password}@{host}:{port}" + + return pymongo.MongoClient(endpoint, journal=True, connect=False, tls=tls, tlsCAFile=tlsCAFile) diff --git a/polytope_server/common/request_store/mongodb_request_store.py b/polytope_server/common/request_store/mongodb_request_store.py index 1734e2d..ea2b3e1 100644 --- a/polytope_server/common/request_store/mongodb_request_store.py +++ b/polytope_server/common/request_store/mongodb_request_store.py @@ -23,7 +23,7 @@ import pymongo -from .. import metric_store +from .. 
import metric_store, mongo_client_factory from ..metric import MetricType, RequestStatusChange from ..metric_collector import ( MongoRequestStoreMetricCollector, @@ -39,9 +39,14 @@ def __init__(self, config=None, metric_store_config=None): port = config.get("port", "27017") request_collection = config.get("collection", "requests") + username = config.get("username") + password = config.get("password") + tls = config.get("tls", False) == True + tlsCAFile = config.get("tlsCAFile", None) + endpoint = "{}:{}".format(host, port) - self.mongo_client = pymongo.MongoClient(endpoint, journal=True, connect=False) + self.mongo_client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) self.database = self.mongo_client.request_store self.store = self.database[request_collection] @@ -87,7 +92,6 @@ def get_request(self, id): return None def get_requests(self, ascending=None, descending=None, limit=None, **kwargs): - if ascending: if ascending not in Request.__slots__: raise KeyError("Request has no key {}".format(ascending)) @@ -98,7 +102,6 @@ def get_requests(self, ascending=None, descending=None, limit=None, **kwargs): query = {} for k, v in kwargs.items(): - if k not in Request.__slots__: raise KeyError("Request has no key {}".format(k)) @@ -152,7 +155,6 @@ def update_request(self, request): return res def wipe(self): - if self.metric_store: res = self.get_requests() for i in res: diff --git a/pyproject.toml b/pyproject.toml index e34796e..85c3b07 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,5 @@ [tool.black] -line-length = 120 \ No newline at end of file +line-length = 120 + +[tool.isort] +profile = "black" diff --git a/tests/unit/test_mongo_client_factory.py b/tests/unit/test_mongo_client_factory.py new file mode 100644 index 0000000..955ce9a --- /dev/null +++ b/tests/unit/test_mongo_client_factory.py @@ -0,0 +1,48 @@ +from unittest import mock + +from polytope_server.common import mongo_client_factory + + +@mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) +def test_create_without_credentials(mock_mongo: mock.Mock): + mongo_client_factory.create_client("host", "123", username=None, password=None, tls=False) + + _verify(mock_mongo, "host:123", False) + + +@mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) +def test_create_with_credentials(mock_mongo: mock.Mock): + mongo_client_factory.create_client("host", "123", username="admin", password="admin", tls=False) + + _verify(mock_mongo, "admin:admin@host:123", False) + + +@mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) +def test_create_without_credentials_tls(mock_mongo: mock.Mock): + mongo_client_factory.create_client("host", "123", username=None, password=None, tls=True) + + _verify(mock_mongo, "host:123", True) + + +@mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) +def test_create_with_credentials_tls(mock_mongo: mock.Mock): + mongo_client_factory.create_client("host", "123", username="admin", password="admin", tls=True) + + _verify(mock_mongo, "admin:admin@host:123", True) + + +@mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) +def test_create_with_tlsCAfile(mock_mongo: mock.Mock): + mongo_client_factory.create_client( + "host", "123", username="admin", password="admin", tls=True, tlsCAFile="/test/ca.pem" + ) + + _verify(mock_mongo, "admin:admin@host:123", True, "/test/ca.pem") + + 
+def _verify(mock_mongo: mock.Mock, endpoint: str, tls: bool, tlsCAFile=None): + mock_mongo.assert_called_once() + args, kwargs = mock_mongo.call_args + assert args[0] == f"mongodb://{endpoint}" + assert kwargs["tls"] == tls + assert kwargs["tlsCAFile"] == tlsCAFile diff --git a/tests/unit/test_s3_staging.py b/tests/unit/test_s3_staging.py index 7bd7d4c..5abe61c 100644 --- a/tests/unit/test_s3_staging.py +++ b/tests/unit/test_s3_staging.py @@ -1,4 +1,5 @@ from unittest import mock + from polytope_server.common.staging.s3_staging import S3Staging From 8cea8e721c2145fdc3e88397fe2dc37c0bfba6b9 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Thu, 9 Nov 2023 12:04:23 +0100 Subject: [PATCH 24/82] Add SQS queue --- polytope_server/common/queue/queue.py | 2 +- polytope_server/common/queue/sqs_queue.py | 99 +++++++++++++++++++++++ 2 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 polytope_server/common/queue/sqs_queue.py diff --git a/polytope_server/common/queue/queue.py b/polytope_server/common/queue/queue.py index 8e8ed85..9cb1e22 100644 --- a/polytope_server/common/queue/queue.py +++ b/polytope_server/common/queue/queue.py @@ -80,7 +80,7 @@ def collect_metric_info( """Collect dictionary of metrics""" -queue_dict = {"rabbitmq": "RabbitmqQueue"} +queue_dict = {"rabbitmq": "RabbitmqQueue", "sqs": "SQSQueue"} def create_queue(queue_config): diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py new file mode 100644 index 0000000..fba3a24 --- /dev/null +++ b/polytope_server/common/queue/sqs_queue.py @@ -0,0 +1,99 @@ +import json +import logging +from . import queue +import boto3 + +class SQSQueue(queue.Queue): + def __init__(self, config): + + host = config.get("host", "localhost") + queue_name = config.get("name", "default") + self.username = config.get("user", "guest") + self.password = config.get("password", "guest") + self.keep_alive_interval = config.get("keep_alive_interval", 60) + self.queue_url = "{}/{}".format(host, queue_name) + + logging.getLogger("sqs").setLevel("WARNING") + + self.client = boto3.client('sqs') + self.check_connection() + + def enqueue(self, message): + self.client.send_message( + QueueUrl=self.queue_url, + MessageBody=json.dumps(message.body).encode("utf-8") + ) + + def dequeue(self): + response = self.client.receive_message( + QueueUrl=self.queue_url, + VisibilityTimeout=120, #If processing takes more seconds, message will be read twice + MaxNumberOfMessages=1, + ) + if not response['Messages']: + return None + if len(response['Messages']) > 1: + raise ValueError("Received {} messages, should have received 1".format(len(response['Messages']))) + + body = response['Messages'][0]['Body'] + receipt_handle = response['Messages'][0]['ReceiptHandle'] + + return queue.Message(json.loads(body.decode("utf-8")), context=receipt_handle) + + + def ack(self, message): + self.client.delete_message( + QueueUrl=self.queue_url, + ReceiptHandle=message.context + ) + + def nack(self, message): + self.client.change_message_visibility( + QueueUrl=self.queue_url, + ReceiptHandle=message.context, + VisibilityTimeout=0 + ) + + def keep_alive(self): + return self.check_connection() + + def check_connection(self): + """Check the queue connection""" + response = self.client.get_queue_attributes( + QueueUrl=self.queue_url, + AttributeNames=['CreatedTimestamp'] + ) + #Tries to parse response + return 'Attributes' in response & 'CreatedTimestamp' in response['Attributes'] + + def close_connection(self): + self.client.close() + + def 
count(self): + response = self.client.get_queue_attributes( + QueueUrl=self.queue_url, + AttributeNames=[ + 'ApproximateNumberOfMessages' + ] + ) + num_messages = response['Attributes']['ApproximateNumberOfMessages'] + + return int(num_messages) + + def get_type(self): + return "sqs" + + def collect_metric_info(self): + response = self.client.get_queue_attributes( + QueueUrl=self.queue_url, + AttributeNames=[ + 'ApproximateNumberOfMessages', + 'ApproximateNumberOfMessages', + 'ApproximateNumberOfMessagesNotVisible', + ] + ) + return response['Attributes'] + + + + From c524b2c24cde307d6d1d1638ddbebe6eb614a240 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Thu, 9 Nov 2023 13:19:57 +0100 Subject: [PATCH 25/82] Add visibility_timeout to config --- polytope_server/common/queue/sqs_queue.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index fba3a24..e3466e7 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -11,6 +11,7 @@ def __init__(self, config): self.username = config.get("user", "guest") self.password = config.get("password", "guest") self.keep_alive_interval = config.get("keep_alive_interval", 60) + self.visibility_timeout = config.get("visibility_timeout", 120) self.queue_url = "{}/{}".format(host, queue_name) logging.getLogger("sqs").setLevel("WARNING") @@ -27,7 +28,7 @@ def enqueue(self, message): def dequeue(self): response = self.client.receive_message( QueueUrl=self.queue_url, - VisibilityTimeout=120, #If processing takes more seconds, message will be read twice + VisibilityTimeout=self.visibility_timeout, #If processing takes more seconds, message will be read twice MaxNumberOfMessages=1, ) if not response['Messages']: @@ -88,7 +89,7 @@ def collect_metric_info(self): QueueUrl=self.queue_url, AttributeNames=[ 'ApproximateNumberOfMessages', - 'ApproximateNumberOfMessages', + 'ApproximateNumberOfMessagesDelayed', 'ApproximateNumberOfMessagesNotVisible', ] ) From 2f532e054e7094ff2f321a569f1d36132e7a7534 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Thu, 9 Nov 2023 15:57:16 +0100 Subject: [PATCH 26/82] Add boto3 credentials --- polytope_server/common/queue/sqs_queue.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index e3466e7..e9037f2 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -2,21 +2,25 @@ import logging from . 
import queue import boto3 +import os class SQSQueue(queue.Queue): def __init__(self, config): host = config.get("host", "localhost") queue_name = config.get("name", "default") - self.username = config.get("user", "guest") - self.password = config.get("password", "guest") + region = config.get("region", "eu-central-2") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) self.queue_url = "{}/{}".format(host, queue_name) logging.getLogger("sqs").setLevel("WARNING") - - self.client = boto3.client('sqs') + session = boto3.Session( + aws_access_key_id=os.getenv('POLYTOPE_S3_ACCESS_KEY'), + aws_secret_access_key=os.getenv('POLYTOPE_S3_SECRET_KEY'), + region_name=region + ) + self.client = session.client('sqs') self.check_connection() def enqueue(self, message): From d130cb9c9cc5cff19bb6614e08848d7461d4a633 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Fri, 10 Nov 2023 09:22:11 +0100 Subject: [PATCH 27/82] Add boto3 to reqs --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e461a9d..c496d3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,4 +29,5 @@ deepmerge==0.1.0 flask-swagger-ui==3.25.0 ldap3==2.7 docker==4.2.0 -python-keycloak==0.24.0 \ No newline at end of file +python-keycloak==0.24.0 +boto3==1.28.80 From d62d7f5e4ed19277f1c4c8e9e08ee9f4fd6b8d95 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Fri, 10 Nov 2023 11:10:28 +0100 Subject: [PATCH 28/82] Update logic operator --- polytope_server/common/queue/sqs_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index e9037f2..b22138f 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -69,7 +69,7 @@ def check_connection(self): AttributeNames=['CreatedTimestamp'] ) #Tries to parse response - return 'Attributes' in response & 'CreatedTimestamp' in response['Attributes'] + return 'Attributes' in response and 'CreatedTimestamp' in response['Attributes'] def close_connection(self): self.client.close() From c9e1add5f258a9ef161575a18b6cf893d1a430a2 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Fri, 10 Nov 2023 16:22:37 +0100 Subject: [PATCH 29/82] Remove enqueue encoding --- polytope_server/common/queue/sqs_queue.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index b22138f..ace49e5 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -26,7 +26,7 @@ def __init__(self, config): def enqueue(self, message): self.client.send_message( QueueUrl=self.queue_url, - MessageBody=json.dumps(message.body).encode("utf-8") + MessageBody=json.dumps(message.body) ) def dequeue(self): @@ -60,10 +60,11 @@ def nack(self, message): ) def keep_alive(self): - return self.check_connection() + #Implemented for compatibility, disabled because each request to SQS is billed + pass + # return self.check_connection() def check_connection(self): - """Check the queue connection""" response = self.client.get_queue_attributes( QueueUrl=self.queue_url, AttributeNames=['CreatedTimestamp'] From e3cbe7c7b2bfe82d1b7147eb2078667e5ac39083 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Mon, 13 Nov 2023 09:42:18 +0100 Subject: [PATCH 30/82] Update dequeue func --- 
polytope_server/common/queue/sqs_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index ace49e5..c9ea2fd 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -43,7 +43,7 @@ def dequeue(self): body = response['Messages'][0]['Body'] receipt_handle = response['Messages'][0]['ReceiptHandle'] - return queue.Message(json.loads(body.decode("utf-8")), context=receipt_handle) + return queue.Message(json.loads(body), context=receipt_handle) def ack(self, message): From 277deeccb12284504f4c7bf6e8379b32f833e2d4 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Mon, 13 Nov 2023 15:24:26 +0100 Subject: [PATCH 31/82] Format --- polytope_server/common/queue/sqs_queue.py | 77 +++++++++-------------- 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index c9ea2fd..9e01ac2 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -4,86 +4,71 @@ import boto3 import os + class SQSQueue(queue.Queue): def __init__(self, config): - host = config.get("host", "localhost") queue_name = config.get("name", "default") region = config.get("region", "eu-central-2") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) self.queue_url = "{}/{}".format(host, queue_name) - + logging.getLogger("sqs").setLevel("WARNING") session = boto3.Session( - aws_access_key_id=os.getenv('POLYTOPE_S3_ACCESS_KEY'), - aws_secret_access_key=os.getenv('POLYTOPE_S3_SECRET_KEY'), - region_name=region + aws_access_key_id=os.getenv("POLYTOPE_S3_ACCESS_KEY"), + aws_secret_access_key=os.getenv("POLYTOPE_S3_SECRET_KEY"), + region_name=region, ) - self.client = session.client('sqs') + self.client = session.client("sqs") self.check_connection() def enqueue(self, message): - self.client.send_message( - QueueUrl=self.queue_url, - MessageBody=json.dumps(message.body) - ) + self.client.send_message(QueueUrl=self.queue_url, MessageBody=json.dumps(message.body)) def dequeue(self): response = self.client.receive_message( QueueUrl=self.queue_url, - VisibilityTimeout=self.visibility_timeout, #If processing takes more seconds, message will be read twice + VisibilityTimeout=self.visibility_timeout, # If processing takes more seconds, message will be read twice MaxNumberOfMessages=1, - ) - if not response['Messages']: + ) + if not response["Messages"]: return None - if len(response['Messages']) > 1: - raise ValueError("Received {} messages, should have received 1".format(len(response['Messages']))) + if len(response["Messages"]) > 1: + raise ValueError("Received {} messages, should have received 1".format(len(response["Messages"]))) - body = response['Messages'][0]['Body'] - receipt_handle = response['Messages'][0]['ReceiptHandle'] - - return queue.Message(json.loads(body), context=receipt_handle) + body = response["Messages"][0]["Body"] + receipt_handle = response["Messages"][0]["ReceiptHandle"] + return queue.Message(json.loads(body), context=receipt_handle) def ack(self, message): - self.client.delete_message( - QueueUrl=self.queue_url, - ReceiptHandle=message.context - ) + self.client.delete_message(QueueUrl=self.queue_url, ReceiptHandle=message.context) def nack(self, message): self.client.change_message_visibility( - QueueUrl=self.queue_url, - ReceiptHandle=message.context, 
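The ack/nack pair being reformatted in this hunk maps Polytope's queue interface onto two SQS primitives: ack deletes the message by its receipt handle, while nack resets the visibility timeout to zero so the message immediately becomes available to another worker. A standalone sketch of that lifecycle, with the queue name and handler below purely illustrative:

    import boto3

    def handle_request(body):
        print("processing", body)  # placeholder for the real work

    sqs = boto3.client("sqs", region_name="eu-central-2")
    queue_url = sqs.get_queue_url(QueueName="polytope-requests")["QueueUrl"]  # illustrative queue name

    response = sqs.receive_message(QueueUrl=queue_url, MaxNumberOfMessages=1)
    for msg in response.get("Messages", []):
        handle = msg["ReceiptHandle"]
        try:
            handle_request(msg["Body"])
            sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=handle)  # ack
        except Exception:
            # nack: make the message visible to another consumer right away
            sqs.change_message_visibility(QueueUrl=queue_url, ReceiptHandle=handle, VisibilityTimeout=0)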
- VisibilityTimeout=0 + QueueUrl=self.queue_url, ReceiptHandle=message.context, VisibilityTimeout=0 ) def keep_alive(self): - #Implemented for compatibility, disabled because each request to SQS is billed + # Implemented for compatibility, disabled because each request to SQS is billed pass # return self.check_connection() def check_connection(self): - response = self.client.get_queue_attributes( - QueueUrl=self.queue_url, - AttributeNames=['CreatedTimestamp'] - ) - #Tries to parse response - return 'Attributes' in response and 'CreatedTimestamp' in response['Attributes'] + response = self.client.get_queue_attributes(QueueUrl=self.queue_url, AttributeNames=["CreatedTimestamp"]) + # Tries to parse response + return "Attributes" in response and "CreatedTimestamp" in response["Attributes"] def close_connection(self): self.client.close() def count(self): response = self.client.get_queue_attributes( - QueueUrl=self.queue_url, - AttributeNames=[ - 'ApproximateNumberOfMessages' - ] + QueueUrl=self.queue_url, AttributeNames=["ApproximateNumberOfMessages"] ) - num_messages = response['Attributes']['ApproximateNumberOfMessages'] - + num_messages = response["Attributes"]["ApproximateNumberOfMessages"] + return int(num_messages) def get_type(self): @@ -93,13 +78,9 @@ def collect_metric_info(self): response = self.client.get_queue_attributes( QueueUrl=self.queue_url, AttributeNames=[ - 'ApproximateNumberOfMessages', - 'ApproximateNumberOfMessagesDelayed', - 'ApproximateNumberOfMessagesNotVisible', - ] + "ApproximateNumberOfMessages", + "ApproximateNumberOfMessagesDelayed", + "ApproximateNumberOfMessagesNotVisible", + ], ) - return response['Attributes'] - - - - + return response["Attributes"] From c0ac936c271331ea1cc5b9730ed1cb54de0f1e3a Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Mon, 13 Nov 2023 15:24:48 +0100 Subject: [PATCH 32/82] Remove message number check --- polytope_server/common/queue/sqs_queue.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index 9e01ac2..145698c 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -34,8 +34,6 @@ def dequeue(self): ) if not response["Messages"]: return None - if len(response["Messages"]) > 1: - raise ValueError("Received {} messages, should have received 1".format(len(response["Messages"]))) body = response["Messages"][0]["Body"] receipt_handle = response["Messages"][0]["ReceiptHandle"] From c63903723f06a353c19a12aad8e15abc00c4c652 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 14 Nov 2023 09:28:15 +0100 Subject: [PATCH 33/82] Update metric collection --- .../metric_collector/queue_metric_collector.py | 12 ++++++++++++ polytope_server/common/queue/sqs_queue.py | 13 ++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/polytope_server/common/metric_collector/queue_metric_collector.py b/polytope_server/common/metric_collector/queue_metric_collector.py index 828a774..38ae5c5 100644 --- a/polytope_server/common/metric_collector/queue_metric_collector.py +++ b/polytope_server/common/metric_collector/queue_metric_collector.py @@ -46,3 +46,15 @@ def total_queued(self): channel = connection.channel() q = channel.queue_declare(queue=self.queue_name, durable=True, passive=True) return q.method.message_count + + +class SQSQueueMetricCollector(QueueMetricCollector): + def __init__(self, host): + self.host = host + self.message_counts = None + + def total_queued(self): + num_messages 
= 0 + for key in self.message_counts: + num_messages += self.message_counts[key] + return num_messages diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index 145698c..b43ebaf 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -3,12 +3,13 @@ from . import queue import boto3 import os +from ..metric_collector import SQSQueueMetricCollector class SQSQueue(queue.Queue): def __init__(self, config): - host = config.get("host", "localhost") - queue_name = config.get("name", "default") + host = config.get("host") + queue_name = config.get("name") region = config.get("region", "eu-central-2") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) @@ -22,6 +23,7 @@ def __init__(self, config): ) self.client = session.client("sqs") self.check_connection() + self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url) def enqueue(self, message): self.client.send_message(QueueUrl=self.queue_url, MessageBody=json.dumps(message.body)) @@ -35,6 +37,10 @@ def dequeue(self): if not response["Messages"]: return None + for item in response["Messages"][1:]: + self.client.change_message_visibility( + QueueUrl=self.queue_url, ReceiptHandle=item["ReceiptHandle"], VisibilityTimeout=0 + ) body = response["Messages"][0]["Body"] receipt_handle = response["Messages"][0]["ReceiptHandle"] @@ -81,4 +87,5 @@ def collect_metric_info(self): "ApproximateNumberOfMessagesNotVisible", ], ) - return response["Attributes"] + self.queue_metric_collector.message_counts = response["Attributes"] + return self.queue_metric_collector.collect().serialize() From 86fac638b1509c9eca15897134948983dfb36ce7 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 14 Nov 2023 10:15:52 +0100 Subject: [PATCH 34/82] Update region and credentials --- polytope_server/common/queue/sqs_queue.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index b43ebaf..af87a76 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -3,6 +3,7 @@ from . 
import queue import boto3 import os +import re from ..metric_collector import SQSQueueMetricCollector @@ -10,18 +11,15 @@ class SQSQueue(queue.Queue): def __init__(self, config): host = config.get("host") queue_name = config.get("name") - region = config.get("region", "eu-central-2") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) + + region = self.__parse_region(host) self.queue_url = "{}/{}".format(host, queue_name) logging.getLogger("sqs").setLevel("WARNING") - session = boto3.Session( - aws_access_key_id=os.getenv("POLYTOPE_S3_ACCESS_KEY"), - aws_secret_access_key=os.getenv("POLYTOPE_S3_SECRET_KEY"), - region_name=region, - ) - self.client = session.client("sqs") + + self.client = boto3.client("sqs", region_name=region) self.check_connection() self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url) @@ -89,3 +87,7 @@ def collect_metric_info(self): ) self.queue_metric_collector.message_counts = response["Attributes"] return self.queue_metric_collector.collect().serialize() + + def __parse_region(self, host): + pattern = "https:\/\/sqs.(.*).amazonaws.com\/\d+" + return re.findall(pattern, host)[0] From 9fefd8584b6cbe2586f50b9dbaddcb19d9345549 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 14 Nov 2023 11:15:22 +0100 Subject: [PATCH 35/82] Add get_queue_url func --- polytope_server/common/queue/sqs_queue.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index af87a76..b6875e6 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -2,24 +2,20 @@ import logging from . import queue import boto3 -import os -import re from ..metric_collector import SQSQueueMetricCollector class SQSQueue(queue.Queue): def __init__(self, config): - host = config.get("host") - queue_name = config.get("name") + queue_name = config.get("queue_name") + region = config.get("region") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) - region = self.__parse_region(host) - self.queue_url = "{}/{}".format(host, queue_name) - logging.getLogger("sqs").setLevel("WARNING") self.client = boto3.client("sqs", region_name=region) + self.queue_url = self.client.get_queue_url(QueueName=queue_name).get("QueueUrl") self.check_connection() self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url) @@ -87,7 +83,3 @@ def collect_metric_info(self): ) self.queue_metric_collector.message_counts = response["Attributes"] return self.queue_metric_collector.collect().serialize() - - def __parse_region(self, host): - pattern = "https:\/\/sqs.(.*).amazonaws.com\/\d+" - return re.findall(pattern, host)[0] From 505df1a766813a2fc0b0eb992e0a0d97118f020e Mon Sep 17 00:00:00 2001 From: Milos Belic <119611649+milosbeliczuhlke@users.noreply.github.com> Date: Tue, 14 Nov 2023 11:07:46 +0100 Subject: [PATCH 36/82] Update polytope_server/common/metric_collector/queue_metric_collector.py Co-authored-by: Christian Kanesan --- .../common/metric_collector/queue_metric_collector.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/polytope_server/common/metric_collector/queue_metric_collector.py b/polytope_server/common/metric_collector/queue_metric_collector.py index 38ae5c5..0c48650 100644 --- a/polytope_server/common/metric_collector/queue_metric_collector.py +++ 
b/polytope_server/common/metric_collector/queue_metric_collector.py @@ -54,7 +54,4 @@ def __init__(self, host): self.message_counts = None def total_queued(self): - num_messages = 0 - for key in self.message_counts: - num_messages += self.message_counts[key] - return num_messages + return sum(self.message_counts.values()) From be5817df05765359d4abe6870e02b7e5d9e5e60a Mon Sep 17 00:00:00 2001 From: Milos Belic <119611649+milosbeliczuhlke@users.noreply.github.com> Date: Tue, 14 Nov 2023 11:09:38 +0100 Subject: [PATCH 37/82] Update polytope_server/common/queue/sqs_queue.py Co-authored-by: Christian Kanesan --- polytope_server/common/queue/sqs_queue.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index b6875e6..721653f 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -31,12 +31,13 @@ def dequeue(self): if not response["Messages"]: return None - for item in response["Messages"][1:]: + msg, *remainder = response["Messages"] + for item in remainder: self.client.change_message_visibility( QueueUrl=self.queue_url, ReceiptHandle=item["ReceiptHandle"], VisibilityTimeout=0 ) - body = response["Messages"][0]["Body"] - receipt_handle = response["Messages"][0]["ReceiptHandle"] + body = msg["Body"] + receipt_handle = msg["ReceiptHandle"] return queue.Message(json.loads(body), context=receipt_handle) From a025fb5e36858660110cbc3372acfd9e02c74417 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 14 Nov 2023 11:40:38 +0100 Subject: [PATCH 38/82] Add message_group_id --- polytope_server/common/queue/sqs_queue.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index 721653f..9f396a9 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -11,7 +11,7 @@ def __init__(self, config): region = config.get("region") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) - + self.message_group_id = config.get("message_group_id", "polytope") logging.getLogger("sqs").setLevel("WARNING") self.client = boto3.client("sqs", region_name=region) @@ -20,7 +20,9 @@ def __init__(self, config): self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url) def enqueue(self, message): - self.client.send_message(QueueUrl=self.queue_url, MessageBody=json.dumps(message.body)) + self.client.send_message( + QueueUrl=self.queue_url, MessageBody=json.dumps(message.body), MessageGroupId=self.message_group_id + ) def dequeue(self): response = self.client.receive_message( From f0dbf863d9add7063228e9948bfafe7060628721 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Tue, 14 Nov 2023 16:25:32 +0100 Subject: [PATCH 39/82] Fix test error --- tests/unit/test_s3_staging.py | 55 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/tests/unit/test_s3_staging.py b/tests/unit/test_s3_staging.py index 5abe61c..41407e3 100644 --- a/tests/unit/test_s3_staging.py +++ b/tests/unit/test_s3_staging.py @@ -1,38 +1,49 @@ from unittest import mock - from polytope_server.common.staging.s3_staging import S3Staging -@mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) -def test_s3_staging_secure_false(mock_minio: mock.Mock): - s3Staging = S3Staging(config={"secure": False}) 
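The replacement test below swaps the patched Minio constructor's return value for a small stub, so that S3Staging.__init__ can call make_bucket and set_bucket_policy without ever reaching a real object store. The same pattern in isolation (a condensed sketch, not the full test file):

    from unittest import mock

    from polytope_server.common.staging.s3_staging import S3Staging

    class FakeMinioClient:
        _region = None

        def make_bucket(self, bucket, region=None):
            pass

        def set_bucket_policy(self, bucket, policy):
            pass

    @mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True)
    def test_secure_flag_forwarded(mock_minio):
        mock_minio.return_value = FakeMinioClient()
        S3Staging(config={"secure": True})
        _, kwargs = mock_minio.call_args
        assert kwargs["secure"] is True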
+class DummyMinioClient: + def __init__(self) -> None: + self._region = None - verify_secure_flag_and_internal_url(mock_minio, s3Staging, False) + def make_bucket(self, bucket, region): + return "Dummy make bucket" + def set_bucket_policy(self, bucket, policy): + return "Dummy set bucket policy" -@mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) -def test_s3_staging_secure_any_value_false(mock_minio: mock.Mock): - s3Staging = S3Staging(config={"secure": "sdafsdfs"}) - verify_secure_flag_and_internal_url(mock_minio, s3Staging, False) +class Test: + @mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) + def test_s3_staging_secure_false(self, mock_minio: mock.Mock): + mock_minio.return_value = DummyMinioClient() + s3Staging = S3Staging(config={"secure": False}) + self.verify_secure_flag_and_internal_url(mock_minio, s3Staging, False) -@mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) -def test_s3_staging_secure_default(mock_minio: mock.Mock): - s3Staging = S3Staging(config={}) + @mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) + def test_s3_staging_secure_any_value_false(self, mock_minio: mock.Mock): + mock_minio.return_value = DummyMinioClient() + s3Staging = S3Staging(config={"secure": "sdafsdfs"}) - verify_secure_flag_and_internal_url(mock_minio, s3Staging, False) + self.verify_secure_flag_and_internal_url(mock_minio, s3Staging, False) + @mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) + def test_s3_staging_secure_default(self, mock_minio: mock.Mock): + mock_minio.return_value = DummyMinioClient() + s3Staging = S3Staging(config={}) -@mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) -def test_s3_staging_secure_true(mock_minio: mock.Mock): - s3Staging = S3Staging(config={"secure": True}) + self.verify_secure_flag_and_internal_url(mock_minio, s3Staging, False) - verify_secure_flag_and_internal_url(mock_minio, s3Staging, True) + @mock.patch("polytope_server.common.staging.s3_staging.Minio", autospec=True) + def test_s3_staging_secure_true(self, mock_minio: mock.Mock): + mock_minio.return_value = DummyMinioClient() + s3Staging = S3Staging(config={"secure": True}) + self.verify_secure_flag_and_internal_url(mock_minio, s3Staging, True) -def verify_secure_flag_and_internal_url(mock_minio: mock.Mock, s3Staging: S3Staging, secure: bool): - mock_minio.assert_called_once() - _, kwargs = mock_minio.call_args - assert kwargs["secure"] == secure - assert s3Staging.get_internal_url("test").startswith("https" if secure else "http") + def verify_secure_flag_and_internal_url(self, mock_minio: mock.Mock, s3Staging: S3Staging, secure: bool): + mock_minio.assert_called_once() + _, kwargs = mock_minio.call_args + assert kwargs["secure"] == secure + assert s3Staging.get_internal_url("test").startswith("https" if secure else "http") From e324931137a90e1cc9726dd7e8ad29a9de161035 Mon Sep 17 00:00:00 2001 From: milosbeliczuhlke Date: Thu, 16 Nov 2023 15:02:18 +0100 Subject: [PATCH 40/82] Move set_bucket_policy --- polytope_server/common/staging/s3_staging.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/polytope_server/common/staging/s3_staging.py b/polytope_server/common/staging/s3_staging.py index 2a72f00..027c14c 100644 --- a/polytope_server/common/staging/s3_staging.py +++ b/polytope_server/common/staging/s3_staging.py @@ -60,11 +60,10 @@ def __init__(self, config): try: self.client.make_bucket(self.bucket, self.client._region) 
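Moving set_bucket_policy inside the try block ties the policy call to bucket creation: when BucketAlreadyOwnedByYou is raised the bucket already exists and both calls are skipped, rather than re-applying the policy on every start-up. The resulting control flow, condensed (the exception name comes from the diff; the import path and helper are assumptions):

    from minio.error import BucketAlreadyOwnedByYou  # assumed import path

    def ensure_bucket(client, bucket, policy):
        try:
            client.make_bucket(bucket)
            client.set_bucket_policy(bucket, policy)  # only reached for a newly created bucket
        except BucketAlreadyOwnedByYou:
            # bucket already exists under this account; leave it and its policy untouched
            pass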
+ self.client.set_bucket_policy(self.bucket, self.bucket_policy()) except BucketAlreadyOwnedByYou: pass - self.client.set_bucket_policy(self.bucket, self.bucket_policy()) - self.storage_metric_collector = S3StorageMetricCollector(endpoint, self.client, self.bucket) logging.info( From 8e42683b38a6fbd3ce2e645063ff76b7bc2583be Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Mon, 20 Nov 2023 09:55:27 +0100 Subject: [PATCH 41/82] Update pymongo, Add srv option (#12) --- .../mongoapikey_authentication.py | 7 +++--- .../authentication/mongodb_authentication.py | 7 +++--- .../authorization/mongodb_authorization.py | 7 ++++-- polytope_server/common/caching/caching.py | 6 +++-- .../common/identity/mongodb_identity.py | 9 ++++--- .../keygenerator/mongodb_keygenerator.py | 6 ++--- .../metric_store/mongodb_metric_store.py | 5 ++-- .../common/mongo_client_factory.py | 9 +++++-- .../request_store/mongodb_request_store.py | 5 ++-- requirements.txt | 2 +- tests/unit/test_mongo_client_factory.py | 25 +++++++++++++------ 11 files changed, 55 insertions(+), 33 deletions(-) diff --git a/polytope_server/common/authentication/mongoapikey_authentication.py b/polytope_server/common/authentication/mongoapikey_authentication.py index c2fbd77..e6b04ef 100644 --- a/polytope_server/common/authentication/mongoapikey_authentication.py +++ b/polytope_server/common/authentication/mongoapikey_authentication.py @@ -39,18 +39,18 @@ class ApiKeyMongoAuthentication(authentication.Authentication): """ def __init__(self, name, realm, config): - self.config = config host = config.get("host", "localhost") port = config.get("port", "27017") collection = config.get("collection", "keys") username = config.get("username") password = config.get("password") - tls = config.get("tls", False) == True + srv = bool(config.get("srv", False)) + tls = bool(config.get("tls", False)) tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(host, port) - self.mongo_client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) self.database = self.mongo_client.keys self.keys = self.database[collection] assert realm == "polytope" @@ -66,7 +66,6 @@ def authentication_info(self): return "Authenticate with Polytope API Key from ../auth/keys" def authenticate(self, credentials: str) -> User: - # credentials should be of the form '' res = self.keys.find_one({"key.key": credentials}) if res is None: diff --git a/polytope_server/common/authentication/mongodb_authentication.py b/polytope_server/common/authentication/mongodb_authentication.py index 0fb3d93..414a327 100644 --- a/polytope_server/common/authentication/mongodb_authentication.py +++ b/polytope_server/common/authentication/mongodb_authentication.py @@ -31,18 +31,18 @@ class MongoAuthentication(authentication.Authentication): def __init__(self, name, realm, config): - self.config = config host = config.get("host", "localhost") port = config.get("port", "27017") collection = config.get("collection", "users") username = config.get("username") password = config.get("password") - tls = config.get("tls", False) == True + srv = bool(config.get("srv", False)) + tls = bool(config.get("tls", False)) tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(host, port) - self.mongo_client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client(host, port, 
username, password, srv, tls, tlsCAFile) self.database = self.mongo_client.authentication self.users = self.database[collection] @@ -62,7 +62,6 @@ def authentication_info(self): return "Authenticate with username and password" def authenticate(self, credentials: str) -> User: - # credentials should be of the form 'base64(:)' try: decoded = base64.b64decode(credentials).decode("utf-8") diff --git a/polytope_server/common/authorization/mongodb_authorization.py b/polytope_server/common/authorization/mongodb_authorization.py index 280d9af..c3a101e 100644 --- a/polytope_server/common/authorization/mongodb_authorization.py +++ b/polytope_server/common/authorization/mongodb_authorization.py @@ -33,11 +33,14 @@ def __init__(self, name, realm, config): self.collection = config.get("collection", "users") username = config.get("username") password = config.get("password") - tls = config.get("tls", False) == True + srv = bool(config.get("srv", False)) + tls = bool(config.get("tls", False)) tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(self.host, self.port) - self.mongo_client = mongo_client_factory.create_client(self.host, self.port, username, password, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client( + self.host, self.port, username, password, srv, tls, tlsCAFile + ) self.database = self.mongo_client.authentication self.users = self.database[self.collection] diff --git a/polytope_server/common/caching/caching.py b/polytope_server/common/caching/caching.py index 4a0d976..51fe0e1 100644 --- a/polytope_server/common/caching/caching.py +++ b/polytope_server/common/caching/caching.py @@ -199,11 +199,13 @@ def __init__(self, cache_config): port = cache_config.get("port", 27017) username = cache_config.get("username") password = cache_config.get("password") - tls = cache_config.get("tls", False) == True + srv = bool(cache_config.get("srv", False)) + tls = bool(cache_config.get("tls", False)) tlsCAFile = cache_config.get("tlsCAFile", None) endpoint = "{}:{}".format(host, port) collection = cache_config.get("collection", "cache") - self.client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) + self.client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) + self.database = self.client.cache self.collection = self.database[collection] self.collection.create_index("expire_at", expireAfterSeconds=0) diff --git a/polytope_server/common/identity/mongodb_identity.py b/polytope_server/common/identity/mongodb_identity.py index 89cee2f..413292a 100644 --- a/polytope_server/common/identity/mongodb_identity.py +++ b/polytope_server/common/identity/mongodb_identity.py @@ -33,11 +33,14 @@ def __init__(self, config): self.collection = config.get("collection", "users") username = config.get("username") password = config.get("password") - tls = config.get("tls", False) == True + srv = bool(config.get("srv", False)) + tls = bool(config.get("tls", False)) tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(self.host, self.port) - self.mongo_client = mongo_client_factory.create_client(self.host, self.port, username, password, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client( + self.host, self.port, username, password, srv, tls, tlsCAFile + ) self.database = self.mongo_client.authentication self.users = self.database[self.collection] self.realm = config.get("realm") @@ -55,7 +58,6 @@ def __init__(self, config): self.identity_metric_collector = MetricCollector() def 
add_user(self, username: str, password: str, roles: list) -> bool: - if self.users.find_one({"username": username}) is not None: raise Conflict("Username already registered") @@ -73,7 +75,6 @@ def add_user(self, username: str, password: str, roles: list) -> bool: return True def remove_user(self, username: str) -> bool: - result = self.users.delete_one({"username": username}) if result.deleted_count > 0: return True diff --git a/polytope_server/common/keygenerator/mongodb_keygenerator.py b/polytope_server/common/keygenerator/mongodb_keygenerator.py index 7854d94..7d0272f 100644 --- a/polytope_server/common/keygenerator/mongodb_keygenerator.py +++ b/polytope_server/common/keygenerator/mongodb_keygenerator.py @@ -38,12 +38,13 @@ def __init__(self, config): collection = config.get("collection", "keys") username = config.get("username") password = config.get("password") - tls = config.get("tls", False) == True + srv = bool(config.get("srv", False)) + tls = bool(config.get("tls", False)) tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(host, port) - self.mongo_client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) self.database = self.mongo_client.keys self.keys = self.database[collection] self.realms = config.get("allowed_realms") @@ -51,7 +52,6 @@ def __init__(self, config): self.storage_metric_collector = MongoStorageMetricCollector(endpoint, self.mongo_client, "keys", collection) def create_key(self, user: User) -> ApiKey: - if user.realm not in self.realms: raise ForbiddenRequest("Not allowed to create an API Key for users in realm {}".format(user.realm)) diff --git a/polytope_server/common/metric_store/mongodb_metric_store.py b/polytope_server/common/metric_store/mongodb_metric_store.py index 3fe468c..15078d6 100644 --- a/polytope_server/common/metric_store/mongodb_metric_store.py +++ b/polytope_server/common/metric_store/mongodb_metric_store.py @@ -45,12 +45,13 @@ def __init__(self, config=None): username = config.get("username") password = config.get("password") - tls = config.get("tls", False) == True + srv = bool(config.get("srv", False)) + tls = bool(config.get("tls", False)) tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(host, port) - self.mongo_client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) self.database = self.mongo_client.metric_storeg self.store = self.database[metric_collection] diff --git a/polytope_server/common/mongo_client_factory.py b/polytope_server/common/mongo_client_factory.py index eb1d5a7..28a052f 100644 --- a/polytope_server/common/mongo_client_factory.py +++ b/polytope_server/common/mongo_client_factory.py @@ -9,14 +9,19 @@ def create_client( port: str, username: typing.Optional[str] = None, password: typing.Optional[str] = None, + srv: bool = False, tls: bool = False, tlsCAFile: typing.Optional[str] = None, ) -> pymongo.MongoClient: - endpoint = f"mongodb://{host}:{port}" + protocol = "mongodb" + if srv: + protocol = "mongodb+srv" + + endpoint = f"{protocol}://{host}:{port}" if username and password: encoded_username = urllib.parse.quote_plus(username) encoded_password = urllib.parse.quote_plus(password) - endpoint = f"mongodb://{encoded_username}:{encoded_password}@{host}:{port}" + endpoint = 
f"{protocol}://{encoded_username}:{encoded_password}@{host}:{port}" return pymongo.MongoClient(endpoint, journal=True, connect=False, tls=tls, tlsCAFile=tlsCAFile) diff --git a/polytope_server/common/request_store/mongodb_request_store.py b/polytope_server/common/request_store/mongodb_request_store.py index ea2b3e1..64a2394 100644 --- a/polytope_server/common/request_store/mongodb_request_store.py +++ b/polytope_server/common/request_store/mongodb_request_store.py @@ -41,12 +41,13 @@ def __init__(self, config=None, metric_store_config=None): username = config.get("username") password = config.get("password") - tls = config.get("tls", False) == True + srv = bool(config.get("srv", False)) + tls = bool(config.get("tls", False)) tlsCAFile = config.get("tlsCAFile", None) endpoint = "{}:{}".format(host, port) - self.mongo_client = mongo_client_factory.create_client(host, port, username, password, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) self.database = self.mongo_client.request_store self.store = self.database[request_collection] diff --git a/requirements.txt b/requirements.txt index c496d3d..4a4dab1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ flask-wtf==0.14.2 werkzeug==2.0 gunicorn==19.9.0 ecmwf-api-client==1.5.4 -pymongo==3.10.1 +pymongo==4.6.0 pymemcache==3.0.0 redis==3.4.1 markdown==3.2.1 diff --git a/tests/unit/test_mongo_client_factory.py b/tests/unit/test_mongo_client_factory.py index 955ce9a..8cfdf29 100644 --- a/tests/unit/test_mongo_client_factory.py +++ b/tests/unit/test_mongo_client_factory.py @@ -7,28 +7,35 @@ def test_create_without_credentials(mock_mongo: mock.Mock): mongo_client_factory.create_client("host", "123", username=None, password=None, tls=False) - _verify(mock_mongo, "host:123", False) + _verify(mock_mongo, "host:123", False, False) + + +@mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) +def test_create_with_srv(mock_mongo: mock.Mock): + mongo_client_factory.create_client("host", "123", username=None, password=None, srv=True, tls=False) + + _verify(mock_mongo, "host:123", True, False) @mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) def test_create_with_credentials(mock_mongo: mock.Mock): mongo_client_factory.create_client("host", "123", username="admin", password="admin", tls=False) - _verify(mock_mongo, "admin:admin@host:123", False) + _verify(mock_mongo, "admin:admin@host:123", False, False) @mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) def test_create_without_credentials_tls(mock_mongo: mock.Mock): mongo_client_factory.create_client("host", "123", username=None, password=None, tls=True) - _verify(mock_mongo, "host:123", True) + _verify(mock_mongo, "host:123", False, True) @mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) def test_create_with_credentials_tls(mock_mongo: mock.Mock): mongo_client_factory.create_client("host", "123", username="admin", password="admin", tls=True) - _verify(mock_mongo, "admin:admin@host:123", True) + _verify(mock_mongo, "admin:admin@host:123", False, True) @mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) @@ -37,12 +44,16 @@ def test_create_with_tlsCAfile(mock_mongo: mock.Mock): "host", "123", username="admin", password="admin", tls=True, tlsCAFile="/test/ca.pem" ) - _verify(mock_mongo, 
"admin:admin@host:123", True, "/test/ca.pem") + _verify(mock_mongo, "admin:admin@host:123", False, True, "/test/ca.pem") -def _verify(mock_mongo: mock.Mock, endpoint: str, tls: bool, tlsCAFile=None): +def _verify(mock_mongo: mock.Mock, endpoint: str, srv: bool, tls: bool, tlsCAFile=None): mock_mongo.assert_called_once() args, kwargs = mock_mongo.call_args - assert args[0] == f"mongodb://{endpoint}" + if srv: + assert args[0] == f"mongodb+srv://{endpoint}" + else: + assert args[0] == f"mongodb://{endpoint}" + assert kwargs["tls"] == tls assert kwargs["tlsCAFile"] == tlsCAFile From 602b3d007006328ba8d3c48a9c2b6972ffd9aaa7 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Mon, 20 Nov 2023 13:57:43 +0100 Subject: [PATCH 42/82] Add pyfdb (#13) --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 4a4dab1..3e15bf4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,3 +31,4 @@ ldap3==2.7 docker==4.2.0 python-keycloak==0.24.0 boto3==1.28.80 +pyfdb@https://github.com/ecmwf/pyfdb/archive/refs/tags/0.0.3.tar.gz \ No newline at end of file From a833364accb827c15c2644403759b0f4627c807d Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Mon, 20 Nov 2023 14:31:09 +0100 Subject: [PATCH 43/82] Fix empty response on sqs dequeue (#14) --- polytope_server/common/queue/sqs_queue.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index 9f396a9..2892aad 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -1,8 +1,10 @@ import json import logging -from . import queue + import boto3 + from ..metric_collector import SQSQueueMetricCollector +from . 
import queue class SQSQueue(queue.Queue): @@ -30,7 +32,7 @@ def dequeue(self): VisibilityTimeout=self.visibility_timeout, # If processing takes more seconds, message will be read twice MaxNumberOfMessages=1, ) - if not response["Messages"]: + if "Messages" not in response: return None msg, *remainder = response["Messages"] From bf9d082a728ad50db2ae71f2470551248d63f4a1 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Mon, 20 Nov 2023 14:41:04 +0100 Subject: [PATCH 44/82] Update mongodb config (#15) --- .../mongoapikey_authentication.py | 11 +--- .../authentication/mongodb_authentication.py | 11 +--- .../authorization/mongodb_authorization.py | 13 +---- polytope_server/common/caching/caching.py | 13 ++--- polytope_server/common/config/schema.yaml | 4 +- .../common/identity/mongodb_identity.py | 14 ++--- .../keygenerator/mongodb_keygenerator.py | 12 +--- .../metric_store/mongodb_metric_store.py | 14 ++--- .../common/mongo_client_factory.py | 21 ++----- .../request_store/mongodb_request_store.py | 15 ++--- tests/unit/test_mongo_client_factory.py | 55 +++++++------------ 11 files changed, 57 insertions(+), 126 deletions(-) diff --git a/polytope_server/common/authentication/mongoapikey_authentication.py b/polytope_server/common/authentication/mongoapikey_authentication.py index e6b04ef..20740fa 100644 --- a/polytope_server/common/authentication/mongoapikey_authentication.py +++ b/polytope_server/common/authentication/mongoapikey_authentication.py @@ -40,22 +40,17 @@ class ApiKeyMongoAuthentication(authentication.Authentication): def __init__(self, name, realm, config): self.config = config - host = config.get("host", "localhost") - port = config.get("port", "27017") + uri = config.get("uri", "mongodb://localhost:27017") collection = config.get("collection", "keys") username = config.get("username") password = config.get("password") - srv = bool(config.get("srv", False)) - tls = bool(config.get("tls", False)) - tlsCAFile = config.get("tlsCAFile", None) - endpoint = "{}:{}".format(host, port) - self.mongo_client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client(uri, username, password) self.database = self.mongo_client.keys self.keys = self.database[collection] assert realm == "polytope" - self.storage_metric_collector = MongoStorageMetricCollector(endpoint, self.mongo_client, "keys", collection) + self.storage_metric_collector = MongoStorageMetricCollector(uri, self.mongo_client, "keys", collection) super().__init__(name, realm, config) diff --git a/polytope_server/common/authentication/mongodb_authentication.py b/polytope_server/common/authentication/mongodb_authentication.py index 414a327..a1d0be3 100644 --- a/polytope_server/common/authentication/mongodb_authentication.py +++ b/polytope_server/common/authentication/mongodb_authentication.py @@ -32,22 +32,17 @@ class MongoAuthentication(authentication.Authentication): def __init__(self, name, realm, config): self.config = config - host = config.get("host", "localhost") - port = config.get("port", "27017") + uri = config.get("uri", "mongodb://localhost:27017") collection = config.get("collection", "users") username = config.get("username") password = config.get("password") - srv = bool(config.get("srv", False)) - tls = bool(config.get("tls", False)) - tlsCAFile = config.get("tlsCAFile", None) - endpoint = "{}:{}".format(host, port) - self.mongo_client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) + 
self.mongo_client = mongo_client_factory.create_client(uri, username, password) self.database = self.mongo_client.authentication self.users = self.database[collection] self.storage_metric_collector = MongoStorageMetricCollector( - endpoint, self.mongo_client, "authentication", collection + uri, self.mongo_client, "authentication", collection ) super().__init__(name, realm, config) diff --git a/polytope_server/common/authorization/mongodb_authorization.py b/polytope_server/common/authorization/mongodb_authorization.py index c3a101e..48ac44c 100644 --- a/polytope_server/common/authorization/mongodb_authorization.py +++ b/polytope_server/common/authorization/mongodb_authorization.py @@ -28,24 +28,17 @@ class MongoDBAuthorization(authorization.Authorization): def __init__(self, name, realm, config): self.config = config assert self.config["type"] == "mongodb" - self.host = config.get("host", "localhost") - self.port = config.get("port", "27017") + self.uri = config.get("uri", "mongodb://localhost:27017") self.collection = config.get("collection", "users") username = config.get("username") password = config.get("password") - srv = bool(config.get("srv", False)) - tls = bool(config.get("tls", False)) - tlsCAFile = config.get("tlsCAFile", None) - endpoint = "{}:{}".format(self.host, self.port) - self.mongo_client = mongo_client_factory.create_client( - self.host, self.port, username, password, srv, tls, tlsCAFile - ) + self.mongo_client = mongo_client_factory.create_client(self.uri, username, password) self.database = self.mongo_client.authentication self.users = self.database[self.collection] self.storage_metric_collector = MongoStorageMetricCollector( - endpoint, self.mongo_client, "authentication", self.collection + self.uri, self.mongo_client, "authentication", self.collection ) super().__init__(name, realm, config) diff --git a/polytope_server/common/caching/caching.py b/polytope_server/common/caching/caching.py index 51fe0e1..7cbd983 100644 --- a/polytope_server/common/caching/caching.py +++ b/polytope_server/common/caching/caching.py @@ -195,23 +195,20 @@ def collect_metric_info(self): class MongoDBCaching(Caching): def __init__(self, cache_config): super().__init__(cache_config) - host = cache_config.get("host", "localhost") - port = cache_config.get("port", 27017) + uri = cache_config.get("uri", "mongodb://localhost:27017") + username = cache_config.get("username") password = cache_config.get("password") - srv = bool(cache_config.get("srv", False)) - tls = bool(cache_config.get("tls", False)) - tlsCAFile = cache_config.get("tlsCAFile", None) - endpoint = "{}:{}".format(host, port) + collection = cache_config.get("collection", "cache") - self.client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) + self.client = mongo_client_factory.create_client(uri, username, password,) self.database = self.client.cache self.collection = self.database[collection] self.collection.create_index("expire_at", expireAfterSeconds=0) self.collection.update_one({"_id": "hits"}, {"$setOnInsert": {"n": 0}}, upsert=True) self.collection.update_one({"_id": "misses"}, {"$setOnInsert": {"n": 0}}, upsert=True) - self.storage_metric_collector = MongoStorageMetricCollector(endpoint, self.client, "cache", collection) + self.storage_metric_collector = MongoStorageMetricCollector(uri, self.client, "cache", collection) self.cache_metric_collector = MongoCacheMetricCollector(self.client, "cache", collection) def get_type(self): diff --git a/polytope_server/common/config/schema.yaml 
b/polytope_server/common/config/schema.yaml index 889c79b..9d7b162 100644 --- a/polytope_server/common/config/schema.yaml +++ b/polytope_server/common/config/schema.yaml @@ -68,7 +68,7 @@ mapping: desc: point to a hosted mongodb type: map mapping: - endpoint: + uri: desc: host and port example: localhost:27017 type: str @@ -116,7 +116,7 @@ mapping: desc: point to a hosted mongodb type: map mapping: - endpoint: + uri: desc: host and port example: localhost:27017 type: str diff --git a/polytope_server/common/identity/mongodb_identity.py b/polytope_server/common/identity/mongodb_identity.py index 413292a..86a67ec 100644 --- a/polytope_server/common/identity/mongodb_identity.py +++ b/polytope_server/common/identity/mongodb_identity.py @@ -28,18 +28,16 @@ class MongoDBIdentity(identity.Identity): def __init__(self, config): self.config = config - self.host = config.get("host", "localhost") - self.port = config.get("port", "27017") + self.uri = config.get("uri", "mongodb://localhost:27017") + self.collection = config.get("collection", "users") username = config.get("username") password = config.get("password") - srv = bool(config.get("srv", False)) - tls = bool(config.get("tls", False)) - tlsCAFile = config.get("tlsCAFile", None) - endpoint = "{}:{}".format(self.host, self.port) self.mongo_client = mongo_client_factory.create_client( - self.host, self.port, username, password, srv, tls, tlsCAFile + self.uri, + username, + password, ) self.database = self.mongo_client.authentication self.users = self.database[self.collection] @@ -53,7 +51,7 @@ def __init__(self, config): pass self.storage_metric_collector = MongoStorageMetricCollector( - endpoint, self.mongo_client, "authentication", self.collection + self.uri, self.mongo_client, "authentication", self.collection ) self.identity_metric_collector = MetricCollector() diff --git a/polytope_server/common/keygenerator/mongodb_keygenerator.py b/polytope_server/common/keygenerator/mongodb_keygenerator.py index 7d0272f..489054e 100644 --- a/polytope_server/common/keygenerator/mongodb_keygenerator.py +++ b/polytope_server/common/keygenerator/mongodb_keygenerator.py @@ -33,23 +33,17 @@ class MongoKeyGenerator(keygenerator.KeyGenerator): def __init__(self, config): self.config = config assert self.config["type"] == "mongodb" - host = config.get("host", "localhost") - port = config.get("port", "27017") + uri = config.get("uri", "mongodb://localhost:27017") collection = config.get("collection", "keys") username = config.get("username") password = config.get("password") - srv = bool(config.get("srv", False)) - tls = bool(config.get("tls", False)) - tlsCAFile = config.get("tlsCAFile", None) - endpoint = "{}:{}".format(host, port) - - self.mongo_client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client(uri, username, password) self.database = self.mongo_client.keys self.keys = self.database[collection] self.realms = config.get("allowed_realms") - self.storage_metric_collector = MongoStorageMetricCollector(endpoint, self.mongo_client, "keys", collection) + self.storage_metric_collector = MongoStorageMetricCollector(uri, self.mongo_client, "keys", collection) def create_key(self, user: User) -> ApiKey: if user.realm not in self.realms: diff --git a/polytope_server/common/metric_store/mongodb_metric_store.py b/polytope_server/common/metric_store/mongodb_metric_store.py index 15078d6..0dca6d6 100644 --- a/polytope_server/common/metric_store/mongodb_metric_store.py +++ 
b/polytope_server/common/metric_store/mongodb_metric_store.py @@ -39,19 +39,13 @@ class MongoMetricStore(MetricStore): def __init__(self, config=None): - host = config.get("host", "localhost") - port = config.get("port", "27017") + uri = config.get("uri", "mongodb://localhost:27017") metric_collection = config.get("collection", "metrics") username = config.get("username") password = config.get("password") - srv = bool(config.get("srv", False)) - tls = bool(config.get("tls", False)) - tlsCAFile = config.get("tlsCAFile", None) - endpoint = "{}:{}".format(host, port) - - self.mongo_client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client(uri, username, password) self.database = self.mongo_client.metric_storeg self.store = self.database[metric_collection] @@ -65,10 +59,10 @@ def __init__(self, config=None): } self.storage_metric_collector = MongoStorageMetricCollector( - endpoint, self.mongo_client, "metric_store", metric_collection + uri, self.mongo_client, "metric_store", metric_collection ) - logging.info("MongoClient configured to open at {}".format(endpoint)) + logging.info("MongoClient configured to open at {}".format(uri)) def get_type(self): return "mongodb" diff --git a/polytope_server/common/mongo_client_factory.py b/polytope_server/common/mongo_client_factory.py index 28a052f..7e67327 100644 --- a/polytope_server/common/mongo_client_factory.py +++ b/polytope_server/common/mongo_client_factory.py @@ -1,27 +1,14 @@ import typing -import urllib.parse import pymongo def create_client( - host: str, - port: str, + uri: str, username: typing.Optional[str] = None, password: typing.Optional[str] = None, - srv: bool = False, - tls: bool = False, - tlsCAFile: typing.Optional[str] = None, ) -> pymongo.MongoClient: - protocol = "mongodb" - if srv: - protocol = "mongodb+srv" - - endpoint = f"{protocol}://{host}:{port}" - if username and password: - encoded_username = urllib.parse.quote_plus(username) - encoded_password = urllib.parse.quote_plus(password) - endpoint = f"{protocol}://{encoded_username}:{encoded_password}@{host}:{port}" - - return pymongo.MongoClient(endpoint, journal=True, connect=False, tls=tls, tlsCAFile=tlsCAFile) + return pymongo.MongoClient(host=uri, journal=True, connect=False, username=username, password=password) + else: + return pymongo.MongoClient(host=uri, journal=True, connect=False) diff --git a/polytope_server/common/request_store/mongodb_request_store.py b/polytope_server/common/request_store/mongodb_request_store.py index 64a2394..1f43561 100644 --- a/polytope_server/common/request_store/mongodb_request_store.py +++ b/polytope_server/common/request_store/mongodb_request_store.py @@ -35,19 +35,12 @@ class MongoRequestStore(request_store.RequestStore): def __init__(self, config=None, metric_store_config=None): - host = config.get("host", "localhost") - port = config.get("port", "27017") + uri = config.get("uri", "mongodb://localhost:27017") request_collection = config.get("collection", "requests") - username = config.get("username") password = config.get("password") - srv = bool(config.get("srv", False)) - tls = bool(config.get("tls", False)) - tlsCAFile = config.get("tlsCAFile", None) - - endpoint = "{}:{}".format(host, port) - self.mongo_client = mongo_client_factory.create_client(host, port, username, password, srv, tls, tlsCAFile) + self.mongo_client = mongo_client_factory.create_client(uri, username, password) self.database = self.mongo_client.request_store self.store 
= self.database[request_collection] @@ -56,11 +49,11 @@ def __init__(self, config=None, metric_store_config=None): self.metric_store = metric_store.create_metric_store(metric_store_config) self.storage_metric_collector = MongoStorageMetricCollector( - endpoint, self.mongo_client, "request_store", request_collection + uri, self.mongo_client, "request_store", request_collection ) self.request_store_metric_collector = MongoRequestStoreMetricCollector() - logging.info("MongoClient configured to open at {}".format(endpoint)) + logging.info("MongoClient configured to open at {}".format(uri)) def get_type(self): return "mongodb" diff --git a/tests/unit/test_mongo_client_factory.py b/tests/unit/test_mongo_client_factory.py index 8cfdf29..81ffe81 100644 --- a/tests/unit/test_mongo_client_factory.py +++ b/tests/unit/test_mongo_client_factory.py @@ -1,3 +1,4 @@ +import typing from unittest import mock from polytope_server.common import mongo_client_factory @@ -5,55 +6,39 @@ @mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) def test_create_without_credentials(mock_mongo: mock.Mock): - mongo_client_factory.create_client("host", "123", username=None, password=None, tls=False) + mongo_client_factory.create_client("mongodb://host:123") - _verify(mock_mongo, "host:123", False, False) + _verify(mock_mongo, "mongodb://host:123", None, None) @mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) -def test_create_with_srv(mock_mongo: mock.Mock): - mongo_client_factory.create_client("host", "123", username=None, password=None, srv=True, tls=False) +def test_create_without_password_credentials(mock_mongo: mock.Mock): + mongo_client_factory.create_client("mongodb+srv://host:123", username="admin") - _verify(mock_mongo, "host:123", True, False) + _verify(mock_mongo, "mongodb+srv://host:123", None, None) @mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) -def test_create_with_credentials(mock_mongo: mock.Mock): - mongo_client_factory.create_client("host", "123", username="admin", password="admin", tls=False) - - _verify(mock_mongo, "admin:admin@host:123", False, False) - +def test_create_without_username_credentials(mock_mongo: mock.Mock): + mongo_client_factory.create_client("host:123", password="password") -@mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) -def test_create_without_credentials_tls(mock_mongo: mock.Mock): - mongo_client_factory.create_client("host", "123", username=None, password=None, tls=True) - - _verify(mock_mongo, "host:123", False, True) + _verify(mock_mongo, "host:123", None, None) @mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) -def test_create_with_credentials_tls(mock_mongo: mock.Mock): - mongo_client_factory.create_client("host", "123", username="admin", password="admin", tls=True) - - _verify(mock_mongo, "admin:admin@host:123", False, True) - - -@mock.patch("polytope_server.common.mongo_client_factory.pymongo.MongoClient", autospec=True) -def test_create_with_tlsCAfile(mock_mongo: mock.Mock): - mongo_client_factory.create_client( - "host", "123", username="admin", password="admin", tls=True, tlsCAFile="/test/ca.pem" - ) +def test_create_with_credentials(mock_mongo: mock.Mock): + mongo_client_factory.create_client("mongodb+srv://host", username="admin", password="est123123") - _verify(mock_mongo, "admin:admin@host:123", False, True, "/test/ca.pem") + _verify(mock_mongo, 
"mongodb+srv://host", "admin", "est123123") -def _verify(mock_mongo: mock.Mock, endpoint: str, srv: bool, tls: bool, tlsCAFile=None): +def _verify( + mock_mongo: mock.Mock, endpoint: str, username: typing.Optional[str] = None, password: typing.Optional[str] = None +): mock_mongo.assert_called_once() args, kwargs = mock_mongo.call_args - if srv: - assert args[0] == f"mongodb+srv://{endpoint}" - else: - assert args[0] == f"mongodb://{endpoint}" - - assert kwargs["tls"] == tls - assert kwargs["tlsCAFile"] == tlsCAFile + assert args[0] == endpoint + if username: + assert kwargs["username"] == username + if password: + assert kwargs["password"] == password From 23f71e19c0f056be5e1629279ed2646fb3a4dd3b Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Thu, 23 Nov 2023 13:52:23 +0100 Subject: [PATCH 45/82] Update Metric collection (#16) --- .../metric_collector/queue_metric_collector.py | 15 ++++++++++++--- .../metric_collector/storage_metric_collector.py | 6 +++--- polytope_server/common/queue/sqs_queue.py | 11 +---------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/polytope_server/common/metric_collector/queue_metric_collector.py b/polytope_server/common/metric_collector/queue_metric_collector.py index 0c48650..22827fa 100644 --- a/polytope_server/common/metric_collector/queue_metric_collector.py +++ b/polytope_server/common/metric_collector/queue_metric_collector.py @@ -49,9 +49,18 @@ def total_queued(self): class SQSQueueMetricCollector(QueueMetricCollector): - def __init__(self, host): + def __init__(self, host, client): self.host = host - self.message_counts = None + self.client = client def total_queued(self): - return sum(self.message_counts.values()) + response = self.client.get_queue_attributes( + QueueUrl=self.host, + AttributeNames=[ + "ApproximateNumberOfMessages", + "ApproximateNumberOfMessagesDelayed", + "ApproximateNumberOfMessagesNotVisible", + ], + ) + values = response.get("Attributes", {}).values() + return sum(map(int, values)) diff --git a/polytope_server/common/metric_collector/storage_metric_collector.py b/polytope_server/common/metric_collector/storage_metric_collector.py index e782183..809b592 100644 --- a/polytope_server/common/metric_collector/storage_metric_collector.py +++ b/polytope_server/common/metric_collector/storage_metric_collector.py @@ -107,12 +107,12 @@ def collect(self): def storage_space_used(self): space_used = 0 - for db in self.client.list_database_names(): - space_used += int(getattr(self.client, db).command({"dbStats": 1}).get("storageSize")) + # for db in self.client.list_database_names(): + # space_used += int(getattr(self.client, db).command({"dbStats": 1}).get("storageSize")) return space_used def total_entries(self): - return self.store.count() + return self.store.count_documents({}) def db_name(self): return self.database diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index 2892aad..14c3020 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -19,7 +19,7 @@ def __init__(self, config): self.client = boto3.client("sqs", region_name=region) self.queue_url = self.client.get_queue_url(QueueName=queue_name).get("QueueUrl") self.check_connection() - self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url) + self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url, self.client) def enqueue(self, message): self.client.send_message( @@ -78,13 +78,4 @@ def get_type(self): return "sqs" def 
collect_metric_info(self): - response = self.client.get_queue_attributes( - QueueUrl=self.queue_url, - AttributeNames=[ - "ApproximateNumberOfMessages", - "ApproximateNumberOfMessagesDelayed", - "ApproximateNumberOfMessagesNotVisible", - ], - ) - self.queue_metric_collector.message_counts = response["Attributes"] return self.queue_metric_collector.collect().serialize() From 589b639fea08af32ba8f5678e6a02ba37025cf64 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Fri, 24 Nov 2023 13:51:07 +0100 Subject: [PATCH 46/82] SQS long polling, reduce logging output (#17) --- polytope_server/common/queue/sqs_queue.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index 14c3020..55b2855 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -14,9 +14,13 @@ def __init__(self, config): self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) self.message_group_id = config.get("message_group_id", "polytope") - logging.getLogger("sqs").setLevel("WARNING") + + logging.getLogger("sqs").setLevel(logging.WARNING) + logging.getLogger("boto3").setLevel(logging.WARNING) + logging.getLogger("botocore").setLevel(logging.WARNING) self.client = boto3.client("sqs", region_name=region) + self.queue_url = self.client.get_queue_url(QueueName=queue_name).get("QueueUrl") self.check_connection() self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url, self.client) @@ -31,6 +35,7 @@ def dequeue(self): QueueUrl=self.queue_url, VisibilityTimeout=self.visibility_timeout, # If processing takes more seconds, message will be read twice MaxNumberOfMessages=1, + WaitTimeSeconds=20, ) if "Messages" not in response: return None From 0e531ca94c08d8eb95a5f2721ce5079473e107b0 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Mon, 27 Nov 2023 13:34:44 +0100 Subject: [PATCH 47/82] Proxy support for Flask (#18) --- polytope_server/frontend/flask_handler.py | 6 +++++- polytope_server/frontend/frontend.py | 11 +++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/polytope_server/frontend/flask_handler.py b/polytope_server/frontend/flask_handler.py index 86f2024..9e18526 100644 --- a/polytope_server/frontend/flask_handler.py +++ b/polytope_server/frontend/flask_handler.py @@ -29,6 +29,7 @@ from flask import Flask, request from flask_swagger_ui import get_swaggerui_blueprint from werkzeug.exceptions import default_exceptions +from werkzeug.middleware.proxy_fix import ProxyFix from ..common.exceptions import BadRequest, ForbiddenRequest, HTTPException, NotFound from ..version import __version__ @@ -47,9 +48,13 @@ def create_handler( collections, identity, apikeygenerator, + proxy_support: bool, ): handler = Flask(__name__) + if proxy_support: + handler.wsgi_app = ProxyFix(handler.wsgi_app, x_for=1, x_proto=1, x_host=1) + openapi_spec = "static/openapi.yaml" spec_path = pathlib.Path(__file__).parent.absolute() / openapi_spec with spec_path.open("r+", encoding="utf8") as f: @@ -249,7 +254,6 @@ def only_json(): return handler def run_server(self, handler, server_type, host, port): - if server_type == "flask": # flask internal server for non-production environments # should only be used for testing and debugging diff --git a/polytope_server/frontend/frontend.py b/polytope_server/frontend/frontend.py index 2c4d7e1..6481118 100644 --- a/polytope_server/frontend/frontend.py +++ 
b/polytope_server/frontend/frontend.py @@ -59,7 +59,6 @@ def __init__(self, config): self.port = frontend_config.get("port", "5000") def run(self): - # create instances of authentication, request_store & staging request_store = create_request_store(self.config.get("request_store"), self.config.get("metric_store")) @@ -72,7 +71,15 @@ def run(self): handler_module = importlib.import_module("polytope_server.frontend." + self.handler_type + "_handler") handler_class = getattr(handler_module, self.handler_dict[self.handler_type])() - handler = handler_class.create_handler(request_store, auth, staging, collections, identity, apikeygenerator) + handler = handler_class.create_handler( + request_store, + auth, + staging, + collections, + identity, + apikeygenerator, + self.config.get("frontend", {}).get("proxy_support", False), + ) logging.info("Starting frontend...") handler_class.run_server(handler, self.server_type, self.host, self.port) From 134012daee12b862ef5bd1c5beb7e0125d6b124b Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Thu, 30 Nov 2023 13:52:07 +0100 Subject: [PATCH 48/82] Fix metric-store db name (#19) --- polytope_server/common/metric_store/mongodb_metric_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/common/metric_store/mongodb_metric_store.py b/polytope_server/common/metric_store/mongodb_metric_store.py index 0dca6d6..b529426 100644 --- a/polytope_server/common/metric_store/mongodb_metric_store.py +++ b/polytope_server/common/metric_store/mongodb_metric_store.py @@ -46,7 +46,7 @@ def __init__(self, config=None): password = config.get("password") self.mongo_client = mongo_client_factory.create_client(uri, username, password) - self.database = self.mongo_client.metric_storeg + self.database = self.mongo_client.metric_store self.store = self.database[metric_collection] self.metric_type_class_map = { From 90593466a00e316d22cfe1d3d0103959d2d0794f Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Tue, 5 Dec 2023 10:43:38 +0100 Subject: [PATCH 49/82] Update MessageGroupId (#21) --- polytope_server/common/queue/sqs_queue.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/polytope_server/common/queue/sqs_queue.py b/polytope_server/common/queue/sqs_queue.py index 55b2855..dd1a392 100644 --- a/polytope_server/common/queue/sqs_queue.py +++ b/polytope_server/common/queue/sqs_queue.py @@ -1,5 +1,6 @@ import json import logging +from uuid import uuid4 import boto3 @@ -13,7 +14,6 @@ def __init__(self, config): region = config.get("region") self.keep_alive_interval = config.get("keep_alive_interval", 60) self.visibility_timeout = config.get("visibility_timeout", 120) - self.message_group_id = config.get("message_group_id", "polytope") logging.getLogger("sqs").setLevel(logging.WARNING) logging.getLogger("boto3").setLevel(logging.WARNING) @@ -26,8 +26,11 @@ def __init__(self, config): self.queue_metric_collector = SQSQueueMetricCollector(self.queue_url, self.client) def enqueue(self, message): + # Messages need to have different a `MessageGroupId` so that they can be processed in parallel. 
self.client.send_message( - QueueUrl=self.queue_url, MessageBody=json.dumps(message.body), MessageGroupId=self.message_group_id + QueueUrl=self.queue_url, + MessageBody=json.dumps(message.body), + MessageGroupId=message.body.get("id", uuid4()), ) def dequeue(self): From 260b51475629f1c73526e990c03399aff3538a5a Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Mon, 11 Dec 2023 07:41:17 +0100 Subject: [PATCH 50/82] Measure worker performance (#22) --- polytope_server/common/staging/s3_staging.py | 7 +++++++ polytope_server/worker/worker.py | 8 ++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/polytope_server/common/staging/s3_staging.py b/polytope_server/common/staging/s3_staging.py index 027c14c..3badb2f 100644 --- a/polytope_server/common/staging/s3_staging.py +++ b/polytope_server/common/staging/s3_staging.py @@ -29,6 +29,7 @@ import json import logging +import timeit import minio from minio import Minio @@ -71,6 +72,7 @@ def __init__(self, config): ) def create(self, name, data, content_type): + start = timeit.default_timer() url = self.get_url(name) logging.info("Putting to staging: {}".format(name)) @@ -129,6 +131,11 @@ def create(self, name, data, content_type): raise logging.info("Put to {}".format(url)) + end = timeit.default_timer() + delta = end - start + logging.info( + f"PERF_TIME fdb+s3 request_id, elapsed [s], size [bytes], throughput [MiB/s]: {name},{delta:.4f},{total_size},{(total_size/1024/1024/delta):.2f}" + ) return url def read(self, name): diff --git a/polytope_server/worker/worker.py b/polytope_server/worker/worker.py index 5bfac20..22dfd63 100644 --- a/polytope_server/worker/worker.py +++ b/polytope_server/worker/worker.py @@ -23,6 +23,7 @@ import signal import sys import time +import timeit from concurrent.futures import ThreadPoolExecutor import requests @@ -132,7 +133,6 @@ def update_metric(self): self.metric_store.update_metric(self.metric) def run(self): - self.queue = polytope_queue.create_queue(self.config.get("queue")) self.thread_pool = ThreadPoolExecutor(1) @@ -141,7 +141,6 @@ def run(self): self.update_metric() while not time.sleep(self.poll_interval): - self.queue.keep_alive() # No active request: try to pop from queue and process request in future thread @@ -209,8 +208,10 @@ def run(self): self.update_metric() + @perf_time def process_request(self, request): """Entrypoint for the worker thread.""" + start = timeit.default_timer() id = request.id collection = self.collections[request.collection] @@ -261,6 +262,9 @@ def process_request(self, request): else: request.user_message += "Success" + end = timeit.default_timer() + logging.info(f"PERF_TIME worker request_id, elapsed [s]: {request.id},{(end-start):.4f}") + return def fetch_input_data(self, url): From 4a35e3ed974015f25c7c624302236613dbd8efe8 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Mon, 11 Dec 2023 10:52:21 +0100 Subject: [PATCH 51/82] Fix --- polytope_server/worker/worker.py | 1 - 1 file changed, 1 deletion(-) diff --git a/polytope_server/worker/worker.py b/polytope_server/worker/worker.py index 22dfd63..46b5c74 100644 --- a/polytope_server/worker/worker.py +++ b/polytope_server/worker/worker.py @@ -208,7 +208,6 @@ def run(self): self.update_metric() - @perf_time def process_request(self, request): """Entrypoint for the worker thread.""" start = timeit.default_timer() From ecae32b3740eb388e91fbd355f41639a68b70cba Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Fri, 22 Dec 2023 10:25:11 +0100 Subject: [PATCH 52/82] Fix worker on_process_terminated (#23) --- 
polytope_server/worker/worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/worker/worker.py b/polytope_server/worker/worker.py index 46b5c74..dd116bd 100644 --- a/polytope_server/worker/worker.py +++ b/polytope_server/worker/worker.py @@ -307,7 +307,7 @@ def on_request_fail(self, request, exception): logging.exception("Request failed with exception.", extra={"request_id": request.id}) self.requests_failed += 1 - def on_process_terminated(self): + def on_process_terminated(self, signal, frame): """Called when the worker is asked to exit whilst processing a request, and we want to reschedule the request""" if self.request is not None: From 0983367289a7583239a33a3fadb830a76ecaf1e8 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Fri, 23 Feb 2024 11:22:19 +0100 Subject: [PATCH 53/82] Remove timeit --- polytope_server/common/staging/s3_staging.py | 7 ------- polytope_server/worker/worker.py | 5 ----- 2 files changed, 12 deletions(-) diff --git a/polytope_server/common/staging/s3_staging.py b/polytope_server/common/staging/s3_staging.py index 3badb2f..027c14c 100644 --- a/polytope_server/common/staging/s3_staging.py +++ b/polytope_server/common/staging/s3_staging.py @@ -29,7 +29,6 @@ import json import logging -import timeit import minio from minio import Minio @@ -72,7 +71,6 @@ def __init__(self, config): ) def create(self, name, data, content_type): - start = timeit.default_timer() url = self.get_url(name) logging.info("Putting to staging: {}".format(name)) @@ -131,11 +129,6 @@ def create(self, name, data, content_type): raise logging.info("Put to {}".format(url)) - end = timeit.default_timer() - delta = end - start - logging.info( - f"PERF_TIME fdb+s3 request_id, elapsed [s], size [bytes], throughput [MiB/s]: {name},{delta:.4f},{total_size},{(total_size/1024/1024/delta):.2f}" - ) return url def read(self, name): diff --git a/polytope_server/worker/worker.py b/polytope_server/worker/worker.py index dd116bd..27d5e90 100644 --- a/polytope_server/worker/worker.py +++ b/polytope_server/worker/worker.py @@ -23,7 +23,6 @@ import signal import sys import time -import timeit from concurrent.futures import ThreadPoolExecutor import requests @@ -210,7 +209,6 @@ def run(self): def process_request(self, request): """Entrypoint for the worker thread.""" - start = timeit.default_timer() id = request.id collection = self.collections[request.collection] @@ -261,9 +259,6 @@ def process_request(self, request): else: request.user_message += "Success" - end = timeit.default_timer() - logging.info(f"PERF_TIME worker request_id, elapsed [s]: {request.id},{(end-start):.4f}") - return def fetch_input_data(self, url): From caa8faba5c8156b3e007a459c05fadf4e73be0f1 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Fri, 23 Feb 2024 11:24:24 +0100 Subject: [PATCH 54/82] Reenable metric collection --- .../common/metric_collector/storage_metric_collector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polytope_server/common/metric_collector/storage_metric_collector.py b/polytope_server/common/metric_collector/storage_metric_collector.py index 809b592..49a7348 100644 --- a/polytope_server/common/metric_collector/storage_metric_collector.py +++ b/polytope_server/common/metric_collector/storage_metric_collector.py @@ -107,8 +107,8 @@ def collect(self): def storage_space_used(self): space_used = 0 - # for db in self.client.list_database_names(): - # space_used += int(getattr(self.client, db).command({"dbStats": 1}).get("storageSize")) + for db in 
self.client.list_database_names(): + space_used += int(getattr(self.client, db).command({"dbStats": 1}).get("storageSize")) return space_used def total_entries(self): From d5e85f01e7d9c470d742fae1b64922528a5b9340 Mon Sep 17 00:00:00 2001 From: Stefan Friedli Date: Fri, 23 Feb 2024 15:40:29 +0100 Subject: [PATCH 55/82] Remove dep pyfdb --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3e15bf4..4a4dab1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,4 +31,3 @@ ldap3==2.7 docker==4.2.0 python-keycloak==0.24.0 boto3==1.28.80 -pyfdb@https://github.com/ecmwf/pyfdb/archive/refs/tags/0.0.3.tar.gz \ No newline at end of file From e780eeb7a2acf9e0bcc606dea4f87eb1563e212e Mon Sep 17 00:00:00 2001 From: majh Date: Mon, 22 Jan 2024 20:34:38 +0000 Subject: [PATCH 56/82] feature extraction datasource --- .../common/datasource/datasource.py | 1 + polytope_server/common/datasource/fdb.py | 1 + .../common/datasource/federated.py | 222 ++++++++++++++ polytope_server/common/datasource/mars.py | 26 +- polytope_server/common/datasource/polytope.py | 290 ++++++++---------- polytope_server/frontend/flask_handler.py | 15 +- 6 files changed, 381 insertions(+), 174 deletions(-) create mode 100644 polytope_server/common/datasource/federated.py diff --git a/polytope_server/common/datasource/datasource.py b/polytope_server/common/datasource/datasource.py index 47ea379..3151097 100644 --- a/polytope_server/common/datasource/datasource.py +++ b/polytope_server/common/datasource/datasource.py @@ -109,6 +109,7 @@ def dispatch(self, request, input_data) -> bool: "mars": "MARSDataSource", "webmars": "WebMARSDataSource", "polytope": "PolytopeDataSource", + "federated": "FederatedDataSource", "echo": "EchoDataSource", "dummy": "DummyDataSource", "raise": "RaiseDataSource", diff --git a/polytope_server/common/datasource/fdb.py b/polytope_server/common/datasource/fdb.py index fb5c09a..03821ea 100644 --- a/polytope_server/common/datasource/fdb.py +++ b/polytope_server/common/datasource/fdb.py @@ -47,6 +47,7 @@ def __init__(self, config): self.check_schema() os.environ["FDB5_CONFIG"] = json.dumps(self.fdb_config) + os.environ["FDB5_HOME"] = self.config("fdb_home", "/opt/fdb") self.fdb = pyfdb.FDB() if "spaces" in self.fdb_config: diff --git a/polytope_server/common/datasource/federated.py b/polytope_server/common/datasource/federated.py new file mode 100644 index 0000000..2304b7b --- /dev/null +++ b/polytope_server/common/datasource/federated.py @@ -0,0 +1,222 @@ +# +# Copyright 2022 European Centre for Medium-Range Weather Forecasts (ECMWF) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. +# + +import hashlib +import logging +import time +from http import HTTPStatus + +import requests + +from . 
import datasource + + +class FederatedDataSource(datasource.DataSource): + def __init__(self, config): + self.type = config["type"] + assert self.type == "federated" + + self.url = config["url"] + self.port = config.get("port", 443) + self.secret = config["secret"] + self.collection = config["collection"] + self.api_version = config.get("api_version", "v1") + self.result_url = None + self.mime_type_result = "application/octet-stream" + + def get_type(self): + return self.type + + def archive(self, request): + + url = "/".join( + [ + self.url + ":" + str(self.port), + "api", + self.api_version, + "requests", + self.collection, + ] + ) + logging.info("Built URL for request: {}".format(url)) + + body = { + "verb": "archive", + "request": request.user_request, + } + + headers = { + "Authorization": "Federation {}:{}:{}".format(self.secret, request.user.username, request.user.realm) + } + + # Post the initial request + + response = requests.post(url, json=body, headers=headers) + + if response.status_code != HTTPStatus.ACCEPTED: + raise Exception( + "Request could not be POSTed to remote Polytope at {}.\n\ + HTTP error code {}.\n\ + Message: {}".format( + url, response.status_code, response.content + ) + ) + + url = response.headers["location"] + + # Post the data to the upload location + + response = requests.post( + url, + self.input_data, + headers={ + **headers, + "X-Checksum": hashlib.md5(self.input_data).hexdigest(), + }, + ) + + if response.status_code != HTTPStatus.ACCEPTED: + raise Exception( + "Data could not be POSTed for upload to remote Polytope at {}.\n\ + HTTP error code {}.\n\ + Message: {}".format( + url, response.status_code, response.content + ) + ) + + url = response.headers["location"] + time.sleep(int(float(response.headers["retry-after"]))) + + status = HTTPStatus.ACCEPTED + + # Poll until the request fails or returns 200 + while status == HTTPStatus.ACCEPTED: + response = requests.get(url, headers=headers, allow_redirects=False) + status = response.status_code + logging.info(response.json()) + if "location" in response.headers: + url = response.headers["location"] + if "retry-after" in response.headers: + time.sleep(int(float(response.headers["retry-after"]))) + + if status != HTTPStatus.OK: + raise Exception( + "Request failed on remote Polytope at {}.\n\ + HTTP error code {}.\n\ + Message: {}".format( + url, status, response.json()["message"] + ) + ) + + return True + + def retrieve(self, request): + + url = "/".join( + [ + self.url + ":" + str(self.port), + "api", + self.api_version, + "requests", + self.collection, + ] + ) + logging.info("Built URL for request: {}".format(url)) + + body = { + "verb": "retrieve", + "request": request.user_request, + } + + headers = { + "Authorization": "Federation {}:{}:{}".format(self.secret, request.user.username, request.user.realm) + } + + # Post the initial request + + response = requests.post(url, json=body, headers=headers) + + if response.status_code != HTTPStatus.ACCEPTED: + raise Exception( + "Request could not be POSTed to remote Polytope at {}.\n\ + HTTP error code {}.\n\ + Message: {}".format( + url, response.status_code, response.content + ) + ) + + url = response.headers["location"] + time.sleep(int(float(response.headers["retry-after"]))) + + status = HTTPStatus.ACCEPTED + + # Poll until the request fails or returns 303 + while status == HTTPStatus.ACCEPTED: + response = requests.get(url, headers=headers, allow_redirects=False) + status = response.status_code + if "location" in response.headers: + url = 
response.headers["location"] + if "retry-after" in response.headers: + time.sleep(int(float(response.headers["retry-after"]))) + + if status != HTTPStatus.SEE_OTHER: + raise Exception( + "Request failed on remote Polytope at {}.\n\ + HTTP error code {}.\n\ + Message: {}".format( + url, status, response.json()["message"] + ) + ) + + self.result_url = url + + return True + + def result(self, request): + + response = requests.get(self.result_url, stream=True) + + self.mime_type_result = response.headers["Content-Type"] + + if response.status_code != HTTPStatus.OK: + raise Exception( + "Request could not be downloaded from remote Polytope at {}.\n\ + HTTP error code {}.\n\ + Message: {}".format( + self.result_url, + response.status_code, + response.json()["message"], + ) + ) + + try: + for chunk in response.iter_content(chunk_size=1024): + yield chunk + finally: + response.close() + + def mime_type(self) -> str: + return self.mime_type_result + + def destroy(self, request) -> None: + return + + def match(self, request): + return diff --git a/polytope_server/common/datasource/mars.py b/polytope_server/common/datasource/mars.py index 97600c6..5993bad 100644 --- a/polytope_server/common/datasource/mars.py +++ b/polytope_server/common/datasource/mars.py @@ -59,6 +59,7 @@ def __init__(self, config): yaml.dump(self.mars_config, f) else: self.mars_home = None + self.mars_config = None def get_type(self): return self.type @@ -68,14 +69,26 @@ def match(self, request): r = yaml.safe_load(request.user_request) or {} for k, v in self.match_rules.items(): + + # An empty match rule means that the key must not be present + if v is None or len(v) == 0: + if k in r: + raise Exception("Request containing key '{}' is not allowed".format(k)) + else: + continue # no more checks to do + # Check that all required keys exist - if k not in r: - raise Exception("Request does not contain expected key {}".format(k)) + if k not in r and not (v is None or len(v) == 0): + raise Exception("Request does not contain expected key '{}'".format(k)) + + # Process date rules if k == "date": self.date_check(r["date"], v) continue + # ... and check the value of other keys + v = [v] if isinstance(v, str) else v if r[k] not in v: raise Exception("got {} : {}, but expected one of {}".format(k, r[k], v)) @@ -171,16 +184,13 @@ def make_env(self, request): } if self.mars_config is not None: - env = { - **os.environ, - "MARS_HOME": self.mars_home, - } + env["MARS_HOME"] = self.mars_home logging.info("Accessing MARS on behalf of user {} with token {}".format(mars_user, mars_token)) - except Exception: + except Exception as e: logging.error("MARS request aborted because user does not have associated ECMWF credentials") - raise Exception() + raise e return env diff --git a/polytope_server/common/datasource/polytope.py b/polytope_server/common/datasource/polytope.py index 16200b7..0c98db9 100644 --- a/polytope_server/common/datasource/polytope.py +++ b/polytope_server/common/datasource/polytope.py @@ -18,205 +18,165 @@ # does it submit to any jurisdiction. # -import hashlib +import json import logging -import time -from http import HTTPStatus - -import requests - +import os +import subprocess + +import pygribjump +import polytope +import tempfile +from polytope_mars.api import PolytopeMars +from polytope_mars.api import features +from pathlib import Path +import yaml + +from ..caching import cache from . 
import datasource class PolytopeDataSource(datasource.DataSource): def __init__(self, config): + self.config = config self.type = config["type"] assert self.type == "polytope" + self.match_rules = config.get("match", {}) + self.patch_rules = config.get("patch", {}) + self.output = None + + # still need to set up fdb + self.fdb_config = self.config["fdb-config"] + + self.check_schema() + + # os.environ["FDB5_CONFIG"] = json.dumps(self.fdb_config) + # os.environ["FDB5_HOME"] = self.config.get("fdb_home", "/opt/fdb-gribjump") + os.environ["GRIBJUMP_HOME"] = "/opt/fdb/gribjump" + + # if "spaces" in self.fdb_config: + # for space in self.fdb_config["spaces"]: + # for root in space["roots"]: + # os.makedirs(root["path"], exist_ok=True) + + # Set up gribjump + self.gribjump_config = self.config["gribjump-config"] + os.makedirs("/home/polytope/gribjump/", exist_ok=True) + with open("/home/polytope/gribjump/config.yaml", "w") as f: + json.dump(self.gribjump_config, f) + os.environ["GRIBJUMP_CONFIG_FILE"] = "/home/polytope/gribjump/config.yaml" + self.gj = pygribjump.GribJump() + + # Set up polytope feature extraction library + self.polytope_options = { + "values": { + "mapper": {"type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]} + }, + "date": {"merge": {"with": "time", "linkers": ["T", "00"]}}, + "step": {"type_change": "int"}, + } - self.url = config["url"] - self.port = config.get("port", 443) - self.secret = config["secret"] - self.collection = config["collection"] - self.api_version = config.get("api_version", "v1") - self.result_url = None - self.mime_type_result = "application/octet-stream" + logging.info("Set up gribjump") - def get_type(self): - return self.type - def archive(self, request): + #todo: remove when we no longer need to set up a valid fdb to use gribjump + def check_schema(self): - url = "/".join( - [ - self.url + ":" + str(self.port), - "api", - self.api_version, - "requests", - self.collection, - ] - ) - logging.info("Built URL for request: {}".format(url)) + schema = self.fdb_config.get("schema", None) - body = { - "verb": "archive", - "request": request.user_request, - } + # If schema is empty, leave it empty + if schema is None: + return - headers = { - "Authorization": "Federation {}:{}:{}".format(self.secret, request.user.username, request.user.realm) - } + # If schema is just a string, then it must be a path already + if isinstance(self.fdb_config["schema"], str): + return - # Post the initial request + # pull schema from git + if "git" in schema: - response = requests.post(url, json=body, headers=headers) + git_config = schema["git"] + git_path = Path(git_config["path"]) - if response.status_code != HTTPStatus.ACCEPTED: - raise Exception( - "Request could not be POSTed to remote Polytope at {}.\n\ - HTTP error code {}.\n\ - Message: {}".format( - url, response.status_code, response.content - ) + local_path = ( + Path(tempfile.gettempdir()) + .joinpath(git_config["remote"].replace(":", "")) + .joinpath(git_config["branch"]) + .joinpath(git_path) ) - url = response.headers["location"] + Path(local_path.parent).mkdir(parents=True, exist_ok=True) - # Post the data to the upload location - - response = requests.post( - url, - self.input_data, - headers={ - **headers, - "X-Checksum": hashlib.md5(self.input_data).hexdigest(), - }, - ) - - if response.status_code != HTTPStatus.ACCEPTED: - raise Exception( - "Data could not be POSTed for upload to remote Polytope at {}.\n\ - HTTP error code {}.\n\ - Message: {}".format( - url, response.status_code, 
response.content + with open(local_path, "w+") as f: + f.write( + self.git_download_schema( + git_config["remote"], + git_config["branch"], + git_path.parent, + git_path.name, + ) ) - ) - url = response.headers["location"] - time.sleep(int(float(response.headers["retry-after"]))) - - status = HTTPStatus.ACCEPTED - - # Poll until the request fails or returns 200 - while status == HTTPStatus.ACCEPTED: - response = requests.get(url, headers=headers, allow_redirects=False) - status = response.status_code - logging.info(response.json()) - if "location" in response.headers: - url = response.headers["location"] - if "retry-after" in response.headers: - time.sleep(int(float(response.headers["retry-after"]))) - - if status != HTTPStatus.OK: - raise Exception( - "Request failed on remote Polytope at {}.\n\ - HTTP error code {}.\n\ - Message: {}".format( - url, status, response.json()["message"] - ) - ) - - return True - - def retrieve(self, request): + self.fdb_config["schema"] = str(local_path) - url = "/".join( - [ - self.url + ":" + str(self.port), - "api", - self.api_version, - "requests", - self.collection, - ] + @cache(lifetime=500) + def git_download_schema(self, remote, branch, git_dir, git_file): + call = "git archive --remote {} {}:{} {} | tar -xO {}".format( + remote, branch, str(git_dir), str(git_file), str(git_file) ) - logging.info("Built URL for request: {}".format(url)) - - body = { - "verb": "retrieve", - "request": request.user_request, - } - - headers = { - "Authorization": "Federation {}:{}:{}".format(self.secret, request.user.username, request.user.realm) - } - - # Post the initial request - - response = requests.post(url, json=body, headers=headers) - - if response.status_code != HTTPStatus.ACCEPTED: - raise Exception( - "Request could not be POSTed to remote Polytope at {}.\n\ - HTTP error code {}.\n\ - Message: {}".format( - url, response.status_code, response.content - ) - ) - - url = response.headers["location"] - time.sleep(int(float(response.headers["retry-after"]))) - - status = HTTPStatus.ACCEPTED - - # Poll until the request fails or returns 303 - while status == HTTPStatus.ACCEPTED: - response = requests.get(url, headers=headers, allow_redirects=False) - status = response.status_code - if "location" in response.headers: - url = response.headers["location"] - if "retry-after" in response.headers: - time.sleep(int(float(response.headers["retry-after"]))) - - if status != HTTPStatus.SEE_OTHER: - raise Exception( - "Request failed on remote Polytope at {}.\n\ - HTTP error code {}.\n\ - Message: {}".format( - url, status, response.json()["message"] - ) - ) + logging.debug("Fetching FDB schema from git with call: {}".format(call)) + output = subprocess.check_output(call, shell=True) + return output.decode("utf-8") + + def get_type(self): + return self.type - self.result_url = url + def archive(self, request): + raise NotImplementedError() + def retrieve(self, request): + r = yaml.safe_load(request.user_request) + logging.info(r) + + # We take the static config from the match rules of the datasource + self.polytope_config = {} + for k, v in self.match_rules.items(): + self.polytope_config[k] = isinstance(v, list) and v[0] or v + + logging.info(self.polytope_config) + logging.info(self.polytope_options) + p = PolytopeMars(self.polytope_config, self.polytope_options) + logging.info(p) + + self.output = p.extract(r) + self.output = json.dumps(self.output).encode("utf-8") + logging.info(self.output) return True def result(self, request): + logging.info("Getting result") + yield 
self.output - response = requests.get(self.result_url, stream=True) + def match(self, request): - self.mime_type_result = response.headers["Content-Type"] + r = yaml.safe_load(request.user_request) or {} - if response.status_code != HTTPStatus.OK: - raise Exception( - "Request could not be downloaded from remote Polytope at {}.\n\ - HTTP error code {}.\n\ - Message: {}".format( - self.result_url, - response.status_code, - response.json()["message"], - ) - ) + for k, v in self.match_rules.items(): + # Check that all required keys exist + if k not in r: + raise Exception("Request does not contain expected key {}".format(k)) - try: - for chunk in response.iter_content(chunk_size=1024): - yield chunk - finally: - response.close() + # ... and check the value of other keys + v = [v] if isinstance(v, str) else v - def mime_type(self) -> str: - return self.mime_type_result + if r[k] not in v: + raise Exception("got {} : {}, but expected one of {}".format(k, r[k], v)) + + # Finally check that there is a feature specified in the request + if "feature" not in r: + raise Exception("Request does not contain expected key 'feature'") def destroy(self, request) -> None: - return + pass - def match(self, request): - return + def mime_type(self) -> str: + return "application/prs.coverage+json" diff --git a/polytope_server/frontend/flask_handler.py b/polytope_server/frontend/flask_handler.py index 9e18526..d5dd9e5 100644 --- a/polytope_server/frontend/flask_handler.py +++ b/polytope_server/frontend/flask_handler.py @@ -223,7 +223,7 @@ def uploads(request_id): @handler.route("/api/v1/collections", methods=["GET"]) def list_collections(): auth_header = get_auth_header(request) - authorized_collections = [] + collection_config = authorized_collections = [] for name, collection in collections.items(): try: if auth.can_access_collection(auth_header, collection): @@ -231,6 +231,19 @@ def list_collections(): except ForbiddenRequest: pass return RequestSucceeded(authorized_collections) + + # New handler + # @handler.route("/api/v1/collection/", methods=["GET"]) + # def describe_collection(collection): + # auth_header = get_auth_header(request) + # authorized_collections = [] + # for name, collection in collections.items(): + # try: + # if auth.can_access_collection(auth_header, collection): + # authorized_collections.append(name) + # except ForbiddenRequest: + # pass + # return RequestSucceeded(authorized_collections) @handler.after_request def add_header(response: flask.Response): From 766039296104767f00a6e64a27a6547f6031ab4d Mon Sep 17 00:00:00 2001 From: majh Date: Wed, 24 Jan 2024 00:15:45 +0000 Subject: [PATCH 57/82] change import to after os.environ --- polytope_server/common/datasource/polytope.py | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/polytope_server/common/datasource/polytope.py b/polytope_server/common/datasource/polytope.py index 0c98db9..7ff520e 100644 --- a/polytope_server/common/datasource/polytope.py +++ b/polytope_server/common/datasource/polytope.py @@ -23,11 +23,10 @@ import os import subprocess -import pygribjump +os.environ["GRIBJUMP_HOME"] = "/opt/fdb-gribjump" + import polytope import tempfile -from polytope_mars.api import PolytopeMars -from polytope_mars.api import features from pathlib import Path import yaml @@ -47,16 +46,19 @@ def __init__(self, config): # still need to set up fdb self.fdb_config = self.config["fdb-config"] + self.non_sliceable = self.config.get("non-sliceable", None) + assert self.non_sliceable is not None + 
self.check_schema() - # os.environ["FDB5_CONFIG"] = json.dumps(self.fdb_config) - # os.environ["FDB5_HOME"] = self.config.get("fdb_home", "/opt/fdb-gribjump") - os.environ["GRIBJUMP_HOME"] = "/opt/fdb/gribjump" + os.environ["FDB5_CONFIG"] = json.dumps(self.fdb_config) + os.environ["FDB5_HOME"] = self.config.get("fdb_home", "/opt/fdb-gribjump") + # forced change - # if "spaces" in self.fdb_config: - # for space in self.fdb_config["spaces"]: - # for root in space["roots"]: - # os.makedirs(root["path"], exist_ok=True) + if "spaces" in self.fdb_config: + for space in self.fdb_config["spaces"]: + for root in space["roots"]: + os.makedirs(root["path"], exist_ok=True) # Set up gribjump self.gribjump_config = self.config["gribjump-config"] @@ -64,7 +66,7 @@ def __init__(self, config): with open("/home/polytope/gribjump/config.yaml", "w") as f: json.dump(self.gribjump_config, f) os.environ["GRIBJUMP_CONFIG_FILE"] = "/home/polytope/gribjump/config.yaml" - self.gj = pygribjump.GribJump() + # self.gj = pygribjump.GribJump() # Set up polytope feature extraction library self.polytope_options = { @@ -73,6 +75,7 @@ def __init__(self, config): }, "date": {"merge": {"with": "time", "linkers": ["T", "00"]}}, "step": {"type_change": "int"}, + "number": {"type_change": "int"}, } logging.info("Set up gribjump") @@ -118,7 +121,7 @@ def check_schema(self): self.fdb_config["schema"] = str(local_path) - @cache(lifetime=500) + @cache(lifetime=5000000) def git_download_schema(self, remote, branch, git_dir, git_file): call = "git archive --remote {} {}:{} {} | tar -xO {}".format( remote, branch, str(git_dir), str(git_file), str(git_file) @@ -139,13 +142,15 @@ def retrieve(self, request): # We take the static config from the match rules of the datasource self.polytope_config = {} - for k, v in self.match_rules.items(): - self.polytope_config[k] = isinstance(v, list) and v[0] or v + for k in self.non_sliceable: + self.polytope_config[k] = r[k] + + assert len(self.polytope_config) > 0 logging.info(self.polytope_config) logging.info(self.polytope_options) + from polytope_mars.api import PolytopeMars p = PolytopeMars(self.polytope_config, self.polytope_options) - logging.info(p) self.output = p.extract(r) self.output = json.dumps(self.output).encode("utf-8") From f0f46a8a6958fb25de40ac24e66988727e1ae551 Mon Sep 17 00:00:00 2001 From: majh Date: Fri, 26 Jan 2024 10:07:13 +0000 Subject: [PATCH 58/82] remove logging --- polytope_server/common/datasource/polytope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/common/datasource/polytope.py b/polytope_server/common/datasource/polytope.py index 7ff520e..59b060b 100644 --- a/polytope_server/common/datasource/polytope.py +++ b/polytope_server/common/datasource/polytope.py @@ -154,7 +154,7 @@ def retrieve(self, request): self.output = p.extract(r) self.output = json.dumps(self.output).encode("utf-8") - logging.info(self.output) + # logging.info(self.output) return True def result(self, request): From 1cc199b00d5e5104c30ecea3ecfcee9e3423e46b Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 5 Mar 2024 14:17:11 +0000 Subject: [PATCH 59/82] fixes for MARS access protocols --- polytope_server/common/datasource/mars.py | 14 ++++++++++++++ polytope_server/common/datasource/polytope.py | 1 + 2 files changed, 15 insertions(+) diff --git a/polytope_server/common/datasource/mars.py b/polytope_server/common/datasource/mars.py index 5993bad..f949fa0 100644 --- a/polytope_server/common/datasource/mars.py +++ b/polytope_server/common/datasource/mars.py @@ -50,9 
+50,23 @@ def __init__(self, config): self.mars_binary = config.get("binary", "mars") + self.protocol = config.get("protocol", "dhs") + + if self.protocol == "remote": + # need to set FDB5 config in a /etc/fdb/config.yaml + self.fdb_config = config.get("fdb_config", {}) + self.fdb_home = self.tmp_dir + "/fdb-home" + os.makedirs(self.fdb_home + "/etc/fdb/", exist_ok=True) + with open(self.fdb_home + "/etc/fdb/config.yaml", "w") as f: + yaml.dump(self.fdb_config, f) + # Write the mars config if "config" in config: self.mars_config = config.get("config", {}) + + if self.protocol == "remote": + self.mars_config[0]["home"] = self.fdb_home + self.mars_home = self.tmp_dir + "/mars-home" os.makedirs(self.mars_home + "/etc/mars-client/", exist_ok=True) with open(self.mars_home + "/etc/mars-client/databases.yaml", "w") as f: diff --git a/polytope_server/common/datasource/polytope.py b/polytope_server/common/datasource/polytope.py index 59b060b..ff21fc2 100644 --- a/polytope_server/common/datasource/polytope.py +++ b/polytope_server/common/datasource/polytope.py @@ -76,6 +76,7 @@ def __init__(self, config): "date": {"merge": {"with": "time", "linkers": ["T", "00"]}}, "step": {"type_change": "int"}, "number": {"type_change": "int"}, + "longitude" : {"cyclic": [0, 360]}, } logging.info("Set up gribjump") From bcb340ed3da20643c3a4f4099acc711556c2066f Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 5 Mar 2024 14:17:33 +0000 Subject: [PATCH 60/82] prototype --- .../polytope-admin/polytope_admin/api/Auth.py | 10 +- .../keycloak_bearer_authentication.py | 123 ++++++++++++++++++ 2 files changed, 128 insertions(+), 5 deletions(-) create mode 100644 polytope_server/common/authentication/keycloak_bearer_authentication.py diff --git a/admin/polytope-admin/polytope_admin/api/Auth.py b/admin/polytope-admin/polytope_admin/api/Auth.py index 7a6cd40..b329189 100644 --- a/admin/polytope-admin/polytope_admin/api/Auth.py +++ b/admin/polytope-admin/polytope_admin/api/Auth.py @@ -150,12 +150,12 @@ def fetch_key(self, login=True): email = self.read_email self._logger.info("Polytope user key found in session cache for user " + config["username"]) else: - key_file = Path(config["key_path"]) / config["username"] + key_file = Path(config["key_path"]) try: with open(str(key_file), "r") as infile: info = json.load(infile) - key = info["key"] - email = info["email"] + key = info["user_key"] + email = info["user_email"] except FileNotFoundError: key = None email = None @@ -190,7 +190,7 @@ def persist(self, key, email, username=None): if not username: username = config["username"] os.makedirs(config["key_path"], exist_ok=True) - key_file = Path(config["key_path"]) / username + key_file = Path(config["key_path"]) with open(str(key_file), "w", encoding="utf8") as outfile: json.dump({"key": key, "email": email}, outfile) self.read_key = key @@ -214,7 +214,7 @@ def erase(self, username=None): config = self.config.get() if not username: username = config["username"] - key_path = Path(config["key_path"]) / username + key_path = Path(config["key_path"]) try: os.remove(str(key_path)) self._logger.info("Credentials removed for " + username) diff --git a/polytope_server/common/authentication/keycloak_bearer_authentication.py b/polytope_server/common/authentication/keycloak_bearer_authentication.py new file mode 100644 index 0000000..8f26284 --- /dev/null +++ b/polytope_server/common/authentication/keycloak_bearer_authentication.py @@ -0,0 +1,123 @@ +# +# Copyright 2022 European Centre for Medium-Range Weather Forecasts (ECMWF) +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. +# + +import base64 +import logging +import os + +from keycloak import KeycloakOpenID +from keycloak.exceptions import KeycloakConnectionError + +from ..auth import User +from ..caching import cache +from ..exceptions import ForbiddenRequest +from . import authentication + + +class KeycloakBearerAuthentication(authentication.Authentication): + def __init__(self, name, realm, config): + self.config = config + + # URL of the keycloak API: e.g. https://keycloak.insitute.org/auth/" + self.url = config["url"] + + # Keycloak client id and secret + self.client_id = config["client_id"] # e.g. polytope + self.client_secret = config["client_secret"] + + # The keycloak realm to look for users + self.keycloak_realm = config["keycloak_realm"] + + self.skipTLS = config.get("skip_tls", False) + + # Connection parameters + self.timeout = config.get("timeout", 3) + + # Mapping user attributes to keycloak attributes + self.attribute_map = config.get("attributes", {}) + + super().__init__(name, realm, config) + + def authentication_type(self): + return "Basic" + + def authentication_info(self): + return "Authenticate with Keycloak username and password" + + @cache(lifetime=120) + def authenticate(self, credentials: str) -> User: + + # credentials should be of the form 'base64(:)' + try: + decoded = base64.b64decode(credentials).decode("utf-8") + auth_user, auth_password = decoded.split(":", 1) + except UnicodeDecodeError: + raise ForbiddenRequest("Credentials could not be decoded") + except ValueError: + raise ForbiddenRequest("Credentials could not be unpacked") + + _environ = dict(os.environ) + try: + os.environ["http_proxy"] = os.getenv("POLYTOPE_PROXY", "") + os.environ["https_proxy"] = os.getenv("POLYTOPE_PROXY", "") + + logging.debug("Setting HTTPS_PROXY to {}".format(os.environ["https_proxy"])) + + try: + + # Open a session as a registered client + client = KeycloakOpenID( + server_url=self.url, + client_id=self.client_id, + realm_name=self.keycloak_realm, + client_secret_key=self.client_secret, + verify=(self.skipTLS is False), + ) + + client.connection.timeout = self.timeout + + # Obtain a session token on behalf of the user + token = client.token(auth_user, auth_password) + + except KeycloakConnectionError: + # Raise ForbiddenRequest rather than ServerError so that we are not blocked if Keycloak is down + raise ForbiddenRequest("Could not connect to Keycloak") + except Exception: + raise ForbiddenRequest("Invalid Keycloak credentials") + + userinfo = client.userinfo(token["access_token"]) + + user = User(auth_user, self.realm()) + + logging.debug("Found user {} in keycloak".format(auth_user)) + + for k, v in self.attribute_map.items(): + if v in userinfo: + user.attributes[k] = userinfo[v] + logging.debug("User {} has attribute {} : {}".format(user.username, k, 
user.attributes[k])) + + return user + + finally: + os.environ.clear() + os.environ.update(_environ) + + def collect_metric_info(self): + return {} From 79758608cc67893b84e5e54b505c727dd4d95228 Mon Sep 17 00:00:00 2001 From: majh Date: Thu, 14 Mar 2024 14:28:26 +0000 Subject: [PATCH 61/82] add jwt token authentication --- .../common/authentication/authentication.py | 1 + .../jwt_bearer_authentication.py | 65 +++++++++ .../keycloak_bearer_authentication.py | 123 ------------------ 3 files changed, 66 insertions(+), 123 deletions(-) create mode 100644 polytope_server/common/authentication/jwt_bearer_authentication.py delete mode 100644 polytope_server/common/authentication/keycloak_bearer_authentication.py diff --git a/polytope_server/common/authentication/authentication.py b/polytope_server/common/authentication/authentication.py index 6602740..51546dd 100644 --- a/polytope_server/common/authentication/authentication.py +++ b/polytope_server/common/authentication/authentication.py @@ -77,6 +77,7 @@ def name(self) -> str: "plain": "PlainAuthentication", "keycloak": "KeycloakAuthentication", "federation": "FederationAuthentication", + "jwt" : "JWTBearerAuthentication", } diff --git a/polytope_server/common/authentication/jwt_bearer_authentication.py b/polytope_server/common/authentication/jwt_bearer_authentication.py new file mode 100644 index 0000000..e20e3a6 --- /dev/null +++ b/polytope_server/common/authentication/jwt_bearer_authentication.py @@ -0,0 +1,65 @@ +# +# Copyright 2022 European Centre for Medium-Range Weather Forecasts (ECMWF) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. +# + +import logging +import os +import requests +from jose import jwt + +from ..auth import User +from ..caching import cache +from . 
import authentication + + +class JWTBearerAuthentication(authentication.Authentication): + def __init__(self, name, realm, config): + self.config = config + + self.certs_url = config["cert_url"] + + super().__init__(name, realm, config) + + def authentication_type(self): + return "Bearer" + + def authentication_info(self): + return "Authenticate with JWT token" + + @cache(lifetime=120) + def get_certs(self): + return requests.get(self.certs_url).json() + + @cache(lifetime=120) + def authenticate(self, credentials: str) -> User: + certs = self.get_certs() + decoded_token = jwt.decode(token=credentials, + algorithms=jwt.get_unverified_header(credentials).get('alg'), + key=certs + ) + + user = User(decoded_token["sub"], self.realm()) + + logging.debug("Found user {} from decoded JWT".format(user)) + + return user + + + def collect_metric_info(self): + return {} diff --git a/polytope_server/common/authentication/keycloak_bearer_authentication.py b/polytope_server/common/authentication/keycloak_bearer_authentication.py deleted file mode 100644 index 8f26284..0000000 --- a/polytope_server/common/authentication/keycloak_bearer_authentication.py +++ /dev/null @@ -1,123 +0,0 @@ -# -# Copyright 2022 European Centre for Medium-Range Weather Forecasts (ECMWF) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation nor -# does it submit to any jurisdiction. -# - -import base64 -import logging -import os - -from keycloak import KeycloakOpenID -from keycloak.exceptions import KeycloakConnectionError - -from ..auth import User -from ..caching import cache -from ..exceptions import ForbiddenRequest -from . import authentication - - -class KeycloakBearerAuthentication(authentication.Authentication): - def __init__(self, name, realm, config): - self.config = config - - # URL of the keycloak API: e.g. https://keycloak.insitute.org/auth/" - self.url = config["url"] - - # Keycloak client id and secret - self.client_id = config["client_id"] # e.g. 
polytope - self.client_secret = config["client_secret"] - - # The keycloak realm to look for users - self.keycloak_realm = config["keycloak_realm"] - - self.skipTLS = config.get("skip_tls", False) - - # Connection parameters - self.timeout = config.get("timeout", 3) - - # Mapping user attributes to keycloak attributes - self.attribute_map = config.get("attributes", {}) - - super().__init__(name, realm, config) - - def authentication_type(self): - return "Basic" - - def authentication_info(self): - return "Authenticate with Keycloak username and password" - - @cache(lifetime=120) - def authenticate(self, credentials: str) -> User: - - # credentials should be of the form 'base64(:)' - try: - decoded = base64.b64decode(credentials).decode("utf-8") - auth_user, auth_password = decoded.split(":", 1) - except UnicodeDecodeError: - raise ForbiddenRequest("Credentials could not be decoded") - except ValueError: - raise ForbiddenRequest("Credentials could not be unpacked") - - _environ = dict(os.environ) - try: - os.environ["http_proxy"] = os.getenv("POLYTOPE_PROXY", "") - os.environ["https_proxy"] = os.getenv("POLYTOPE_PROXY", "") - - logging.debug("Setting HTTPS_PROXY to {}".format(os.environ["https_proxy"])) - - try: - - # Open a session as a registered client - client = KeycloakOpenID( - server_url=self.url, - client_id=self.client_id, - realm_name=self.keycloak_realm, - client_secret_key=self.client_secret, - verify=(self.skipTLS is False), - ) - - client.connection.timeout = self.timeout - - # Obtain a session token on behalf of the user - token = client.token(auth_user, auth_password) - - except KeycloakConnectionError: - # Raise ForbiddenRequest rather than ServerError so that we are not blocked if Keycloak is down - raise ForbiddenRequest("Could not connect to Keycloak") - except Exception: - raise ForbiddenRequest("Invalid Keycloak credentials") - - userinfo = client.userinfo(token["access_token"]) - - user = User(auth_user, self.realm()) - - logging.debug("Found user {} in keycloak".format(auth_user)) - - for k, v in self.attribute_map.items(): - if v in userinfo: - user.attributes[k] = userinfo[v] - logging.debug("User {} has attribute {} : {}".format(user.username, k, user.attributes[k])) - - return user - - finally: - os.environ.clear() - os.environ.update(_environ) - - def collect_metric_info(self): - return {} From 214a36d5a69901c631ba3423a504dd4e4b9b1d3e Mon Sep 17 00:00:00 2001 From: majh Date: Fri, 15 Mar 2024 09:34:15 +0000 Subject: [PATCH 62/82] fix typo --- polytope_server/common/datasource/fdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/common/datasource/fdb.py b/polytope_server/common/datasource/fdb.py index 03821ea..609f923 100644 --- a/polytope_server/common/datasource/fdb.py +++ b/polytope_server/common/datasource/fdb.py @@ -47,7 +47,7 @@ def __init__(self, config): self.check_schema() os.environ["FDB5_CONFIG"] = json.dumps(self.fdb_config) - os.environ["FDB5_HOME"] = self.config("fdb_home", "/opt/fdb") + os.environ["FDB5_HOME"] = self.config.get("fdb_home", "/opt/fdb") self.fdb = pyfdb.FDB() if "spaces" in self.fdb_config: From 7e481eadc56f9ead49d517ed20511b7340fcc74e Mon Sep 17 00:00:00 2001 From: majh Date: Fri, 15 Mar 2024 09:35:31 +0000 Subject: [PATCH 63/82] add JWT token authentication --- .../common/authentication/authentication.py | 2 +- ...uthentication.py => jwt_authentication.py} | 22 ++++++++++++------- polytope_server/common/datasource/mars.py | 4 ++-- 3 files changed, 17 insertions(+), 11 deletions(-) rename 
polytope_server/common/authentication/{jwt_bearer_authentication.py => jwt_authentication.py} (70%) diff --git a/polytope_server/common/authentication/authentication.py b/polytope_server/common/authentication/authentication.py index 51546dd..67a589b 100644 --- a/polytope_server/common/authentication/authentication.py +++ b/polytope_server/common/authentication/authentication.py @@ -77,7 +77,7 @@ def name(self) -> str: "plain": "PlainAuthentication", "keycloak": "KeycloakAuthentication", "federation": "FederationAuthentication", - "jwt" : "JWTBearerAuthentication", + "jwt" : "JWTAuthentication", } diff --git a/polytope_server/common/authentication/jwt_bearer_authentication.py b/polytope_server/common/authentication/jwt_authentication.py similarity index 70% rename from polytope_server/common/authentication/jwt_bearer_authentication.py rename to polytope_server/common/authentication/jwt_authentication.py index e20e3a6..47e49bb 100644 --- a/polytope_server/common/authentication/jwt_bearer_authentication.py +++ b/polytope_server/common/authentication/jwt_authentication.py @@ -26,9 +26,10 @@ from ..auth import User from ..caching import cache from . import authentication +from ..exceptions import ForbiddenRequest -class JWTBearerAuthentication(authentication.Authentication): +class JWTAuthentication(authentication.Authentication): def __init__(self, name, realm, config): self.config = config @@ -48,16 +49,21 @@ def get_certs(self): @cache(lifetime=120) def authenticate(self, credentials: str) -> User: - certs = self.get_certs() - decoded_token = jwt.decode(token=credentials, - algorithms=jwt.get_unverified_header(credentials).get('alg'), - key=certs - ) - user = User(decoded_token["sub"], self.realm()) + try: + certs = self.get_certs() + decoded_token = jwt.decode(token=credentials, + algorithms=jwt.get_unverified_header(credentials).get('alg'), + key=certs + ) - logging.debug("Found user {} from decoded JWT".format(user)) + user = User(decoded_token["sub"], self.realm()) + logging.info("Found user {} from decoded JWT".format(user)) + except Exception as e: + logging.info("Failed to authenticate user from JWT") + logging.info(e) + raise ForbiddenRequest("Credentials could not be unpacked") return user diff --git a/polytope_server/common/datasource/mars.py b/polytope_server/common/datasource/mars.py index f949fa0..523c762 100644 --- a/polytope_server/common/datasource/mars.py +++ b/polytope_server/common/datasource/mars.py @@ -182,13 +182,13 @@ def make_env(self, request): logging.info("Overriding MARS_USER_EMAIL with {}".format(self.override_mars_email)) mars_user = self.override_mars_email else: - mars_user = request.user.attributes["ecmwf-email"] + mars_user = request.user.attributes.get("ecmwf-email", "no-email") if self.override_mars_apikey: logging.info("Overriding MARS_USER_TOKEN with {}".format(self.override_mars_apikey)) mars_token = self.override_mars_apikey else: - mars_token = request.user.attributes["ecmwf-apikey"] + mars_token = request.user.attributes.get("ecmwf-apikey", "no-api-key") env = { **os.environ, From abf6f92715fc9e9b29265680a8a0bc39fbae2d5e Mon Sep 17 00:00:00 2001 From: majh Date: Fri, 5 Apr 2024 19:35:46 +0100 Subject: [PATCH 64/82] fix fdb config for fdb remote --- polytope_server/common/datasource/mars.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/polytope_server/common/datasource/mars.py b/polytope_server/common/datasource/mars.py index 523c762..03172da 100644 --- a/polytope_server/common/datasource/mars.py +++ 
b/polytope_server/common/datasource/mars.py @@ -52,13 +52,14 @@ def __init__(self, config): self.protocol = config.get("protocol", "dhs") + # self.fdb_config = None + self.fdb_config = config.get("fdb_config", [{}]) if self.protocol == "remote": # need to set FDB5 config in a /etc/fdb/config.yaml - self.fdb_config = config.get("fdb_config", {}) self.fdb_home = self.tmp_dir + "/fdb-home" - os.makedirs(self.fdb_home + "/etc/fdb/", exist_ok=True) - with open(self.fdb_home + "/etc/fdb/config.yaml", "w") as f: - yaml.dump(self.fdb_config, f) + # os.makedirs(self.fdb_home + "/etc/fdb/", exist_ok=True) + # with open(self.fdb_home + "/etc/fdb/config.yaml", "w") as f: + # yaml.dump(self.fdb_config, f) # Write the mars config if "config" in config: @@ -194,7 +195,8 @@ def make_env(self, request): **os.environ, "MARS_USER_EMAIL": mars_user, "MARS_USER_TOKEN": mars_token, - "ECMWF_MARS_COMMAND": self.mars_binary + "ECMWF_MARS_COMMAND": self.mars_binary, + "FDB5_CONFIG" : yaml.dump(self.fdb_config[0]) } if self.mars_config is not None: From ca1d6c00ea9ff528189b7b50d063ffdfcdccecb2 Mon Sep 17 00:00:00 2001 From: majh Date: Fri, 5 Apr 2024 19:36:32 +0100 Subject: [PATCH 65/82] Fix data-not-found unexpected exit --- .../garbage-collector/garbage_collector.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/polytope_server/garbage-collector/garbage_collector.py b/polytope_server/garbage-collector/garbage_collector.py index 822ff7b..877dec1 100644 --- a/polytope_server/garbage-collector/garbage_collector.py +++ b/polytope_server/garbage-collector/garbage_collector.py @@ -86,7 +86,11 @@ def remove_dangling_data(self): request = self.request_store.get_request(id=data.name) if request is None: logging.info("Deleting {} because it has no matching request.".format(data.name)) - self.staging.delete(data.name) + try: + self.staging.delete(data.name) + except KeyError: + # TODO: why does this happen? 
+ logging.info("Data {} not found in staging.".format(data.name)) def remove_by_size(self): """Cleans data according to size limits of the staging, removing older requests first.""" @@ -122,7 +126,10 @@ def remove_by_size(self): # Delete objects in ascending last_modified order (oldest first) for name, v in sorted(all_objects_by_age.items(), key=lambda x: x[1]["last_modified"]): logging.info("Deleting {} because threshold reached and it is the oldest request.".format(name)) - self.staging.delete(name) + try: + self.staging.delete(name) + except KeyError: + logging.info("Data {} not found in staging.".format(name)) self.request_store.remove_request(name) total_size -= v["size"] logging.info("Size of staging is {}/{}".format(total_size, self.threshold)) From af84dbd5e5c1dc1680319f07ad7f81abb7c545e6 Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 9 Apr 2024 16:36:13 +0100 Subject: [PATCH 66/82] ignore venv --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index eaaeec7..9dcb600 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ htmlcov validated.yaml merged.yaml polytope_server.egg-info -**/build \ No newline at end of file +**/build +.venv \ No newline at end of file From a7693cd6b4e1ea0f811d59ff570be5dea18f66e5 Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 9 Apr 2024 16:36:34 +0100 Subject: [PATCH 67/82] fix FDB retrieval --- polytope_server/common/datasource/fdb.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/polytope_server/common/datasource/fdb.py b/polytope_server/common/datasource/fdb.py index 609f923..b9cae2d 100644 --- a/polytope_server/common/datasource/fdb.py +++ b/polytope_server/common/datasource/fdb.py @@ -26,7 +26,6 @@ from datetime import datetime, timedelta from pathlib import Path -import pyfdb import yaml from dateutil.relativedelta import relativedelta @@ -47,7 +46,10 @@ def __init__(self, config): self.check_schema() os.environ["FDB5_CONFIG"] = json.dumps(self.fdb_config) + os.environ["FDB_CONFIG"] = json.dumps(self.fdb_config) os.environ["FDB5_HOME"] = self.config.get("fdb_home", "/opt/fdb") + os.environ["FDB_HOME"] = self.config.get("fdb_home", "/opt/fdb") + import pyfdb self.fdb = pyfdb.FDB() if "spaces" in self.fdb_config: @@ -143,14 +145,26 @@ def match(self, request): r = yaml.safe_load(request.user_request) or {} for k, v in self.match_rules.items(): + + # An empty match rule means that the key must not be present + if v is None or len(v) == 0: + if k in r: + raise Exception("Request containing key '{}' is not allowed".format(k)) + else: + continue # no more checks to do + # Check that all required keys exist - if k not in r: - raise Exception("Request does not contain expected key {}".format(k)) + if k not in r and not (v is None or len(v) == 0): + raise Exception("Request does not contain expected key '{}'".format(k)) + + # Process date rules if k == "date": self.date_check(r["date"], v) continue + # ... 
and check the value of other keys + v = [v] if isinstance(v, str) else v if r[k] not in v: raise Exception("got {} : {}, but expected one of {}".format(k, r[k], v)) From 80971c26a112324fe83567ec370a708d0bfe0f66 Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 9 Apr 2024 16:36:55 +0100 Subject: [PATCH 68/82] improve failure handling --- polytope_server/worker/worker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/polytope_server/worker/worker.py b/polytope_server/worker/worker.py index 27d5e90..cc7b8b0 100644 --- a/polytope_server/worker/worker.py +++ b/polytope_server/worker/worker.py @@ -245,6 +245,7 @@ def process_request(self, request): except Exception: request.user_message += "Failed to finalize request" + logging.info(request.user_message, extra={"request_id": id}) logging.exception("Failed to finalize request", extra={"request_id": id}) raise @@ -255,6 +256,7 @@ def process_request(self, request): if datasource is None: request.user_message += "Failed to process request." + logging.info(request.user_message, extra={"request_id": id}) raise Exception("Failed to process request.") else: request.user_message += "Success" @@ -302,7 +304,7 @@ def on_request_fail(self, request, exception): logging.exception("Request failed with exception.", extra={"request_id": request.id}) self.requests_failed += 1 - def on_process_terminated(self, signal, frame): + def on_process_terminated(self, signumm=None, frame=None): """Called when the worker is asked to exit whilst processing a request, and we want to reschedule the request""" if self.request is not None: From 3abaa5a5f27bf40c6db256cdfa711d3707c25189 Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 9 Apr 2024 21:19:07 +0100 Subject: [PATCH 69/82] add per-role limits --- polytope_server/broker/broker.py | 38 ++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/polytope_server/broker/broker.py b/polytope_server/broker/broker.py index 7af98bd..9446afb 100644 --- a/polytope_server/broker/broker.py +++ b/polytope_server/broker/broker.py @@ -40,8 +40,6 @@ def __init__(self, config): self.collections = collection.create_collections(config.get("collections")) - self.user_limit = self.broker_config.get("user_limit", None) - def run(self): logging.info("Starting broker...") @@ -89,13 +87,6 @@ def check_limits(self, active_requests, request): logging.debug("Checking limits for request {}".format(request.id)) - # User limits - if self.user_limit is not None: - user_active_requests = sum(qr.user == request.user for qr in active_requests) - if user_active_requests >= self.user_limit: - logging.debug("User has {} of {} active requests".format(user_active_requests, self.user_limit)) - return False - # Collection limits collection_total_limit = self.collections[request.collection].limits.get("total", None) if collection_total_limit is not None: @@ -107,18 +98,37 @@ def check_limits(self, active_requests, request): ) ) return False - - # Collection-user limits + + # Per role limits (pick the maximum) + role_limits = self.collections[request.collection].limits.get("per-role", {}).get(request.user.realm, {}) + user_roles = request.user.roles + per_role_limit = 0 + for role in user_roles: + role_limit = role_limits.get(role, 0) + if role_limit > per_role_limit: + per_role_limit = role_limit + + # If there is no role limit, use the collection global limit collection_user_limit = self.collections[request.collection].limits.get("per-user", None) - if collection_user_limit is not None: + + limit = 
per_role_limit + if limit == 0 and collection_user_limit is not None: + limit = collection_user_limit + # If there is no limit, return True (i.e. request can be queued) + elif limit == 0 and collection_total_limit is None: + logging.debug("No limit for user {} in collection {}".format(request.user, request.collection)) + return True + + + if limit > 0: collection_user_active_requests = sum( (qr.collection == request.collection and qr.user == request.user) for qr in active_requests ) - if collection_user_active_requests >= collection_user_limit: + if collection_user_active_requests >= limit: logging.debug( "User has {} of {} active requests in collection {}".format( collection_user_active_requests, - collection_user_limit, + limit, request.collection, ) ) From 3ad7e2be1c205b9645fc4ef4ff1af90bc3884b86 Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 9 Apr 2024 22:11:31 +0100 Subject: [PATCH 70/82] allow authenticators to add roles --- polytope_server/common/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/common/auth.py b/polytope_server/common/auth.py index 17374bc..011348f 100644 --- a/polytope_server/common/auth.py +++ b/polytope_server/common/auth.py @@ -118,7 +118,7 @@ def authenticate(self, auth_header) -> User: www_authenticate=self.auth_info, ) - user.roles = ["default"] + user.roles.append("default") # Visit all authorizers to append additional roles and attributes for authorizer in self.authorizers: From 51c8dd610b3497a66e49003270fda82604bd80ff Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 9 Apr 2024 22:11:40 +0100 Subject: [PATCH 71/82] extract roles from jwt --- .../common/authentication/jwt_authentication.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/polytope_server/common/authentication/jwt_authentication.py b/polytope_server/common/authentication/jwt_authentication.py index 47e49bb..5d7b72e 100644 --- a/polytope_server/common/authentication/jwt_authentication.py +++ b/polytope_server/common/authentication/jwt_authentication.py @@ -34,6 +34,7 @@ def __init__(self, name, realm, config): self.config = config self.certs_url = config["cert_url"] + self.client_id = config["client_id"] super().__init__(name, realm, config) @@ -57,8 +58,14 @@ def authenticate(self, credentials: str) -> User: key=certs ) + logging.info("Decoded JWT: {}".format(decoded_token)) + + user = User(decoded_token["sub"], self.realm()) + roles = decoded_token.get("resource_access", {}).get(self.client_id, {}).get("roles", []) + user.roles.extend(roles) + logging.info("Found user {} from decoded JWT".format(user)) except Exception as e: logging.info("Failed to authenticate user from JWT") From 65d2ec1ce7b91773011684d8be7767c166223ddd Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 9 Apr 2024 22:12:00 +0100 Subject: [PATCH 72/82] add user debug formatting --- polytope_server/common/user.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/polytope_server/common/user.py b/polytope_server/common/user.py index 9f522cc..60518cb 100644 --- a/polytope_server/common/user.py +++ b/polytope_server/common/user.py @@ -68,3 +68,6 @@ def serialize(self): v = self.__getattribute__(k) result[k] = v return result + + def __str__(self): + return f"User({self.realm}:{self.username})" From d72f09d21a24cabb0a690c2609e2d003e1847500 Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 9 Apr 2024 22:12:11 +0100 Subject: [PATCH 73/82] add per-role based request limits --- polytope_server/broker/broker.py | 75 ++++++++++++-------------------- 1 file changed, 29 insertions(+), 46 
deletions(-) diff --git a/polytope_server/broker/broker.py b/polytope_server/broker/broker.py index 9446afb..168c89c 100644 --- a/polytope_server/broker/broker.py +++ b/polytope_server/broker/broker.py @@ -44,7 +44,6 @@ def run(self): logging.info("Starting broker...") logging.info("Maximum Queue Size: {}".format(self.max_queue_size)) - logging.info("User Request Limit: {}".format(self.user_limit)) while not time.sleep(self.scheduling_interval): self.check_requests() @@ -85,57 +84,41 @@ def check_requests(self): def check_limits(self, active_requests, request): - logging.debug("Checking limits for request {}".format(request.id)) - - # Collection limits - collection_total_limit = self.collections[request.collection].limits.get("total", None) - if collection_total_limit is not None: - collection_active_requests = sum(qr.collection == request.collection for qr in active_requests) - if collection_active_requests >= collection_total_limit: - logging.debug( - "Collection has {} of {} total active requests".format( - collection_active_requests, collection_total_limit - ) - ) - return False - - # Per role limits (pick the maximum) - role_limits = self.collections[request.collection].limits.get("per-role", {}).get(request.user.realm, {}) - user_roles = request.user.roles - per_role_limit = 0 - for role in user_roles: - role_limit = role_limits.get(role, 0) - if role_limit > per_role_limit: - per_role_limit = role_limit - - # If there is no role limit, use the collection global limit - collection_user_limit = self.collections[request.collection].limits.get("per-user", None) - - limit = per_role_limit - if limit == 0 and collection_user_limit is not None: - limit = collection_user_limit - # If there is no limit, return True (i.e. request can be queued) - elif limit == 0 and collection_total_limit is None: - logging.debug("No limit for user {} in collection {}".format(request.user, request.collection)) - return True - + logging.debug(f"Checking limits for request {request.id}") + + # Get collection limits and calculate active requests + collection = self.collections[request.collection] + collection_limits = collection.limits + collection_total_limit = collection_limits.get("total") + collection_active_requests = sum(qr.collection == request.collection for qr in active_requests) + logging.debug(f"Collection {request.collection} has {collection_active_requests} active requests") + + # Check collection total limit + if collection_total_limit is not None and collection_active_requests >= collection_total_limit: + logging.debug(f"Collection has {collection_active_requests} of {collection_total_limit} total active requests") + return False + # Determine the effective limit based on role or per-user setting + role_limits = collection_limits.get("per-role", {}).get(request.user.realm, {}) + limit = max((role_limits.get(role, 0) for role in request.user.roles), default=0) + if limit == 0: # Use collection per-user limit if no role-specific limit + limit = collection_limits.get("per-user", 0) + + # Check if user exceeds the effective limit if limit > 0: - collection_user_active_requests = sum( - (qr.collection == request.collection and qr.user == request.user) for qr in active_requests - ) - if collection_user_active_requests >= limit: - logging.debug( - "User has {} of {} active requests in collection {}".format( - collection_user_active_requests, - limit, - request.collection, - ) - ) + user_active_requests = sum(qr.collection == request.collection and qr.user == request.user for qr in active_requests) + if 
user_active_requests >= limit: + logging.debug(f"User {request.user} has {user_active_requests} of {limit} active requests in collection {request.collection}") return False + else: + logging.debug(f"User {request.user} has {user_active_requests} of {limit} active requests in collection {request.collection}") + return True + # Allow if no limits are exceeded + logging.debug(f"No limit for user {request.user} in collection {request.collection}") return True + def enqueue(self, request): logging.info("Queuing request", extra={"request_id": request.id}) From 411f7e055feebae3dfafdafe386e63fbbe2daa59 Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 9 Apr 2024 23:40:03 +0100 Subject: [PATCH 74/82] add jose for jwt decoding --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 4a4dab1..dda9ffd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,4 +30,5 @@ flask-swagger-ui==3.25.0 ldap3==2.7 docker==4.2.0 python-keycloak==0.24.0 +python-jose boto3==1.28.80 From c56ebd69d6fa5ae5ad0c47aeccb693e0b0d503eb Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 9 Apr 2024 23:40:22 +0100 Subject: [PATCH 75/82] add openid offline_access token authentication --- .../common/authentication/authentication.py | 1 + .../openid_offline_access_authentication.py | 116 ++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 polytope_server/common/authentication/openid_offline_access_authentication.py diff --git a/polytope_server/common/authentication/authentication.py b/polytope_server/common/authentication/authentication.py index 67a589b..5227dd4 100644 --- a/polytope_server/common/authentication/authentication.py +++ b/polytope_server/common/authentication/authentication.py @@ -78,6 +78,7 @@ def name(self) -> str: "keycloak": "KeycloakAuthentication", "federation": "FederationAuthentication", "jwt" : "JWTAuthentication", + "openid_offline_access" : "OpenIDOfflineAuthentication", } diff --git a/polytope_server/common/authentication/openid_offline_access_authentication.py b/polytope_server/common/authentication/openid_offline_access_authentication.py new file mode 100644 index 0000000..5550100 --- /dev/null +++ b/polytope_server/common/authentication/openid_offline_access_authentication.py @@ -0,0 +1,116 @@ +# +# Copyright 2022 European Centre for Medium-Range Weather Forecasts (ECMWF) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. +# + +import logging +import os +import requests +from jose import jwt + +from ..auth import User +from ..caching import cache +from . 
import authentication +from ..exceptions import ForbiddenRequest + + +class OpenIDOfflineAuthentication(authentication.Authentication): + def __init__(self, name, realm, config): + self.config = config + + self.certs_url = config["cert_url"] + self.public_client_id = config["public_client_id"] + self.private_client_id = config["private_client_id"] + self.private_client_secret = config["private_client_secret"] + self.iam_url = config["iam_url"] + self.iam_realm = config["iam_realm"] + + + super().__init__(name, realm, config) + + def authentication_type(self): + return "Bearer" + + def authentication_info(self): + return "Authenticate with OpenID offline_access token" + + @cache(lifetime=120) + def get_certs(self): + return requests.get(self.certs_url).json() + + @cache(lifetime=120) + def check_offline_access_token(self, token: str) -> bool: + """ + We check if the token is recognised by the IAM service, and we cache this result. + We cannot simply try to get the access token because we would spam the IAM server with invalid tokens, and the + failure at that point would not be cached. + """ + keycloak_token_introspection = self.iam_url + "/realms/" + self.iam_realm + "/protocol/openid-connect/token/introspect" + introspection_data = { + "token": token + } + b_auth = requests.auth.HTTPBasicAuth(self.private_client_id, self.private_client_secret) + resp = requests.post(url=keycloak_token_introspection, data=introspection_data, auth=b_auth).json() + if resp["active"] and resp["token_type"] == "Offline": + return True + else: + return False + + @cache(lifetime=120) + def authenticate(self, credentials: str) -> User: + + try: + + # Check if this is a valid offline_access token + if not self.check_offline_access_token(credentials): + raise ForbiddenRequest("Not a valid offline_access token") + + # Generate an access token from the offline_access token (like a refresh token) + refresh_data = { + "client_id": self.public_client_id, + "grant_type": "refresh_token", + "refresh_token": credentials + } + keycloak_token_endpoint = self.iam_url + "/realms/" + self.iam_realm + "/protocol/openid-connect/token" + resp = requests.post(url=keycloak_token_endpoint, data=refresh_data) + token = resp.json()['access_token'] + + certs = self.get_certs() + decoded_token = jwt.decode(token=token, + algorithms=jwt.get_unverified_header(token).get('alg'), + key=certs + ) + + logging.info("Decoded JWT: {}".format(decoded_token)) + + user = User(decoded_token["sub"], self.realm()) + + roles = decoded_token.get("resource_access", {}).get(self.public_client_id, {}).get("roles", []) + user.roles.extend(roles) + + logging.info("Found user {} from openid offline_access token".format(user)) + + except Exception as e: + logging.info("Failed to authenticate user from openid offline_access token") + logging.info(e) + raise ForbiddenRequest("Could not authenticate user from openid offline_access token") + return user + + + def collect_metric_info(self): + return {} From 0cd47a64f173943493a4b0acdc9f93fc777184fe Mon Sep 17 00:00:00 2001 From: majh Date: Mon, 22 Jan 2024 20:34:38 +0000 Subject: [PATCH 76/82] feature extraction datasource --- polytope_server/common/datasource/polytope.py | 38 ++++++++----------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/polytope_server/common/datasource/polytope.py b/polytope_server/common/datasource/polytope.py index ff21fc2..0c98db9 100644 --- a/polytope_server/common/datasource/polytope.py +++ b/polytope_server/common/datasource/polytope.py @@ -23,10 +23,11 @@ import os 
import subprocess -os.environ["GRIBJUMP_HOME"] = "/opt/fdb-gribjump" - +import pygribjump import polytope import tempfile +from polytope_mars.api import PolytopeMars +from polytope_mars.api import features from pathlib import Path import yaml @@ -46,19 +47,16 @@ def __init__(self, config): # still need to set up fdb self.fdb_config = self.config["fdb-config"] - self.non_sliceable = self.config.get("non-sliceable", None) - assert self.non_sliceable is not None - self.check_schema() - os.environ["FDB5_CONFIG"] = json.dumps(self.fdb_config) - os.environ["FDB5_HOME"] = self.config.get("fdb_home", "/opt/fdb-gribjump") - # forced change + # os.environ["FDB5_CONFIG"] = json.dumps(self.fdb_config) + # os.environ["FDB5_HOME"] = self.config.get("fdb_home", "/opt/fdb-gribjump") + os.environ["GRIBJUMP_HOME"] = "/opt/fdb/gribjump" - if "spaces" in self.fdb_config: - for space in self.fdb_config["spaces"]: - for root in space["roots"]: - os.makedirs(root["path"], exist_ok=True) + # if "spaces" in self.fdb_config: + # for space in self.fdb_config["spaces"]: + # for root in space["roots"]: + # os.makedirs(root["path"], exist_ok=True) # Set up gribjump self.gribjump_config = self.config["gribjump-config"] @@ -66,7 +64,7 @@ def __init__(self, config): with open("/home/polytope/gribjump/config.yaml", "w") as f: json.dump(self.gribjump_config, f) os.environ["GRIBJUMP_CONFIG_FILE"] = "/home/polytope/gribjump/config.yaml" - # self.gj = pygribjump.GribJump() + self.gj = pygribjump.GribJump() # Set up polytope feature extraction library self.polytope_options = { @@ -75,8 +73,6 @@ def __init__(self, config): }, "date": {"merge": {"with": "time", "linkers": ["T", "00"]}}, "step": {"type_change": "int"}, - "number": {"type_change": "int"}, - "longitude" : {"cyclic": [0, 360]}, } logging.info("Set up gribjump") @@ -122,7 +118,7 @@ def check_schema(self): self.fdb_config["schema"] = str(local_path) - @cache(lifetime=5000000) + @cache(lifetime=500) def git_download_schema(self, remote, branch, git_dir, git_file): call = "git archive --remote {} {}:{} {} | tar -xO {}".format( remote, branch, str(git_dir), str(git_file), str(git_file) @@ -143,19 +139,17 @@ def retrieve(self, request): # We take the static config from the match rules of the datasource self.polytope_config = {} - for k in self.non_sliceable: - self.polytope_config[k] = r[k] - - assert len(self.polytope_config) > 0 + for k, v in self.match_rules.items(): + self.polytope_config[k] = isinstance(v, list) and v[0] or v logging.info(self.polytope_config) logging.info(self.polytope_options) - from polytope_mars.api import PolytopeMars p = PolytopeMars(self.polytope_config, self.polytope_options) + logging.info(p) self.output = p.extract(r) self.output = json.dumps(self.output).encode("utf-8") - # logging.info(self.output) + logging.info(self.output) return True def result(self, request): From 5efd42d0ea20ec034f148c20d9139df163095406 Mon Sep 17 00:00:00 2001 From: majh Date: Wed, 24 Jan 2024 00:15:45 +0000 Subject: [PATCH 77/82] change import to after os.environ --- polytope_server/common/datasource/polytope.py | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/polytope_server/common/datasource/polytope.py b/polytope_server/common/datasource/polytope.py index 0c98db9..7ff520e 100644 --- a/polytope_server/common/datasource/polytope.py +++ b/polytope_server/common/datasource/polytope.py @@ -23,11 +23,10 @@ import os import subprocess -import pygribjump +os.environ["GRIBJUMP_HOME"] = "/opt/fdb-gribjump" + import polytope import 
tempfile -from polytope_mars.api import PolytopeMars -from polytope_mars.api import features from pathlib import Path import yaml @@ -47,16 +46,19 @@ def __init__(self, config): # still need to set up fdb self.fdb_config = self.config["fdb-config"] + self.non_sliceable = self.config.get("non-sliceable", None) + assert self.non_sliceable is not None + self.check_schema() - # os.environ["FDB5_CONFIG"] = json.dumps(self.fdb_config) - # os.environ["FDB5_HOME"] = self.config.get("fdb_home", "/opt/fdb-gribjump") - os.environ["GRIBJUMP_HOME"] = "/opt/fdb/gribjump" + os.environ["FDB5_CONFIG"] = json.dumps(self.fdb_config) + os.environ["FDB5_HOME"] = self.config.get("fdb_home", "/opt/fdb-gribjump") + # forced change - # if "spaces" in self.fdb_config: - # for space in self.fdb_config["spaces"]: - # for root in space["roots"]: - # os.makedirs(root["path"], exist_ok=True) + if "spaces" in self.fdb_config: + for space in self.fdb_config["spaces"]: + for root in space["roots"]: + os.makedirs(root["path"], exist_ok=True) # Set up gribjump self.gribjump_config = self.config["gribjump-config"] @@ -64,7 +66,7 @@ def __init__(self, config): with open("/home/polytope/gribjump/config.yaml", "w") as f: json.dump(self.gribjump_config, f) os.environ["GRIBJUMP_CONFIG_FILE"] = "/home/polytope/gribjump/config.yaml" - self.gj = pygribjump.GribJump() + # self.gj = pygribjump.GribJump() # Set up polytope feature extraction library self.polytope_options = { @@ -73,6 +75,7 @@ def __init__(self, config): }, "date": {"merge": {"with": "time", "linkers": ["T", "00"]}}, "step": {"type_change": "int"}, + "number": {"type_change": "int"}, } logging.info("Set up gribjump") @@ -118,7 +121,7 @@ def check_schema(self): self.fdb_config["schema"] = str(local_path) - @cache(lifetime=500) + @cache(lifetime=5000000) def git_download_schema(self, remote, branch, git_dir, git_file): call = "git archive --remote {} {}:{} {} | tar -xO {}".format( remote, branch, str(git_dir), str(git_file), str(git_file) @@ -139,13 +142,15 @@ def retrieve(self, request): # We take the static config from the match rules of the datasource self.polytope_config = {} - for k, v in self.match_rules.items(): - self.polytope_config[k] = isinstance(v, list) and v[0] or v + for k in self.non_sliceable: + self.polytope_config[k] = r[k] + + assert len(self.polytope_config) > 0 logging.info(self.polytope_config) logging.info(self.polytope_options) + from polytope_mars.api import PolytopeMars p = PolytopeMars(self.polytope_config, self.polytope_options) - logging.info(p) self.output = p.extract(r) self.output = json.dumps(self.output).encode("utf-8") From 531974f0c8a4e17422d458138f151635743f0607 Mon Sep 17 00:00:00 2001 From: majh Date: Fri, 26 Jan 2024 10:07:13 +0000 Subject: [PATCH 78/82] remove logging --- polytope_server/common/datasource/polytope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_server/common/datasource/polytope.py b/polytope_server/common/datasource/polytope.py index 7ff520e..59b060b 100644 --- a/polytope_server/common/datasource/polytope.py +++ b/polytope_server/common/datasource/polytope.py @@ -154,7 +154,7 @@ def retrieve(self, request): self.output = p.extract(r) self.output = json.dumps(self.output).encode("utf-8") - logging.info(self.output) + # logging.info(self.output) return True def result(self, request): From 40c7ee2b9ef4cf8f7ddfd31d9398efd1e2e78b4f Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 5 Mar 2024 14:17:11 +0000 Subject: [PATCH 79/82] fixes for MARS access protocols --- 
polytope_server/common/datasource/polytope.py | 1 + 1 file changed, 1 insertion(+) diff --git a/polytope_server/common/datasource/polytope.py b/polytope_server/common/datasource/polytope.py index 59b060b..ff21fc2 100644 --- a/polytope_server/common/datasource/polytope.py +++ b/polytope_server/common/datasource/polytope.py @@ -76,6 +76,7 @@ def __init__(self, config): "date": {"merge": {"with": "time", "linkers": ["T", "00"]}}, "step": {"type_change": "int"}, "number": {"type_change": "int"}, + "longitude" : {"cyclic": [0, 360]}, } logging.info("Set up gribjump") From 88b89ae7147dbc69bb49819707b9fafbd2390bde Mon Sep 17 00:00:00 2001 From: majh Date: Tue, 5 Mar 2024 14:17:33 +0000 Subject: [PATCH 80/82] prototype --- .../keycloak_bearer_authentication.py | 123 ++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 polytope_server/common/authentication/keycloak_bearer_authentication.py diff --git a/polytope_server/common/authentication/keycloak_bearer_authentication.py b/polytope_server/common/authentication/keycloak_bearer_authentication.py new file mode 100644 index 0000000..8f26284 --- /dev/null +++ b/polytope_server/common/authentication/keycloak_bearer_authentication.py @@ -0,0 +1,123 @@ +# +# Copyright 2022 European Centre for Medium-Range Weather Forecasts (ECMWF) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. +# + +import base64 +import logging +import os + +from keycloak import KeycloakOpenID +from keycloak.exceptions import KeycloakConnectionError + +from ..auth import User +from ..caching import cache +from ..exceptions import ForbiddenRequest +from . import authentication + + +class KeycloakBearerAuthentication(authentication.Authentication): + def __init__(self, name, realm, config): + self.config = config + + # URL of the keycloak API: e.g. https://keycloak.insitute.org/auth/" + self.url = config["url"] + + # Keycloak client id and secret + self.client_id = config["client_id"] # e.g. 
polytope + self.client_secret = config["client_secret"] + + # The keycloak realm to look for users + self.keycloak_realm = config["keycloak_realm"] + + self.skipTLS = config.get("skip_tls", False) + + # Connection parameters + self.timeout = config.get("timeout", 3) + + # Mapping user attributes to keycloak attributes + self.attribute_map = config.get("attributes", {}) + + super().__init__(name, realm, config) + + def authentication_type(self): + return "Basic" + + def authentication_info(self): + return "Authenticate with Keycloak username and password" + + @cache(lifetime=120) + def authenticate(self, credentials: str) -> User: + + # credentials should be of the form 'base64(:)' + try: + decoded = base64.b64decode(credentials).decode("utf-8") + auth_user, auth_password = decoded.split(":", 1) + except UnicodeDecodeError: + raise ForbiddenRequest("Credentials could not be decoded") + except ValueError: + raise ForbiddenRequest("Credentials could not be unpacked") + + _environ = dict(os.environ) + try: + os.environ["http_proxy"] = os.getenv("POLYTOPE_PROXY", "") + os.environ["https_proxy"] = os.getenv("POLYTOPE_PROXY", "") + + logging.debug("Setting HTTPS_PROXY to {}".format(os.environ["https_proxy"])) + + try: + + # Open a session as a registered client + client = KeycloakOpenID( + server_url=self.url, + client_id=self.client_id, + realm_name=self.keycloak_realm, + client_secret_key=self.client_secret, + verify=(self.skipTLS is False), + ) + + client.connection.timeout = self.timeout + + # Obtain a session token on behalf of the user + token = client.token(auth_user, auth_password) + + except KeycloakConnectionError: + # Raise ForbiddenRequest rather than ServerError so that we are not blocked if Keycloak is down + raise ForbiddenRequest("Could not connect to Keycloak") + except Exception: + raise ForbiddenRequest("Invalid Keycloak credentials") + + userinfo = client.userinfo(token["access_token"]) + + user = User(auth_user, self.realm()) + + logging.debug("Found user {} in keycloak".format(auth_user)) + + for k, v in self.attribute_map.items(): + if v in userinfo: + user.attributes[k] = userinfo[v] + logging.debug("User {} has attribute {} : {}".format(user.username, k, user.attributes[k])) + + return user + + finally: + os.environ.clear() + os.environ.update(_environ) + + def collect_metric_info(self): + return {} From 79d2df4fb79c293c61e9875d0fadf07a7cb19d3f Mon Sep 17 00:00:00 2001 From: majh Date: Thu, 14 Mar 2024 14:28:26 +0000 Subject: [PATCH 81/82] add jwt token authentication --- .../jwt_bearer_authentication.py | 65 +++++++++ .../keycloak_bearer_authentication.py | 123 ------------------ 2 files changed, 65 insertions(+), 123 deletions(-) create mode 100644 polytope_server/common/authentication/jwt_bearer_authentication.py delete mode 100644 polytope_server/common/authentication/keycloak_bearer_authentication.py diff --git a/polytope_server/common/authentication/jwt_bearer_authentication.py b/polytope_server/common/authentication/jwt_bearer_authentication.py new file mode 100644 index 0000000..e20e3a6 --- /dev/null +++ b/polytope_server/common/authentication/jwt_bearer_authentication.py @@ -0,0 +1,65 @@ +# +# Copyright 2022 European Centre for Medium-Range Weather Forecasts (ECMWF) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. +# + +import logging +import os +import requests +from jose import jwt + +from ..auth import User +from ..caching import cache +from . import authentication + + +class JWTBearerAuthentication(authentication.Authentication): + def __init__(self, name, realm, config): + self.config = config + + self.certs_url = config["cert_url"] + + super().__init__(name, realm, config) + + def authentication_type(self): + return "Bearer" + + def authentication_info(self): + return "Authenticate with JWT token" + + @cache(lifetime=120) + def get_certs(self): + return requests.get(self.certs_url).json() + + @cache(lifetime=120) + def authenticate(self, credentials: str) -> User: + certs = self.get_certs() + decoded_token = jwt.decode(token=credentials, + algorithms=jwt.get_unverified_header(credentials).get('alg'), + key=certs + ) + + user = User(decoded_token["sub"], self.realm()) + + logging.debug("Found user {} from decoded JWT".format(user)) + + return user + + + def collect_metric_info(self): + return {} diff --git a/polytope_server/common/authentication/keycloak_bearer_authentication.py b/polytope_server/common/authentication/keycloak_bearer_authentication.py deleted file mode 100644 index 8f26284..0000000 --- a/polytope_server/common/authentication/keycloak_bearer_authentication.py +++ /dev/null @@ -1,123 +0,0 @@ -# -# Copyright 2022 European Centre for Medium-Range Weather Forecasts (ECMWF) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation nor -# does it submit to any jurisdiction. -# - -import base64 -import logging -import os - -from keycloak import KeycloakOpenID -from keycloak.exceptions import KeycloakConnectionError - -from ..auth import User -from ..caching import cache -from ..exceptions import ForbiddenRequest -from . import authentication - - -class KeycloakBearerAuthentication(authentication.Authentication): - def __init__(self, name, realm, config): - self.config = config - - # URL of the keycloak API: e.g. https://keycloak.insitute.org/auth/" - self.url = config["url"] - - # Keycloak client id and secret - self.client_id = config["client_id"] # e.g. 
polytope - self.client_secret = config["client_secret"] - - # The keycloak realm to look for users - self.keycloak_realm = config["keycloak_realm"] - - self.skipTLS = config.get("skip_tls", False) - - # Connection parameters - self.timeout = config.get("timeout", 3) - - # Mapping user attributes to keycloak attributes - self.attribute_map = config.get("attributes", {}) - - super().__init__(name, realm, config) - - def authentication_type(self): - return "Basic" - - def authentication_info(self): - return "Authenticate with Keycloak username and password" - - @cache(lifetime=120) - def authenticate(self, credentials: str) -> User: - - # credentials should be of the form 'base64(:)' - try: - decoded = base64.b64decode(credentials).decode("utf-8") - auth_user, auth_password = decoded.split(":", 1) - except UnicodeDecodeError: - raise ForbiddenRequest("Credentials could not be decoded") - except ValueError: - raise ForbiddenRequest("Credentials could not be unpacked") - - _environ = dict(os.environ) - try: - os.environ["http_proxy"] = os.getenv("POLYTOPE_PROXY", "") - os.environ["https_proxy"] = os.getenv("POLYTOPE_PROXY", "") - - logging.debug("Setting HTTPS_PROXY to {}".format(os.environ["https_proxy"])) - - try: - - # Open a session as a registered client - client = KeycloakOpenID( - server_url=self.url, - client_id=self.client_id, - realm_name=self.keycloak_realm, - client_secret_key=self.client_secret, - verify=(self.skipTLS is False), - ) - - client.connection.timeout = self.timeout - - # Obtain a session token on behalf of the user - token = client.token(auth_user, auth_password) - - except KeycloakConnectionError: - # Raise ForbiddenRequest rather than ServerError so that we are not blocked if Keycloak is down - raise ForbiddenRequest("Could not connect to Keycloak") - except Exception: - raise ForbiddenRequest("Invalid Keycloak credentials") - - userinfo = client.userinfo(token["access_token"]) - - user = User(auth_user, self.realm()) - - logging.debug("Found user {} in keycloak".format(auth_user)) - - for k, v in self.attribute_map.items(): - if v in userinfo: - user.attributes[k] = userinfo[v] - logging.debug("User {} has attribute {} : {}".format(user.username, k, user.attributes[k])) - - return user - - finally: - os.environ.clear() - os.environ.update(_environ) - - def collect_metric_info(self): - return {} From 3f2ee4e64a74ba27e85b89ce7ddd35ea0a768967 Mon Sep 17 00:00:00 2001 From: majh Date: Fri, 15 Mar 2024 09:35:31 +0000 Subject: [PATCH 82/82] add JWT token authentication --- .../jwt_bearer_authentication.py | 65 ------------------- 1 file changed, 65 deletions(-) delete mode 100644 polytope_server/common/authentication/jwt_bearer_authentication.py diff --git a/polytope_server/common/authentication/jwt_bearer_authentication.py b/polytope_server/common/authentication/jwt_bearer_authentication.py deleted file mode 100644 index e20e3a6..0000000 --- a/polytope_server/common/authentication/jwt_bearer_authentication.py +++ /dev/null @@ -1,65 +0,0 @@ -# -# Copyright 2022 European Centre for Medium-Range Weather Forecasts (ECMWF) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation nor -# does it submit to any jurisdiction. -# - -import logging -import os -import requests -from jose import jwt - -from ..auth import User -from ..caching import cache -from . import authentication - - -class JWTBearerAuthentication(authentication.Authentication): - def __init__(self, name, realm, config): - self.config = config - - self.certs_url = config["cert_url"] - - super().__init__(name, realm, config) - - def authentication_type(self): - return "Bearer" - - def authentication_info(self): - return "Authenticate with JWT token" - - @cache(lifetime=120) - def get_certs(self): - return requests.get(self.certs_url).json() - - @cache(lifetime=120) - def authenticate(self, credentials: str) -> User: - certs = self.get_certs() - decoded_token = jwt.decode(token=credentials, - algorithms=jwt.get_unverified_header(credentials).get('alg'), - key=certs - ) - - user = User(decoded_token["sub"], self.realm()) - - logging.debug("Found user {} from decoded JWT".format(user)) - - return user - - - def collect_metric_info(self): - return {}
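
Editor's note: for readers following the authentication changes above, here is a minimal, hypothetical sketch of the token-verification flow that the consolidated JWTAuthentication class (patches 63 and 71) converges on: fetch the JWKS from the configured cert_url, decode the bearer token with python-jose, and collect the subject plus the client roles. The URL and client id below are placeholders rather than values from this repository, and the snippet is illustrative only, not the shipped implementation.

    import requests
    from jose import jwt

    CERTS_URL = "https://iam.example.org/realms/example/protocol/openid-connect/certs"  # placeholder
    CLIENT_ID = "polytope-example"  # placeholder

    def decode_bearer_token(token: str) -> dict:
        # JWKS published by the identity provider; the server caches this lookup
        certs = requests.get(CERTS_URL).json()
        # Use the algorithm advertised in the (unverified) token header, as the patches do
        alg = jwt.get_unverified_header(token).get("alg")
        claims = jwt.decode(token=token, key=certs, algorithms=alg)
        return {
            "username": claims["sub"],
            # Keycloak-style tokens keep client roles under resource_access.<client_id>.roles
            "roles": claims.get("resource_access", {}).get(CLIENT_ID, {}).get("roles", []),
        }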