From 4cc23ace279050a35c976a8678a2168faf82a487 Mon Sep 17 00:00:00 2001 From: jamesbeedy Date: Thu, 3 Jun 2021 15:50:00 -0700 Subject: [PATCH 1/3] info -> debug and log moar These changes introduce enhanced logging in the prolog and epilog along with log level modifications that ensure we arent logging so many things to the info log. Additionally, increase the size of the running log to 5MB before rolling it. --- src/licensemanager2/agent/__init__.py | 4 +-- .../workload_managers/slurm/cmd_utils.py | 35 +++++++++++++++++-- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/licensemanager2/agent/__init__.py b/src/licensemanager2/agent/__init__.py index 10cfbc56..8548088a 100644 --- a/src/licensemanager2/agent/__init__.py +++ b/src/licensemanager2/agent/__init__.py @@ -55,8 +55,8 @@ def _rotating_file_handler(log_file: str): handler = RotatingFileHandler( log_file, - backupCount=5, - maxBytes=100000, + backupCount=10, + maxBytes=5242880, ) handler.rotator = _rotator handler.namer = _namer diff --git a/src/licensemanager2/workload_managers/slurm/cmd_utils.py b/src/licensemanager2/workload_managers/slurm/cmd_utils.py index c0bfd837..801226f6 100644 --- a/src/licensemanager2/workload_managers/slurm/cmd_utils.py +++ b/src/licensemanager2/workload_managers/slurm/cmd_utils.py @@ -55,6 +55,9 @@ async def get_required_licenses_for_job(slurm_job_id: str) -> LicenseBookingRequ """Retrieve the required licenses for a job.""" license_array = await get_licenses_for_job(slurm_job_id) + logger.debug("##### License array #####") + logger.debug(license_array) + license_booking_request = LicenseBookingRequest( job_id=slurm_job_id, bookings=[], @@ -92,6 +95,8 @@ async def check_feature_token_availablity(lbr: LicenseBookingRequest) -> bool: f"{SETTINGS.AGENT_BASE_URL}/api/v1/license/all", headers=LM2_AGENT_HEADERS ) + logger.debug("##### /api/v1/license/all #####") + logger.debug(resp.json()) for item in resp.json(): product_feature = item["product_feature"] @@ -99,7 +104,22 @@ async def check_feature_token_availablity(lbr: LicenseBookingRequest) -> bool: if product_feature == license_booking.product_feature: tokens_available = int(item["available"]) if tokens_available >= license_booking.tokens: + logger.debug( + f"##### {product_feature}, tokens avalable #####") + logger.debug( + f"##### Tokens available {tokens_available} #####") + logger.debug( + f"##### Tokens required {license_booking.tokens} #####") + else: + logger.debug( + f"##### {product_feature}, tokens not available #####") + logger.debug( + f"##### Tokens available {tokens_available} #####") + logger.debug( + f"##### Tokens required {license_booking.tokens} #####") return True + logger.debug( + f"##### Tokens not available #####") return False @@ -122,7 +142,9 @@ async def make_booking_request(lbr: LicenseBookingRequest) -> bool: ) if resp.status_code == 200: + logger.debug("##### Booking completed successfully #####") return True + logger.debug(f"##### Booking failed: {resp.status_code} #####") return False @@ -136,7 +158,9 @@ async def reconcile(): ) if resp.status_code == 200: + logger.debug("##### Reconcile completed successfully #####") return True + logger.debug(f"##### Reconcile failed {resp.status_code} #####") return False @@ -162,7 +186,8 @@ async def get_licenses_for_job(slurm_job_id: str) -> List: CMD_TIMEOUT ) scontrol_out = str(scontrol_out, ENCODING) # type: ignore - logger.info(scontrol_out) + logger.debug("##### scontrol out #####") + logger.debug(scontrol_out) # Check that the command completed successfully if not scontrol_show_lic.returncode == 0: @@ -178,7 +203,8 @@ async def get_licenses_for_job(slurm_job_id: str) -> List: async def get_tokens_for_license( product_feature_server: str, - output_type: str) -> Optional[int]: + output_type: str, +) -> Optional[int]: """ Return tokens from scontrol output. """ @@ -227,6 +253,8 @@ async def scontrol_show_lic(): stdout, _ = await asyncio.wait_for(proc.communicate(), CMD_TIMEOUT) output = str(stdout, encoding=ENCODING) + logger.debug("##### scontrol show lic #####") + logger.debug(output) return output @@ -244,7 +272,8 @@ async def sacctmgr_modify_resource( f"count={num_tokens}", "-i", ] - logger.info(f"{' '.join(cmd)}") + logger.debug("##### sacctmgr update cmd #####") + logger.debug(f"{' '.join(cmd)}") sacctmgr_modify_resource = await asyncio.create_subprocess_shell( shlex.join(cmd), From 041459770a595446841ea5c8a65f75aa65945294 Mon Sep 17 00:00:00 2001 From: jamesbeedy Date: Thu, 3 Jun 2021 16:03:39 -0700 Subject: [PATCH 2/3] feedback from review --- src/licensemanager2/agent/__init__.py | 1 + src/licensemanager2/workload_managers/slurm/cmd_utils.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/licensemanager2/agent/__init__.py b/src/licensemanager2/agent/__init__.py index 8548088a..b21c65cd 100644 --- a/src/licensemanager2/agent/__init__.py +++ b/src/licensemanager2/agent/__init__.py @@ -53,6 +53,7 @@ def _rotating_file_handler(log_file: str): """Configure the rotating file handler.""" global logger + # Rotate the log file at 5MB, keep 10 rotations. handler = RotatingFileHandler( log_file, backupCount=10, diff --git a/src/licensemanager2/workload_managers/slurm/cmd_utils.py b/src/licensemanager2/workload_managers/slurm/cmd_utils.py index 801226f6..499f253f 100644 --- a/src/licensemanager2/workload_managers/slurm/cmd_utils.py +++ b/src/licensemanager2/workload_managers/slurm/cmd_utils.py @@ -55,7 +55,7 @@ async def get_required_licenses_for_job(slurm_job_id: str) -> LicenseBookingRequ """Retrieve the required licenses for a job.""" license_array = await get_licenses_for_job(slurm_job_id) - logger.debug("##### License array #####") + logger.debug(f"##### License array for job id: {slurm_job_id} #####") logger.debug(license_array) license_booking_request = LicenseBookingRequest( @@ -87,6 +87,8 @@ async def get_required_licenses_for_job(slurm_job_id: str) -> LicenseBookingRequ async def check_feature_token_availablity(lbr: LicenseBookingRequest) -> bool: """Determine if there are sufficient tokens to fill the request.""" + logger.info(f"##### Checking feature token availability for: {lbr.job_id} #####") + # We currently only have an "/all" endpoint. # Todo: Implement endpoint to retrieve counts for a # specific feature, or set of features so that we dont have to get /all. From da47dd4e257bcb47776032ed50d7c79d22d52c51 Mon Sep 17 00:00:00 2001 From: jamesbeedy Date: Thu, 3 Jun 2021 16:06:11 -0700 Subject: [PATCH 3/3] fix lint --- src/licensemanager2/workload_managers/slurm/cmd_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/licensemanager2/workload_managers/slurm/cmd_utils.py b/src/licensemanager2/workload_managers/slurm/cmd_utils.py index 499f253f..b95041bb 100644 --- a/src/licensemanager2/workload_managers/slurm/cmd_utils.py +++ b/src/licensemanager2/workload_managers/slurm/cmd_utils.py @@ -120,8 +120,7 @@ async def check_feature_token_availablity(lbr: LicenseBookingRequest) -> bool: logger.debug( f"##### Tokens required {license_booking.tokens} #####") return True - logger.debug( - f"##### Tokens not available #####") + logger.debug("##### Tokens not available #####") return False