diff --git a/documentation/domains/Cluster.json b/documentation/domains/Cluster.json index f0852a4d4b4..8cd87fc40cf 100644 --- a/documentation/domains/Cluster.json +++ b/documentation/domains/Cluster.json @@ -376,6 +376,11 @@ "Shutdown": { "type": "object", "properties": { + "skipWaitingCohEndangeredState": { + "default": false, + "description": "For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus in safe state before shutdown. By default, the operator will wait until it is safe to shutdown the Coherence Cache Cluster. Defaults to false.", + "type": "boolean" + }, "ignoreSessions": { "default": false, "description": "For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false.", diff --git a/documentation/domains/Cluster.md b/documentation/domains/Cluster.md index fd14bd3814a..69ab8558488 100644 --- a/documentation/domains/Cluster.md +++ b/documentation/domains/Cluster.md @@ -115,6 +115,7 @@ The specification of the operation of the WebLogic cluster. Required. | --- | --- | --- | | `ignoreSessions` | Boolean | For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false. | | `shutdownType` | string | Specifies how the operator will shut down server instances. Legal values are `Graceful` and `Forced`. Defaults to `Graceful`. | +| `skipWaitingCohEndangeredState` | Boolean | For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus in safe state before shutdown. By default, the operator will wait until it is safe to shutdown the Coherence Cache Cluster. Defaults to false. | | `timeoutSeconds` | integer | For graceful shutdown only, number of seconds to wait before aborting in-flight work and shutting down the server. Defaults to 30 seconds. 
| | `waitForAllSessions` | Boolean | For graceful shutdown only, set to true to wait for all HTTP sessions during in-flight work handling; false to wait for non-persisted HTTP sessions only. Defaults to false. | diff --git a/documentation/domains/Domain.json b/documentation/domains/Domain.json index 2f78fdc2e69..ed2b17a2dc6 100644 --- a/documentation/domains/Domain.json +++ b/documentation/domains/Domain.json @@ -1187,6 +1187,11 @@ "Shutdown": { "type": "object", "properties": { + "skipWaitingCohEndangeredState": { + "default": false, + "description": "For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus in safe state before shutdown. By default, the operator will wait until it is safe to shutdown the Coherence Cache Cluster. Defaults to false.", + "type": "boolean" + }, "ignoreSessions": { "default": false, "description": "For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false.", diff --git a/documentation/domains/Domain.md b/documentation/domains/Domain.md index f21faa9a118..c4437621472 100644 --- a/documentation/domains/Domain.md +++ b/documentation/domains/Domain.md @@ -278,6 +278,7 @@ The current status of the operation of the WebLogic domain. Updated automaticall | --- | --- | --- | | `ignoreSessions` | Boolean | For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false. | | `shutdownType` | string | Specifies how the operator will shut down server instances. Legal values are `Graceful` and `Forced`. Defaults to `Graceful`. | +| `skipWaitingCohEndangeredState` | Boolean | For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus in safe state before shutdown. By default, the operator will wait until it is safe to shutdown the Coherence Cache Cluster. Defaults to false. 
| | `timeoutSeconds` | integer | For graceful shutdown only, number of seconds to wait before aborting in-flight work and shutting down the server. Defaults to 30 seconds. | | `waitForAllSessions` | Boolean | For graceful shutdown only, set to true to wait for all HTTP sessions during in-flight work handling; false to wait for non-persisted HTTP sessions only. Defaults to false. | diff --git a/kubernetes/crd/cluster-crd.yaml b/kubernetes/crd/cluster-crd.yaml index ea44f182559..8561ee8a44c 100644 --- a/kubernetes/crd/cluster-crd.yaml +++ b/kubernetes/crd/cluster-crd.yaml @@ -5,7 +5,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - weblogic.sha256: 9ac551460201264f4eb69fabd7c54120c2e30492143ad81f47965567f2a7ae79 + weblogic.sha256: 7146de067298f75bf803e2c8acf9f88980d1d5770e375122bcbb1fe4e6f9a64b name: clusters.weblogic.oracle spec: group: weblogic.oracle @@ -2390,6 +2390,14 @@ spec: description: Configures how the operator should shut down the server instance. properties: + skipWaitingCohEndangeredState: + default: false + description: For graceful shutdown only, set to true to skip + waiting for Coherence Cache Cluster service MBean HAStatus + in safe state before shutdown. By default, the operator + will wait until it is safe to shutdown the Coherence Cache + Cluster. Defaults to false. 
+ type: boolean ignoreSessions: default: false description: For graceful shutdown only, indicates to ignore diff --git a/kubernetes/crd/domain-crd.yaml b/kubernetes/crd/domain-crd.yaml index 1867872d45b..7dcb0c3b267 100644 --- a/kubernetes/crd/domain-crd.yaml +++ b/kubernetes/crd/domain-crd.yaml @@ -5,7 +5,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - weblogic.sha256: 1b5fbb6128eb44897c07ef1304cf64bc8682f7f4ef2454dde6e861a04941b635 + weblogic.sha256: d33dc80911f1901a703fe95438c2e4fc39271775bfecddd35893f8422d97caa3 name: domains.weblogic.oracle spec: group: weblogic.oracle @@ -3331,6 +3331,14 @@ spec: description: Configures how the operator should shut down the server instance. properties: + skipWaitingCohEndangeredState: + default: false + description: For graceful shutdown only, set to true to + skip waiting for Coherence Cache Cluster service MBean + HAStatus in safe state before shutdown. By default, + the operator will wait until it is safe to shutdown + the Coherence Cache Cluster. Defaults to false. + type: boolean ignoreSessions: default: false description: For graceful shutdown only, indicates to @@ -6414,6 +6422,14 @@ spec: description: Configures how the operator should shut down the server instance. properties: + skipWaitingCohEndangeredState: + default: false + description: For graceful shutdown only, set to true to skip + waiting for Coherence Cache Cluster service MBean HAStatus + in safe state before shutdown. By default, the operator + will wait until it is safe to shutdown the Coherence Cache + Cluster. Defaults to false. + type: boolean ignoreSessions: default: false description: For graceful shutdown only, indicates to ignore @@ -9155,6 +9171,14 @@ spec: the server instance. 
type: object properties: + skipWaitingCohEndangeredState: + default: false + description: For graceful shutdown only, set to true + to skip waiting for Coherence Cache Cluster service + MBean HAStatus in safe state before shutdown. By default, + the operator will wait until it is safe to shutdown + the Coherence Cache Cluster. Defaults to false. + type: boolean ignoreSessions: default: false description: For graceful shutdown only, indicates to diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/PodStepContext.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/PodStepContext.java index 262f60e0874..607667d2d3e 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/PodStepContext.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/PodStepContext.java @@ -613,6 +613,11 @@ private void updateEnvForShutdown(List env) { addDefaultEnvVarIfMissing(env, "SHUTDOWN_WAIT_FOR_ALL_SESSIONS", String.valueOf(shutdown.getWaitForAllSessions())); } + if (!shutdown.getSkipWaitingCohEndangeredState() + .equals(Shutdown.DEFAULT_SKIP_WAIT_COH_ENDANGERED_STATE)) { + addDefaultEnvVarIfMissing(env, "SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE", + String.valueOf(shutdown.getSkipWaitingCohEndangeredState())); + } } private Shutdown getShutdownSpec() { diff --git a/operator/src/main/java/oracle/kubernetes/operator/steps/ShutdownManagedServerStep.java b/operator/src/main/java/oracle/kubernetes/operator/steps/ShutdownManagedServerStep.java index 56e4b93591d..d74ad7117c6 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/steps/ShutdownManagedServerStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/steps/ShutdownManagedServerStep.java @@ -221,7 +221,16 @@ protected PortDetails getPortDetails() { private Integer getWlsServerPort() { Integer listenPort = Optional.ofNullable(getWlsServerConfig()).map(WlsServerConfig::getListenPort) .orElse(null); - + Integer adminPort = 
Optional.ofNullable(getWlsServerConfig()).map(WlsServerConfig::getAdminPort) + .orElse(null); + Integer sslListenPort = Optional.ofNullable(getWlsServerConfig()).map(WlsServerConfig::getSslListenPort) + .orElse(null); + if (adminPort != null) { + return adminPort; + } + if (sslListenPort != null) { + return sslListenPort; + } if (listenPort == null) { // This can only happen if the running server pod does not exist in the WLS Domain. // This is a rare case where the server was deleted from the WLS Domain config. diff --git a/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/BaseConfiguration.java b/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/BaseConfiguration.java index 9c8029b5ea9..c34c5eca660 100644 --- a/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/BaseConfiguration.java +++ b/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/BaseConfiguration.java @@ -274,6 +274,12 @@ void addLimitRequirement(String resource, String quantity) { serverPod.addLimitRequirement(resource, quantity); } + void setShutdown(Shutdown shutdown) { + serverPod.setShutdown(shutdown.getShutdownType(), + shutdown.getTimeoutSeconds(), shutdown.getIgnoreSessions(), shutdown.getWaitForAllSessions(), + shutdown.getSkipWaitingCohEndangeredState()); + } + V1PodSecurityContext getPodSecurityContext() { return Optional.ofNullable(serverPod.getPodSecurityContext()).orElse(getDefaultPodSecurityContext()); } diff --git a/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/ServerPod.java b/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/ServerPod.java index d208a6774a0..80ba417f7e8 100644 --- a/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/ServerPod.java +++ b/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/ServerPod.java @@ -362,12 +362,14 @@ Shutdown getShutdown() { return this.shutdown; } - void setShutdown(ShutdownType shutdownType, Long timeoutSeconds, Boolean ignoreSessions, 
Boolean waitForAllSessions) { + void setShutdown(ShutdownType shutdownType, Long timeoutSeconds, Boolean ignoreSessions, Boolean waitForAllSessions, + Boolean skipWaitingCohEndangeredState) { this.shutdown .shutdownType(shutdownType) .timeoutSeconds(timeoutSeconds) .ignoreSessions(ignoreSessions) - .waitForAllSessions(waitForAllSessions); + .waitForAllSessions(waitForAllSessions) + .skipWaitingCohEndangeredState(skipWaitingCohEndangeredState); } ProbeTuning getReadinessProbeTuning() { diff --git a/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/Shutdown.java b/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/Shutdown.java index 98f5ed6f914..686f0e6a1c2 100644 --- a/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/Shutdown.java +++ b/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/Shutdown.java @@ -17,6 +17,7 @@ public class Shutdown { public static final Long DEFAULT_TIMEOUT = 30L; public static final Boolean DEFAULT_IGNORESESSIONS = Boolean.FALSE; public static final Boolean DEFAULT_WAIT_FOR_ALL_SESSIONS = Boolean.FALSE; + public static final Boolean DEFAULT_SKIP_WAIT_COH_ENDANGERED_STATE = Boolean.FALSE; @Description( "Specifies how the operator will shut down server instances." @@ -44,6 +45,14 @@ public class Shutdown { @Default(boolDefault = false) private Boolean waitForAllSessions; + @Description( + "For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus" + + " in safe state before shutdown. By default, the operator will wait until it is" + + " safe to shutdown the Coherence Cache Cluster." 
+ + " Defaults to false.") + @Default(boolDefault = false) + private Boolean skipWaitingCohEndangeredState; + void copyValues(Shutdown fromShutdown) { if (shutdownType == null) { shutdownType(fromShutdown.shutdownType); @@ -58,6 +67,10 @@ void copyValues(Shutdown fromShutdown) { if (waitForAllSessions == null) { waitForAllSessions(fromShutdown.waitForAllSessions); } + + if (skipWaitingCohEndangeredState == null) { + skipWaitingCohEndangeredState(fromShutdown.skipWaitingCohEndangeredState); + } } public ShutdownType getShutdownType() { @@ -96,6 +109,16 @@ public Shutdown waitForAllSessions(Boolean waitForAllSessions) { return this; } + public Boolean getSkipWaitingCohEndangeredState() { + return Optional.ofNullable(skipWaitingCohEndangeredState) + .orElse(DEFAULT_SKIP_WAIT_COH_ENDANGERED_STATE); + } + + public Shutdown skipWaitingCohEndangeredState(Boolean skipWaitingCohEndangeredState) { + this.skipWaitingCohEndangeredState = skipWaitingCohEndangeredState; + return this; + } + @Override public String toString() { return new ToStringBuilder(this) @@ -103,6 +126,7 @@ public String toString() { .append("timeoutSeconds", timeoutSeconds) .append("ignoreSessions", ignoreSessions) .append("waitForAllSessions", waitForAllSessions) + .append("skipWaitingCohEndangeredState", skipWaitingCohEndangeredState) .toString(); } @@ -123,6 +147,7 @@ public boolean equals(Object o) { .append(timeoutSeconds, that.timeoutSeconds) .append(ignoreSessions, that.ignoreSessions) .append(waitForAllSessions, that.waitForAllSessions) + .append(skipWaitingCohEndangeredState, that.skipWaitingCohEndangeredState) .isEquals(); } @@ -133,6 +158,7 @@ public int hashCode() { .append(timeoutSeconds) .append(ignoreSessions) .append(waitForAllSessions) + .append(skipWaitingCohEndangeredState) .toHashCode(); } } diff --git a/operator/src/main/resources/scripts/stop-server.py b/operator/src/main/resources/scripts/stop-server.py index 9f7e421ac98..61e0e0f5241 100755 --- 
a/operator/src/main/resources/scripts/stop-server.py +++ b/operator/src/main/resources/scripts/stop-server.py @@ -106,7 +106,9 @@ def checkCoherenceClusterExist(configData): def waitUntilCoherenceSafe(): print ('Shutdown: getting all service Coherence MBeans') query='Coherence:type=PartitionAssignment,service=*,*' - + if 'SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE' in os.environ and \ + os.environ['SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE'] == 'true': + return # By default, Coherence will use a single WebLogic Runtime MBean server to managed # its MBeans. That server will correspond to the current Coherence senior member, # which means that the Coherence MBeans will migrate to the oldest cluster member @@ -169,6 +171,10 @@ def waitUntilServiceSafeToShutdown(objectName): if status != "ENDANGERED": break + safe_status_ha = _checkCacheSafeStatusHA(objectName) + if safe_status_ha: + break + # Coherence caches are ENDANGERED meaning that we may lose data print ('Shutdown: Waiting until it is safe to shutdown Coherence server ...') systime.sleep(5) @@ -180,6 +186,57 @@ def waitUntilServiceSafeToShutdown(objectName): systime.sleep(10) pass +# Checks whether the cache is safe to shut down. The logic is adapted from the Coherence Operator's +# OperatorRestServer.areCacheServicesHA. 
+ +def _checkCacheSafeStatusHA(objectName): + try: + fields = ["StorageEnabled", "StorageEnabledCount", "BackupCount", "PartitionsAll", + "OwnedPartitionsPrimary", "StatusHA", "OutgoingTransferCount", "PartitionsEndangered", "PartitionsVulnerable" ] + + if objectName.getKeyProperty('service') == 'PartitionedCache': + partitioned_cache_query = "Coherence:name=PartitionedCache,type=Service,*" + partitioned_cache_mbeans = mbs.queryMBeans(ObjectName(partitioned_cache_query), None) + for item in partitioned_cache_mbeans: + fields_dict = {} + attrs = mbs.getAttributes(item.getObjectName(), fields) + for attr in attrs: + fields_dict.update({ attr.getName(): attr.getValue()}) + + storage_enabled = fields_dict['StorageEnabled'] + storage_enabled_count = fields_dict['StorageEnabledCount'] + backup_count = fields_dict['BackupCount'] + status_ha = fields_dict['StatusHA'] + outgoing_transfer_count = fields_dict['OutgoingTransferCount'] + partitions_endangered = fields_dict['PartitionsEndangered'] + partitions_vulnerable = fields_dict['PartitionsVulnerable'] + partitions_all = fields_dict['PartitionsAll'] + owned_partitions_primary = fields_dict['OwnedPartitionsPrimary'] + + if storage_enabled: + if storage_enabled_count > 1 and backup_count > 0 and "ENDANGERED" == status_ha: + print("StatusHA check failed. Service %s has HA status of %s" % (objectName, status_ha)) + return False + + if outgoing_transfer_count > 0: + print("StatusHA check failed. Service %s distribution in progress" % (objectName)) + return False + + persistent_query = "Coherence:service=PartitionedCache,type=Persistence,*" + persistent_mbeans = mbs.queryMBeans(ObjectName(persistent_query), None) + if persistent_mbeans.size() == 1: + idle = mbs.getAttribute(persistent_mbeans[0].getObjectName(), 'Idle') + if not idle: + print("StatusHA check failed. 
Service %s persistence is not idle" % (objectName)) + return False + + except: + print("Error _checkCacheSafeStatusHA function:") + dumpStack() + + return True + + #---------------------------------- # Main script diff --git a/operator/src/test/java/oracle/kubernetes/operator/helpers/ManagedPodHelperTest.java b/operator/src/test/java/oracle/kubernetes/operator/helpers/ManagedPodHelperTest.java index 4ae7527c626..8a7795d9c5d 100644 --- a/operator/src/test/java/oracle/kubernetes/operator/helpers/ManagedPodHelperTest.java +++ b/operator/src/test/java/oracle/kubernetes/operator/helpers/ManagedPodHelperTest.java @@ -35,6 +35,7 @@ import oracle.kubernetes.operator.work.Step.StepAndPacket; import oracle.kubernetes.weblogic.domain.DomainConfigurator; import oracle.kubernetes.weblogic.domain.ServerConfigurator; +import oracle.kubernetes.weblogic.domain.model.Shutdown; import org.hamcrest.Description; import org.hamcrest.TypeSafeDiagnosingMatcher; import org.junit.jupiter.api.Disabled; @@ -1293,6 +1294,15 @@ void whenOnlyAdminAndSslPortsAvailable_monitoringExporterSpecifiesAdminPort() { both(hasJavaOption("-DWLS_PORT=8001")).and(hasJavaOption("-DWLS_SECURE=true"))); } + @Test + void whenDomainSetShutdownSkippingCoherenceEndangeredStateHasEnvSet() { + Shutdown shutdown = new Shutdown(); + shutdown.skipWaitingCohEndangeredState(true); + getConfigurator().withServerPodShutdownSpec(shutdown); + assertThat( + getCreatedPodSpecContainer().getEnv(), + allOf(hasEnvVar("SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE", "true"))); + } @Override void setServerPort(int port) { diff --git a/operator/src/test/java/oracle/kubernetes/weblogic/domain/DomainConfigurator.java b/operator/src/test/java/oracle/kubernetes/weblogic/domain/DomainConfigurator.java index c60f4c72532..ce439a31e0f 100644 --- a/operator/src/test/java/oracle/kubernetes/weblogic/domain/DomainConfigurator.java +++ b/operator/src/test/java/oracle/kubernetes/weblogic/domain/DomainConfigurator.java @@ -32,6 +32,7 @@ import 
oracle.kubernetes.weblogic.domain.model.DomainSpec; import oracle.kubernetes.weblogic.domain.model.InitializeDomainOnPV; import oracle.kubernetes.weblogic.domain.model.Model; +import oracle.kubernetes.weblogic.domain.model.Shutdown; /** * Configures a domain, adding settings independently of the version of the domain representation. @@ -424,6 +425,8 @@ public abstract DomainConfigurator withFluentdConfiguration(boolean watchIntrosp String credentialName, String fluentdConfig, List args, List command); + public abstract DomainConfigurator withServerPodShutdownSpec(Shutdown shutdown); + /** * Adds a default server configuration to the domain, if not already present. * diff --git a/operator/src/test/java/oracle/kubernetes/weblogic/domain/model/DomainCommonConfigurator.java b/operator/src/test/java/oracle/kubernetes/weblogic/domain/model/DomainCommonConfigurator.java index 2c5833ddcff..5c71172129b 100644 --- a/operator/src/test/java/oracle/kubernetes/weblogic/domain/model/DomainCommonConfigurator.java +++ b/operator/src/test/java/oracle/kubernetes/weblogic/domain/model/DomainCommonConfigurator.java @@ -239,6 +239,12 @@ public DomainConfigurator withFluentdConfiguration(boolean watchIntrospectorLog, return this; } + @Override + public DomainConfigurator withServerPodShutdownSpec(Shutdown shutdown) { + getDomainSpec().setShutdown(shutdown); + return this; + } + private AdminServer getOrCreateAdminServer() { return getDomainSpec().getOrCreateAdminServer(); } @@ -821,6 +827,7 @@ public ServerConfigurator withPriorityClassName(String priorityClassName) { getDomainSpec().setPriorityClassName(priorityClassName); return this; } + } class ClusterConfiguratorImpl implements ClusterConfigurator {