Skip to content

Commit

Permalink
Merge branch 'owls-112700-cohchk-stopscript' into 'main'
Browse files Browse the repository at this point in the history
Add option to skip checking for coherence HAStatus of ENDANGERED

See merge request weblogic-cloud/weblogic-kubernetes-operator!4478
  • Loading branch information
rjeberhard committed Nov 1, 2023
2 parents 2661cf2 + 9e3d6f9 commit 814e18f
Show file tree
Hide file tree
Showing 15 changed files with 175 additions and 6 deletions.
5 changes: 5 additions & 0 deletions documentation/domains/Cluster.json
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,11 @@
"Shutdown": {
"type": "object",
"properties": {
"skipWaitingCohEndangeredState": {
"default": false,
"description": "For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus in safe state before shutdown. By default, the operator will wait until it is safe to shutdown the Coherence Cache Cluster. Defaults to false.",
"type": "boolean"
},
"ignoreSessions": {
"default": false,
"description": "For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false.",
Expand Down
1 change: 1 addition & 0 deletions documentation/domains/Cluster.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ The specification of the operation of the WebLogic cluster. Required.
| --- | --- | --- |
| `ignoreSessions` | Boolean | For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false. |
| `shutdownType` | string | Specifies how the operator will shut down server instances. Legal values are `Graceful` and `Forced`. Defaults to `Graceful`. |
| `skipWaitingCohEndangeredState` | Boolean | For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus in safe state before shutdown. By default, the operator will wait until it is safe to shutdown the Coherence Cache Cluster. Defaults to false. |
| `timeoutSeconds` | integer | For graceful shutdown only, number of seconds to wait before aborting in-flight work and shutting down the server. Defaults to 30 seconds. |
| `waitForAllSessions` | Boolean | For graceful shutdown only, set to true to wait for all HTTP sessions during in-flight work handling; false to wait for non-persisted HTTP sessions only. Defaults to false. |

Expand Down
5 changes: 5 additions & 0 deletions documentation/domains/Domain.json
Original file line number Diff line number Diff line change
Expand Up @@ -1187,6 +1187,11 @@
"Shutdown": {
"type": "object",
"properties": {
"skipWaitingCohEndangeredState": {
"default": false,
"description": "For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus in safe state before shutdown. By default, the operator will wait until it is safe to shutdown the Coherence Cache Cluster. Defaults to false.",
"type": "boolean"
},
"ignoreSessions": {
"default": false,
"description": "For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false.",
Expand Down
1 change: 1 addition & 0 deletions documentation/domains/Domain.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ The current status of the operation of the WebLogic domain. Updated automaticall
| --- | --- | --- |
| `ignoreSessions` | Boolean | For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false. |
| `shutdownType` | string | Specifies how the operator will shut down server instances. Legal values are `Graceful` and `Forced`. Defaults to `Graceful`. |
| `skipWaitingCohEndangeredState` | Boolean | For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus in safe state before shutdown. By default, the operator will wait until it is safe to shutdown the Coherence Cache Cluster. Defaults to false. |
| `timeoutSeconds` | integer | For graceful shutdown only, number of seconds to wait before aborting in-flight work and shutting down the server. Defaults to 30 seconds. |
| `waitForAllSessions` | Boolean | For graceful shutdown only, set to true to wait for all HTTP sessions during in-flight work handling; false to wait for non-persisted HTTP sessions only. Defaults to false. |

Expand Down
10 changes: 9 additions & 1 deletion kubernetes/crd/cluster-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
weblogic.sha256: 9ac551460201264f4eb69fabd7c54120c2e30492143ad81f47965567f2a7ae79
weblogic.sha256: 7146de067298f75bf803e2c8acf9f88980d1d5770e375122bcbb1fe4e6f9a64b
name: clusters.weblogic.oracle
spec:
group: weblogic.oracle
Expand Down Expand Up @@ -2390,6 +2390,14 @@ spec:
description: Configures how the operator should shut down the
server instance.
properties:
skipWaitingCohEndangeredState:
default: false
description: For graceful shutdown only, set to true to skip
waiting for Coherence Cache Cluster service MBean HAStatus
in safe state before shutdown. By default, the operator
will wait until it is safe to shutdown the Coherence Cache
Cluster. Defaults to false.
type: boolean
ignoreSessions:
default: false
description: For graceful shutdown only, indicates to ignore
Expand Down
26 changes: 25 additions & 1 deletion kubernetes/crd/domain-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
weblogic.sha256: 1b5fbb6128eb44897c07ef1304cf64bc8682f7f4ef2454dde6e861a04941b635
weblogic.sha256: d33dc80911f1901a703fe95438c2e4fc39271775bfecddd35893f8422d97caa3
name: domains.weblogic.oracle
spec:
group: weblogic.oracle
Expand Down Expand Up @@ -3331,6 +3331,14 @@ spec:
description: Configures how the operator should shut down
the server instance.
properties:
skipWaitingCohEndangeredState:
default: false
description: For graceful shutdown only, set to true to
skip waiting for Coherence Cache Cluster service MBean
HAStatus in safe state before shutdown. By default,
the operator will wait until it is safe to shutdown
the Coherence Cache Cluster. Defaults to false.
type: boolean
ignoreSessions:
default: false
description: For graceful shutdown only, indicates to
Expand Down Expand Up @@ -6414,6 +6422,14 @@ spec:
description: Configures how the operator should shut down the
server instance.
properties:
skipWaitingCohEndangeredState:
default: false
description: For graceful shutdown only, set to true to skip
waiting for Coherence Cache Cluster service MBean HAStatus
in safe state before shutdown. By default, the operator
will wait until it is safe to shutdown the Coherence Cache
Cluster. Defaults to false.
type: boolean
ignoreSessions:
default: false
description: For graceful shutdown only, indicates to ignore
Expand Down Expand Up @@ -9155,6 +9171,14 @@ spec:
the server instance.
type: object
properties:
skipWaitingCohEndangeredState:
default: false
description: For graceful shutdown only, set to true
to skip waiting for Coherence Cache Cluster service
MBean HAStatus in safe state before shutdown. By default,
the operator will wait until it is safe to shutdown
the Coherence Cache Cluster. Defaults to false.
type: boolean
ignoreSessions:
default: false
description: For graceful shutdown only, indicates to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,11 @@ private void updateEnvForShutdown(List<V1EnvVar> env) {
addDefaultEnvVarIfMissing(env, "SHUTDOWN_WAIT_FOR_ALL_SESSIONS",
String.valueOf(shutdown.getWaitForAllSessions()));
}
if (!shutdown.getSkipWaitingCohEndangeredState()
.equals(Shutdown.DEFAULT_SKIP_WAIT_COH_ENDANGERED_STATE)) {
addDefaultEnvVarIfMissing(env, "SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE",
String.valueOf(shutdown.getSkipWaitingCohEndangeredState()));
}
}

private Shutdown getShutdownSpec() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,16 @@ protected PortDetails getPortDetails() {
private Integer getWlsServerPort() {
Integer listenPort = Optional.ofNullable(getWlsServerConfig()).map(WlsServerConfig::getListenPort)
.orElse(null);

Integer adminPort = Optional.ofNullable(getWlsServerConfig()).map(WlsServerConfig::getAdminPort)
.orElse(null);
Integer sslListenPort = Optional.ofNullable(getWlsServerConfig()).map(WlsServerConfig::getSslListenPort)
.orElse(null);
if (adminPort != null) {
return adminPort;
}
if (sslListenPort != null) {
return sslListenPort;
}
if (listenPort == null) {
// This can only happen if the running server pod does not exist in the WLS Domain.
// This is a rare case where the server was deleted from the WLS Domain config.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,12 @@ void addLimitRequirement(String resource, String quantity) {
serverPod.addLimitRequirement(resource, quantity);
}

/**
 * Applies the given shutdown settings to the server pod by handing each individual
 * value (type, timeout, session handling flags and the Coherence skip flag) to
 * {@code serverPod.setShutdown}.
 *
 * @param shutdown the shutdown settings to apply; must not be null
 */
void setShutdown(Shutdown shutdown) {
  serverPod.setShutdown(
      shutdown.getShutdownType(),
      shutdown.getTimeoutSeconds(),
      shutdown.getIgnoreSessions(),
      shutdown.getWaitForAllSessions(),
      shutdown.getSkipWaitingCohEndangeredState());
}

/**
 * Returns the pod security context configured on the server pod, or the default
 * pod security context when none is configured.
 *
 * @return the configured or default pod security context
 */
V1PodSecurityContext getPodSecurityContext() {
  final V1PodSecurityContext configured = serverPod.getPodSecurityContext();
  // The default is computed unconditionally, exactly as Optional.orElse(...) would.
  final V1PodSecurityContext fallback = getDefaultPodSecurityContext();
  if (configured != null) {
    return configured;
  }
  return fallback;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -362,12 +362,14 @@ Shutdown getShutdown() {
return this.shutdown;
}

void setShutdown(ShutdownType shutdownType, Long timeoutSeconds, Boolean ignoreSessions, Boolean waitForAllSessions) {
void setShutdown(ShutdownType shutdownType, Long timeoutSeconds, Boolean ignoreSessions, Boolean waitForAllSessions,
Boolean skipWaitingCohEndangeredState) {
this.shutdown
.shutdownType(shutdownType)
.timeoutSeconds(timeoutSeconds)
.ignoreSessions(ignoreSessions)
.waitForAllSessions(waitForAllSessions);
.waitForAllSessions(waitForAllSessions)
.skipWaitingCohEndangeredState(skipWaitingCohEndangeredState);
}

ProbeTuning getReadinessProbeTuning() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public class Shutdown {
public static final Long DEFAULT_TIMEOUT = 30L;
public static final Boolean DEFAULT_IGNORESESSIONS = Boolean.FALSE;
public static final Boolean DEFAULT_WAIT_FOR_ALL_SESSIONS = Boolean.FALSE;
public static final Boolean DEFAULT_SKIP_WAIT_COH_ENDANGERED_STATE = Boolean.FALSE;

@Description(
"Specifies how the operator will shut down server instances."
Expand Down Expand Up @@ -44,6 +45,14 @@ public class Shutdown {
@Default(boolDefault = false)
private Boolean waitForAllSessions;

@Description(
"For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus"
+ " in safe state before shutdown. By default, the operator will wait until it is"
+ " safe to shutdown the Coherence Cache Cluster."
+ " Defaults to false.")
@Default(boolDefault = false)
private Boolean skipWaitingCohEndangeredState;

void copyValues(Shutdown fromShutdown) {
if (shutdownType == null) {
shutdownType(fromShutdown.shutdownType);
Expand All @@ -58,6 +67,10 @@ void copyValues(Shutdown fromShutdown) {
if (waitForAllSessions == null) {
waitForAllSessions(fromShutdown.waitForAllSessions);
}

if (skipWaitingCohEndangeredState == null) {
skipWaitingCohEndangeredState(fromShutdown.skipWaitingCohEndangeredState);
}
}

public ShutdownType getShutdownType() {
Expand Down Expand Up @@ -96,13 +109,24 @@ public Shutdown waitForAllSessions(Boolean waitForAllSessions) {
return this;
}

/**
 * Returns whether graceful shutdown skips waiting for the Coherence HAStatus check.
 *
 * @return the configured value, or {@link #DEFAULT_SKIP_WAIT_COH_ENDANGERED_STATE}
 *     when no value has been set
 */
public Boolean getSkipWaitingCohEndangeredState() {
  if (skipWaitingCohEndangeredState == null) {
    return DEFAULT_SKIP_WAIT_COH_ENDANGERED_STATE;
  }
  return skipWaitingCohEndangeredState;
}

/**
 * Sets whether graceful shutdown skips waiting for the Coherence HAStatus check.
 *
 * @param skipWaitingCohEndangeredState the value to store; it is stored as given, and a
 *     null value makes the getter fall back to the default
 * @return this object, so that calls can be chained
 */
public Shutdown skipWaitingCohEndangeredState(Boolean skipWaitingCohEndangeredState) {
this.skipWaitingCohEndangeredState = skipWaitingCohEndangeredState;
return this;
}

/**
 * Renders the raw (possibly null) values of all shutdown settings, including the
 * Coherence skip flag, for diagnostic output.
 */
@Override
public String toString() {
  final ToStringBuilder text = new ToStringBuilder(this);
  text.append("shutdownType", shutdownType);
  text.append("timeoutSeconds", timeoutSeconds);
  text.append("ignoreSessions", ignoreSessions);
  text.append("waitForAllSessions", waitForAllSessions);
  text.append("skipWaitingCohEndangeredState", skipWaitingCohEndangeredState);
  return text.toString();
}

Expand All @@ -123,6 +147,7 @@ public boolean equals(Object o) {
.append(timeoutSeconds, that.timeoutSeconds)
.append(ignoreSessions, that.ignoreSessions)
.append(waitForAllSessions, that.waitForAllSessions)
.append(skipWaitingCohEndangeredState, that.skipWaitingCohEndangeredState)
.isEquals();
}

Expand All @@ -133,6 +158,7 @@ public int hashCode() {
.append(timeoutSeconds)
.append(ignoreSessions)
.append(waitForAllSessions)
.append(skipWaitingCohEndangeredState)
.toHashCode();
}
}
59 changes: 58 additions & 1 deletion operator/src/main/resources/scripts/stop-server.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ def checkCoherenceClusterExist(configData):
def waitUntilCoherenceSafe():
print ('Shutdown: getting all service Coherence MBeans')
query='Coherence:type=PartitionAssignment,service=*,*'

if 'SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE' in os.environ and \
os.environ['SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE'] == 'true':
return
# By default, Coherence will use a single WebLogic Runtime MBean server to manage
# its MBeans. That server will correspond to the current Coherence senior member,
# which means that the Coherence MBeans will migrate to the oldest cluster member
Expand Down Expand Up @@ -169,6 +171,10 @@ def waitUntilServiceSafeToShutdown(objectName):
if status != "ENDANGERED":
break

safe_status_ha = _checkCacheSafeStatusHA(objectName)
if safe_status_ha:
break

# Coherence caches are ENDANGERED meaning that we may lose data
print ('Shutdown: Waiting until it is safe to shutdown Coherence server ...')
systime.sleep(5)
Expand All @@ -180,6 +186,57 @@ def waitUntilServiceSafeToShutdown(objectName):
systime.sleep(10)
pass

# Checks whether the cache is safe to shut down. The logic is adapted from the Coherence Operator's
# OperatorRestServer.areCacheServicesHA.

def _checkCacheSafeStatusHA(objectName):
  """Report whether the PartitionedCache service looks safe to shut down.

  objectName -- JMX ObjectName of the service being checked; only a value whose
                'service' key property is 'PartitionedCache' is inspected.

  Returns False when, for a storage-enabled service MBean, any of these holds:
    * more than one storage-enabled member, at least one backup, and StatusHA
      is "ENDANGERED";
    * partition distribution is in progress (OutgoingTransferCount > 0);
    * exactly one Persistence MBean exists and it is not idle.
  Returns True in every other case, including when an error occurs while
  querying the MBean server.
  """
  # NOTE(review): the nesting of the distribution and persistence checks under
  # 'if storage_enabled' was reconstructed from the statement order -- confirm.
  try:
    # Only the attributes the checks below actually read are requested.
    fields = ["StorageEnabled", "StorageEnabledCount", "BackupCount", "StatusHA", "OutgoingTransferCount"]

    if objectName.getKeyProperty('service') == 'PartitionedCache':
      partitioned_cache_query = "Coherence:name=PartitionedCache,type=Service,*"
      partitioned_cache_mbeans = mbs.queryMBeans(ObjectName(partitioned_cache_query), None)
      for item in partitioned_cache_mbeans:
        fields_dict = {}
        for attr in mbs.getAttributes(item.getObjectName(), fields):
          fields_dict[attr.getName()] = attr.getValue()

        storage_enabled = fields_dict['StorageEnabled']
        storage_enabled_count = fields_dict['StorageEnabledCount']
        backup_count = fields_dict['BackupCount']
        status_ha = fields_dict['StatusHA']
        outgoing_transfer_count = fields_dict['OutgoingTransferCount']

        if storage_enabled:
          if storage_enabled_count > 1 and backup_count > 0 and "ENDANGERED" == status_ha:
            print("StatusHA check failed. Service %s has HA status of %s" % (objectName, status_ha))
            return False

          if outgoing_transfer_count > 0:
            print("StatusHA check failed. Service %s distribution in progress" % (objectName))
            return False

          persistent_query = "Coherence:service=PartitionedCache,type=Persistence,*"
          persistent_mbeans = mbs.queryMBeans(ObjectName(persistent_query), None)
          if persistent_mbeans.size() == 1:
            # queryMBeans returns a java.util.Set, which cannot be indexed with [0];
            # indexing raised an error that the handler below turned into "safe".
            # Fetch the single element through the iterator instead.
            persistence_mbean = persistent_mbeans.iterator().next()
            idle = mbs.getAttribute(persistence_mbean.getObjectName(), 'Idle')
            if not idle:
              print("StatusHA check failed. Service %s persistence is not idle" % (objectName))
              return False

  except:
    # NOTE(review): errors are reported but the cache is still treated as safe, so a
    # failing MBean query lets the caller stop waiting -- confirm this is intended.
    print("Error _checkCacheSafeStatusHA function:")
    dumpStack()

  return True



#----------------------------------
# Main script
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import oracle.kubernetes.operator.work.Step.StepAndPacket;
import oracle.kubernetes.weblogic.domain.DomainConfigurator;
import oracle.kubernetes.weblogic.domain.ServerConfigurator;
import oracle.kubernetes.weblogic.domain.model.Shutdown;
import org.hamcrest.Description;
import org.hamcrest.TypeSafeDiagnosingMatcher;
import org.junit.jupiter.api.Disabled;
Expand Down Expand Up @@ -1293,6 +1294,15 @@ void whenOnlyAdminAndSslPortsAvailable_monitoringExporterSpecifiesAdminPort() {
both(hasJavaOption("-DWLS_PORT=8001")).and(hasJavaOption("-DWLS_SECURE=true")));
}

// A domain-level shutdown spec that skips the Coherence HAStatus wait must surface
// as the SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE environment variable on the container.
@Test
void whenDomainSetShutdownSkippingCoherenceEndangeredStateHasEnvSet() {
  getConfigurator().withServerPodShutdownSpec(new Shutdown().skipWaitingCohEndangeredState(true));

  assertThat(
      getCreatedPodSpecContainer().getEnv(),
      allOf(hasEnvVar("SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE", "true")));
}

@Override
void setServerPort(int port) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import oracle.kubernetes.weblogic.domain.model.DomainSpec;
import oracle.kubernetes.weblogic.domain.model.InitializeDomainOnPV;
import oracle.kubernetes.weblogic.domain.model.Model;
import oracle.kubernetes.weblogic.domain.model.Shutdown;

/**
* Configures a domain, adding settings independently of the version of the domain representation.
Expand Down Expand Up @@ -424,6 +425,8 @@ public abstract DomainConfigurator withFluentdConfiguration(boolean watchIntrosp
String credentialName, String fluentdConfig,
List<String> args, List<String> command);

/**
 * Configures the shutdown settings to use for server pods.
 *
 * @param shutdown the shutdown settings to apply
 * @return this object
 */
public abstract DomainConfigurator withServerPodShutdownSpec(Shutdown shutdown);

/**
* Adds a default server configuration to the domain, if not already present.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,12 @@ public DomainConfigurator withFluentdConfiguration(boolean watchIntrospectorLog,
return this;
}

@Override
public DomainConfigurator withServerPodShutdownSpec(Shutdown shutdown) {
// Store the shutdown settings on the domain spec, then return this configurator for chaining.
getDomainSpec().setShutdown(shutdown);
return this;
}

/**
 * Returns the admin server configuration by delegating to the domain spec's
 * {@code getOrCreateAdminServer()}.
 */
private AdminServer getOrCreateAdminServer() {
return getDomainSpec().getOrCreateAdminServer();
}
Expand Down Expand Up @@ -821,6 +827,7 @@ public ServerConfigurator withPriorityClassName(String priorityClassName) {
getDomainSpec().setPriorityClassName(priorityClassName);
return this;
}

}

class ClusterConfiguratorImpl implements ClusterConfigurator {
Expand Down

0 comments on commit 814e18f

Please sign in to comment.