diff --git a/server/routerlicious/packages/lambdas/src/alfred/index.ts b/server/routerlicious/packages/lambdas/src/alfred/index.ts index 99c46c8288ab..10cacda1b3ae 100644 --- a/server/routerlicious/packages/lambdas/src/alfred/index.ts +++ b/server/routerlicious/packages/lambdas/src/alfred/index.ts @@ -244,6 +244,9 @@ export function configureWebSocketServices( let connectDocumentComplete: boolean = false; let connectDocumentP: Promise | undefined; + const clientIdConnectionsDisconnected = new Set(); + const clientIdClientsDisconnected = new Set(); + let disconnectDocumentP: Promise | undefined; // Timer to check token expiry for this socket connection let expirationTimer: NodeJS.Timer | undefined; @@ -621,6 +624,101 @@ export function configureWebSocketServices( }; } + async function disconnectDocument() { + // Clear token expiration timer on disconnection + clearExpirationTimer(); + const removeAndStoreP: Promise[] = []; + // Send notification messages for all client IDs in the connection map + for (const [clientId, connection] of connectionsMap) { + if (clientIdConnectionsDisconnected.has(clientId)) { + // We already removed this clientId once. Skip it. + continue; + } + const messageMetaData = getMessageMetadata( + connection.documentId, + connection.tenantId, + ); + logger.info(`Disconnect of ${clientId}`, { messageMetaData }); + Lumberjack.info( + `Disconnect of ${clientId}`, + getLumberBaseProperties(connection.documentId, connection.tenantId), + ); + + connection + .disconnect() + .then(() => { + // Keep track of disconnected clientIds so that we don't repeat the disconnect signal + // for the same clientId if retrying when connectDocument completes after disconnectDocument. + clientIdConnectionsDisconnected.add(clientId); + }) + .catch((error) => { + const errorMsg = `Failed to disconnect client ${clientId} from orderer connection.`; + Lumberjack.error( + errorMsg, + getLumberBaseProperties(connection.documentId, connection.tenantId), + error, + ); + }); + if (isClientConnectivityCountingEnabled && throttleAndUsageStorageManager) { + const connectionTimestamp = connectionTimeMap.get(clientId); + if (connectionTimestamp) { + removeAndStoreP.push( + storeClientConnectivityTime( + clientId, + connection.documentId, + connection.tenantId, + connectionTimestamp, + throttleAndUsageStorageManager, + ), + ); + } + } + } + // Send notification messages for all client IDs in the room map + for (const [clientId, room] of roomMap) { + if (clientIdClientsDisconnected.has(clientId)) { + // We already removed this clientId once. Skip it. + continue; + } + const messageMetaData = getMessageMetadata(room.documentId, room.tenantId); + // excluding summarizer for total client count. + if (connectionTimeMap.has(clientId)) { + connectionCountLogger.decrementConnectionCount(); + } + logger.info(`Disconnect of ${clientId} from room`, { messageMetaData }); + Lumberjack.info( + `Disconnect of ${clientId} from room`, + getLumberBaseProperties(room.documentId, room.tenantId), + ); + removeAndStoreP.push( + clientManager + .removeClient(room.tenantId, room.documentId, clientId) + .then(() => { + // Keep track of disconnected clientIds so that we don't repeat the disconnect signal + // for the same clientId if retrying when connectDocument completes after disconnectDocument. + clientIdClientsDisconnected.add(clientId); + }), + ); + socket + .emitToRoom(getRoomId(room), "signal", createRoomLeaveMessage(clientId)) + .catch((error) => { + const errorMsg = `Failed to emit signal to room ${clientId}, ${getRoomId( + room, + )}.`; + Lumberjack.error( + errorMsg, + getLumberBaseProperties(room.documentId, room.tenantId), + error, + ); + }); + } + // Clear socket tracker upon disconnection + if (socketTracker) { + socketTracker.removeSocket(socket.id); + } + await Promise.all(removeAndStoreP); + } + // Note connect is a reserved socket.io word so we use connect_document to represent the connect request // eslint-disable-next-line @typescript-eslint/no-misused-promises socket.on("connect_document", async (connectionMessage: IConnect) => { @@ -667,6 +765,62 @@ export function configureWebSocketServices( }) .finally(() => { connectDocumentComplete = true; + if (disconnectDocumentP) { + Lumberjack.warning( + `ConnectDocument completed after disconnect was handled.`, + ); + // We have already received disconnect for this connection. + disconnectDocumentP.finally(() => { + // We might need to re-run disconnect handler after previous disconnect handler completes. + // DisconnectDocument internally handles the cases where we have already run disconnect for + // roomsMap and connectionsMap so that we don't duplicate disconnect efforts. + // The primary need for this retry is when we receive "disconnect" in the narrow window after + // "connect_document" but before "connectDocumentP" is defined. + const alreadyDisconnectedAllConnections: boolean = + connectionsMap.size === clientIdConnectionsDisconnected.size; + const alreadyDisconnectedAllClients: boolean = + roomMap.size === clientIdClientsDisconnected.size; + if ( + alreadyDisconnectedAllConnections && + alreadyDisconnectedAllClients + ) { + // Don't retry disconnect if all connections and clients are already handled. + return; + } + + const disconnectRetryMetric = Lumberjack.newLumberMetric( + LumberEventName.DisconnectDocumentRetry, + ); + disconnectRetryMetric.setProperties({ + [CommonProperties.connectionCount]: connectionsMap.size, + [CommonProperties.connectionClients]: JSON.stringify( + Array.from(connectionsMap.keys()), + ), + [CommonProperties.roomClients]: JSON.stringify( + Array.from(roomMap.keys()), + ), + }); + + if (roomMap.size >= 1) { + const rooms = Array.from(roomMap.values()); + const documentId = rooms[0].documentId; + const tenantId = rooms[0].tenantId; + disconnectRetryMetric.setProperties({ + ...getLumberBaseProperties(documentId, tenantId), + }); + } + + disconnectDocument() + .then(() => { + disconnectRetryMetric.success( + `Successfully retried disconnect.`, + ); + }) + .catch((error) => { + disconnectRetryMetric.error(`Disconnect retry failed.`, error); + }); + }); + } }); }); @@ -896,77 +1050,8 @@ export function configureWebSocketServices( } try { - clearExpirationTimer(); - const removeAndStoreP: Promise[] = []; - // Send notification messages for all client IDs in the connection map - for (const [clientId, connection] of connectionsMap) { - const messageMetaData = getMessageMetadata( - connection.documentId, - connection.tenantId, - ); - logger.info(`Disconnect of ${clientId}`, { messageMetaData }); - Lumberjack.info( - `Disconnect of ${clientId}`, - getLumberBaseProperties(connection.documentId, connection.tenantId), - ); - - connection.disconnect().catch((error) => { - const errorMsg = `Failed to disconnect client ${clientId} from orderer connection.`; - Lumberjack.error( - errorMsg, - getLumberBaseProperties(connection.documentId, connection.tenantId), - error, - ); - }); - if (isClientConnectivityCountingEnabled && throttleAndUsageStorageManager) { - const connectionTimestamp = connectionTimeMap.get(clientId); - if (connectionTimestamp) { - removeAndStoreP.push( - storeClientConnectivityTime( - clientId, - connection.documentId, - connection.tenantId, - connectionTimestamp, - throttleAndUsageStorageManager, - ), - ); - } - } - } - // Send notification messages for all client IDs in the room map - for (const [clientId, room] of roomMap) { - const messageMetaData = getMessageMetadata(room.documentId, room.tenantId); - // excluding summarizer for total client count. - if (connectionTimeMap.has(clientId)) { - connectionCountLogger.decrementConnectionCount(); - } - logger.info(`Disconnect of ${clientId} from room`, { messageMetaData }); - Lumberjack.info( - `Disconnect of ${clientId} from room`, - getLumberBaseProperties(room.documentId, room.tenantId), - ); - removeAndStoreP.push( - clientManager.removeClient(room.tenantId, room.documentId, clientId), - ); - socket - .emitToRoom(getRoomId(room), "signal", createRoomLeaveMessage(clientId)) - .catch((error) => { - const errorMsg = `Failed to emit signal to room ${clientId}, ${getRoomId( - room, - )}.`; - Lumberjack.error( - errorMsg, - getLumberBaseProperties(room.documentId, room.tenantId), - error, - ); - }); - } - // Clear socket tracker upon disconnection - if (socketTracker) { - socketTracker.removeSocket(socket.id); - } - await Promise.all(removeAndStoreP); - + disconnectDocumentP = disconnectDocument(); + await disconnectDocumentP; disconnectMetric.success(`Successfully disconnected.`); } catch (error) { disconnectMetric.error(`Disconnect failed.`, error); diff --git a/server/routerlicious/packages/services-telemetry/src/lumberEventNames.ts b/server/routerlicious/packages/services-telemetry/src/lumberEventNames.ts index 4a4f1c42cce7..859296752353 100644 --- a/server/routerlicious/packages/services-telemetry/src/lumberEventNames.ts +++ b/server/routerlicious/packages/services-telemetry/src/lumberEventNames.ts @@ -42,6 +42,7 @@ export enum LumberEventName { CreateDocumentUpdateDocumentCollection = "CreateDocumentUpdateDocumentCollection", CreateDocInitialSummaryWrite = "CreateDocInitialSummaryWrite", DisconnectDocument = "DisconnectDocument", + DisconnectDocumentRetry = "DisconnectDocumentRetry", RiddlerFetchTenantKey = "RiddlerFetchTenantKey", HttpRequest = "HttpRequest", TotalConnectionCount = "TotalConnectionCount",