diff --git a/.gitignore b/.gitignore index b805198025..55234644ac 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,10 @@ # Generated trace files *.lft +util/tracing/trace_to_chrome +util/tracing/trace_to_chrome.o +util/tracing/trace_to_csv +util/tracing/trace_to_csv.o +util/tracing/trace_to_influxdb +util/tracing/trace_to_influxdb.o +util/tracing/trace_util.o diff --git a/CMakeLists.txt b/CMakeLists.txt index 9eadfeda2d..7a088cc0c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,12 +24,14 @@ endif() set(Test test) set(Lib lib) -set(CoreLib core) +set(CoreLibPath core) +set(CoreLib reactor-c) set(PlatformLib platform) include_directories(${CMAKE_SOURCE_DIR}/include) include_directories(${CMAKE_SOURCE_DIR}/include/core) include_directories(${CMAKE_SOURCE_DIR}/include/core/federated) +include_directories(${CMAKE_SOURCE_DIR}/include/core/federated/network) include_directories(${CMAKE_SOURCE_DIR}/include/core/modal_models) include_directories(${CMAKE_SOURCE_DIR}/include/core/platform) include_directories(${CMAKE_SOURCE_DIR}/include/core/threaded) @@ -39,6 +41,6 @@ include_directories(${CMAKE_SOURCE_DIR}/include/api) enable_testing() add_subdirectory(${Test}) add_subdirectory(${Lib}) -add_subdirectory(${CoreLib}) +add_subdirectory(${CoreLibPath}) include(test/Tests.cmake) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 16bf0e6166..1279b8a321 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -9,23 +9,20 @@ if (DEFINED LF_TRACE) list(APPEND GENERAL_SOURCES trace.c) endif() -# Store all sources used to build the reactor-c lib in INFO_SOURCES -list(APPEND INFO_SOURCES ${GENERAL_SOURCES}) - -# Create the core library -add_library(core ${GENERAL_SOURCES}) +# Add the general sources to the list of REACTORC_SOURCES +list(APPEND REACTORC_SOURCES ${GENERAL_SOURCES}) # Add sources for either threaded or single-threaded runtime if (DEFINED FEDERATED) include(federated/CMakeLists.txt) + include(federated/network/CMakeLists.txt) endif() # Add 
sources for either threaded or single-threaded runtime if(DEFINED LF_SINGLE_THREADED) message(STATUS "Including sources for single-threaded runtime.") list(APPEND SINGLE_THREADED_SOURCES reactor.c) - target_sources(core PRIVATE ${SINGLE_THREADED_SOURCES}) - list(APPEND INFO_SOURCES ${SINGLE_THREADED_SOURCES}) + list(APPEND REACTORC_SOURCES ${SINGLE_THREADED_SOURCES}) else() message(STATUS "Including sources for threaded runtime with \ ${NUMBER_OF_WORKERS} worker(s) with scheduler=${SCHEDULER} and \ @@ -33,6 +30,10 @@ tracing=${LF_TRACE}.") include(threaded/CMakeLists.txt) endif() +# Add sources for the local RTI if we are using scheduling enclaves +if(DEFINED LF_ENCLAVES) +include(federated/RTI/local_rti.cmake) +endif() # Include sources from subdirectories include(utils/CMakeLists.txt) @@ -40,16 +41,32 @@ include(modal_models/CMakeLists.txt) include(platform/CMakeLists.txt) # Print sources used for compilation -list(JOIN INFO_SOURCES ", " PRINTABLE_SOURCE_LIST) +list(JOIN REACTORC_SOURCES ", " PRINTABLE_SOURCE_LIST) message(STATUS "Including the following sources: " ${PRINTABLE_SOURCE_LIST}) -target_include_directories(core PUBLIC ../include) -target_include_directories(core PUBLIC ../include/core) -target_include_directories(core PUBLIC ../include/core/federated) -target_include_directories(core PUBLIC ../include/core/platform) -target_include_directories(core PUBLIC ../include/core/modal_models) -target_include_directories(core PUBLIC ../include/core/threaded) -target_include_directories(core PUBLIC ../include/core/utils) +# Create the reactor-c library. If we are targeting Zephyr we have to use the +# Zephyr Cmake extension to create the library and add the sources. +if(PLATFORM_ZEPHYR) + message("--- Building Zephyr library") + zephyr_library_named(reactor-c) + zephyr_library_sources(${REACTORC_SOURCES}) + zephyr_library_link_libraries(kernel) +else() + add_library(reactor-c ${REACTORC_SOURCES}) +endif() + +# Apply compile definitions to the reactor-c library. 
+target_compile_definitions(reactor-c PUBLIC ${REACTORC_COMPILE_DEFS}) + +target_include_directories(reactor-c PUBLIC ../include) +target_include_directories(reactor-c PUBLIC ../include/core) +target_include_directories(reactor-c PUBLIC ../include/core/federated) +target_include_directories(reactor-c PUBLIC ../include/core/federated/network) +target_include_directories(reactor-c PUBLIC ../include/core/platform) +target_include_directories(reactor-c PUBLIC ../include/core/modal_models) +target_include_directories(reactor-c PUBLIC ../include/core/threaded) +target_include_directories(reactor-c PUBLIC ../include/core/utils) +target_include_directories(reactor-c PUBLIC federated/RTI/) if (APPLE) SET(CMAKE_C_ARCHIVE_CREATE " Scr ") @@ -64,38 +81,38 @@ if(DEFINED FEDERATED_AUTHENTICATED) set(OPENSSL_ROOT_DIR /usr/local/opt/openssl) endif() find_package(OpenSSL REQUIRED) - target_link_libraries(core PUBLIC OpenSSL::SSL) + target_link_libraries(reactor-c PUBLIC OpenSSL::SSL) endif() if(DEFINED _LF_CLOCK_SYNC_ON) find_library(MATH_LIBRARY m) if(MATH_LIBRARY) - target_link_libraries(core PUBLIC ${MATH_LIBRARY}) + target_link_libraries(reactor-c PUBLIC ${MATH_LIBRARY}) endif() endif() -# Link with thread library, unless if we are targeting the Zephyr RTOS +# Link with thread library, unless we are on the Zephyr platform. 
if(NOT DEFINED LF_SINGLE_THREADED OR DEFINED LF_TRACE) - if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Zephyr") + if (NOT PLATFORM_ZEPHYR) find_package(Threads REQUIRED) - target_link_libraries(core PUBLIC Threads::Threads) + target_link_libraries(reactor-c PUBLIC Threads::Threads) endif() endif() # Macro for translating a command-line argument into compile definition for -# core lib +# reactor-c lib macro(define X) if(DEFINED ${X}) message(STATUS ${X}=${${X}}) - target_compile_definitions(core PUBLIC ${X}=${${X}}) + target_compile_definitions(reactor-c PUBLIC ${X}=${${X}}) endif(DEFINED ${X}) endmacro() # FIXME: May want these to be application dependent, hence passed as # parameters to Cmake. -target_compile_definitions(core PRIVATE INITIAL_EVENT_QUEUE_SIZE=10) -target_compile_definitions(core PRIVATE INITIAL_REACT_QUEUE_SIZE=10) -target_compile_definitions(core PUBLIC PLATFORM_${CMAKE_SYSTEM_NAME}) +target_compile_definitions(reactor-c PRIVATE INITIAL_EVENT_QUEUE_SIZE=10) +target_compile_definitions(reactor-c PRIVATE INITIAL_REACT_QUEUE_SIZE=10) +target_compile_definitions(reactor-c PUBLIC PLATFORM_${CMAKE_SYSTEM_NAME}) # Search and apply all possible compile definitions message(STATUS "Applying preprocessor definitions...") @@ -123,3 +140,5 @@ define(SCHEDULER) define(LF_SOURCE_DIRECTORY) define(LF_PACKAGE_DIRECTORY) define(LF_FILE_SEPARATOR) +define(WORKERS_NEEDED_FOR_FEDERATE) +define(LF_ENCLAVES) diff --git a/core/environment.c b/core/environment.c index 4ca79c11ed..7d5981181c 100644 --- a/core/environment.c +++ b/core/environment.c @@ -45,7 +45,7 @@ static void environment_init_threaded(environment_t* env, int num_workers) { #if !defined(LF_SINGLE_THREADED) env->num_workers = num_workers; env->thread_ids = (lf_thread_t*)calloc(num_workers, sizeof(lf_thread_t)); - lf_assert(env->thread_ids != NULL, "Out of memory"); + LF_ASSERT(env->thread_ids, "Out of memory"); env->barrier.requestors = 0; env->barrier.horizon = FOREVER_TAG; @@ -84,15 +84,19 @@ static void 
environment_init_modes(environment_t* env, int num_modes, int num_st #ifdef MODAL_REACTORS if (num_modes > 0) { mode_environment_t* modes = (mode_environment_t *) calloc(1, sizeof(mode_environment_t)); - lf_assert(modes != NULL, "Out of memory"); + LF_ASSERT(modes, "Out of memory"); modes->modal_reactor_states = (reactor_mode_state_t**) calloc(num_modes, sizeof(reactor_mode_state_t*)); - lf_assert(modes->modal_reactor_states != NULL, "Out of memory"); + LF_ASSERT(modes->modal_reactor_states, "Out of memory"); modes->modal_reactor_states_size = num_modes; modes->triggered_reactions_request = 0; - modes->state_resets = (mode_state_variable_reset_data_t *) calloc(num_state_resets, sizeof(mode_state_variable_reset_data_t)); - lf_assert(modes->state_resets != NULL, "Out of memory"); modes->state_resets_size = num_state_resets; + if (modes->state_resets_size > 0) { + modes->state_resets = (mode_state_variable_reset_data_t *) calloc(modes->state_resets_size, sizeof(mode_state_variable_reset_data_t)); + LF_ASSERT(modes->state_resets, "Out of memory"); + } else { + modes->state_resets = NULL; + } env->modes = modes; @@ -107,9 +111,15 @@ static void environment_init_modes(environment_t* env, int num_modes, int num_st */ static void environment_init_federated(environment_t* env, int num_is_present_fields) { #ifdef FEDERATED_DECENTRALIZED - env->_lf_intended_tag_fields = (tag_t**) calloc(num_is_present_fields, sizeof(tag_t*)); - lf_assert(env->_lf_intended_tag_fields != NULL, "Out of memory"); - env->_lf_intended_tag_fields_size = num_is_present_fields; + if (num_is_present_fields > 0) { + env->_lf_intended_tag_fields = (tag_t**) calloc(num_is_present_fields, sizeof(tag_t*)); + LF_ASSERT(env->_lf_intended_tag_fields, "Out of memory"); + env->_lf_intended_tag_fields_size = num_is_present_fields; + } else { + // No fields to track: use an empty (NULL, 0) array. + env->_lf_intended_tag_fields = NULL; + env->_lf_intended_tag_fields_size = 0; + } #endif } @@ -155,6 +165,7 @@ static void environment_free_federated(environment_t* env) { } void 
environment_free(environment_t* env) { + free(env->name); free(env->timer_triggers); free(env->startup_reactions); free(env->shutdown_reactions); @@ -175,6 +184,7 @@ void environment_free(environment_t* env) { int environment_init( environment_t* env, + const char *name, int id, int num_workers, int num_timers, @@ -187,33 +197,57 @@ int environment_init( const char * trace_file_name ) { + env->name = malloc(strlen(name) + 1); // +1 for the null terminator + LF_ASSERT(env->name, "Out of memory"); + strcpy(env->name, name); + env->id = id; env->stop_tag = FOREVER_TAG; env->timer_triggers_size=num_timers; - env->timer_triggers = (trigger_t **) calloc(num_timers, sizeof(trigger_t)); - lf_assert(env->timer_triggers != NULL, "Out of memory"); + if(env->timer_triggers_size > 0) { + env->timer_triggers = (trigger_t **) calloc(num_timers, sizeof(trigger_t*)); + LF_ASSERT(env->timer_triggers, "Out of memory"); + } else { + env->timer_triggers = NULL; + } env->startup_reactions_size=num_startup_reactions; - env->startup_reactions = (reaction_t **) calloc(num_startup_reactions, sizeof(reaction_t)); - lf_assert(env->startup_reactions != NULL, "Out of memory"); + if (env->startup_reactions_size > 0) { + env->startup_reactions = (reaction_t **) calloc(num_startup_reactions, sizeof(reaction_t*)); + LF_ASSERT(env->startup_reactions, "Out of memory"); + } else { + env->startup_reactions = NULL; + } env->shutdown_reactions_size=num_shutdown_reactions; - env->shutdown_reactions = (reaction_t **) calloc(num_shutdown_reactions, sizeof(reaction_t)); - lf_assert(env->shutdown_reactions != NULL, "Out of memory"); + if(env->shutdown_reactions_size > 0) { + env->shutdown_reactions = (reaction_t **) calloc(num_shutdown_reactions, sizeof(reaction_t*)); + LF_ASSERT(env->shutdown_reactions, "Out of memory"); + } else { + env->shutdown_reactions = NULL; + } env->reset_reactions_size=num_reset_reactions; - env->reset_reactions = (reaction_t **) calloc(num_reset_reactions, sizeof(reaction_t)); - 
lf_assert(env->reset_reactions != NULL, "Out of memory"); + if (env->reset_reactions_size > 0) { + env->reset_reactions = (reaction_t **) calloc(num_reset_reactions, sizeof(reaction_t*)); + LF_ASSERT(env->reset_reactions, "Out of memory"); + } else { + env->reset_reactions = NULL; + } env->is_present_fields_size = num_is_present_fields; env->is_present_fields_abbreviated_size = 0; - env->is_present_fields = (bool**)calloc(num_is_present_fields, sizeof(bool*)); - lf_assert(env->is_present_fields != NULL, "Out of memory"); - - env->is_present_fields_abbreviated = (bool**)calloc(num_is_present_fields, sizeof(bool*)); - lf_assert(env->is_present_fields_abbreviated != NULL, "Out of memory"); + if (env->is_present_fields_size > 0) { + env->is_present_fields = (bool**)calloc(num_is_present_fields, sizeof(bool*)); + LF_ASSERT(env->is_present_fields, "Out of memory"); + env->is_present_fields_abbreviated = (bool**)calloc(num_is_present_fields, sizeof(bool*)); + LF_ASSERT(env->is_present_fields_abbreviated, "Out of memory"); + } else { + env->is_present_fields = NULL; + env->is_present_fields_abbreviated = NULL; + } env->_lf_handle=1; diff --git a/core/federated/CMakeLists.txt b/core/federated/CMakeLists.txt index d5af17c2be..1366462854 100644 --- a/core/federated/CMakeLists.txt +++ b/core/federated/CMakeLists.txt @@ -1,5 +1,4 @@ -set(FEDERATED_SOURCES clock-sync.c federate.c net_util.c) -list(APPEND INFO_SOURCES ${FEDERATED_SOURCES}) +set(FEDERATED_SOURCES clock-sync.c federate.c) list(TRANSFORM FEDERATED_SOURCES PREPEND federated/) -target_sources(core PRIVATE ${FEDERATED_SOURCES}) +list(APPEND REACTORC_SOURCES ${FEDERATED_SOURCES}) diff --git a/core/federated/RTI/CMakeLists.txt b/core/federated/RTI/CMakeLists.txt index b0fb5a326e..73b1b0d4e0 100644 --- a/core/federated/RTI/CMakeLists.txt +++ b/core/federated/RTI/CMakeLists.txt @@ -52,6 +52,7 @@ endif() set(IncludeDir ../../../include/core) include_directories(${IncludeDir}) include_directories(${IncludeDir}/federated) 
+include_directories(${IncludeDir}/federated/network) include_directories(${IncludeDir}/modal_models) include_directories(${IncludeDir}/platform) include_directories(${IncludeDir}/utils) @@ -60,17 +61,18 @@ include_directories(${IncludeDir}/utils) # Declare a new executable target and list all its sources add_executable( RTI - enclave.c - rti.c - rti_lib.c + main.c + rti_common.c + rti_remote.c ${CoreLib}/trace.c ${LF_PLATFORM_FILE} ${CoreLib}/platform/lf_unix_clock_support.c ${CoreLib}/utils/util.c ${CoreLib}/tag.c - ${CoreLib}/federated/net_util.c + ${CoreLib}/federated/network/net_util.c + ${CoreLib}/utils/pqueue_base.c + ${CoreLib}/utils/pqueue_tag.c ${CoreLib}/utils/pqueue.c - message_record/message_record.c ) IF(CMAKE_BUILD_TYPE MATCHES DEBUG) @@ -79,8 +81,12 @@ IF(CMAKE_BUILD_TYPE MATCHES DEBUG) target_compile_definitions(RTI PUBLIC LOG_LEVEL=4) ENDIF(CMAKE_BUILD_TYPE MATCHES DEBUG) +# Set the STANDALONE_RTI flag to include the rti_remote and rti_common. +target_compile_definitions(RTI PUBLIC STANDALONE_RTI=1) + # Set FEDERATED to get federated compilation support target_compile_definitions(RTI PUBLIC FEDERATED=1) + target_compile_definitions(RTI PUBLIC PLATFORM_${CMAKE_SYSTEM_NAME}) # Set RTI Tracing diff --git a/core/federated/RTI/README.md b/core/federated/RTI/README.md index 82c3f003e4..916959f6e1 100644 --- a/core/federated/RTI/README.md +++ b/core/federated/RTI/README.md @@ -33,5 +33,17 @@ If you would like to go back to non-AUTH mode, you would have to remove all cont To build a docker image for the RTI, do ```bash -docker build -t rti:rti -f rti.Dockerfile ../../../core/ -``` \ No newline at end of file +docker build -t lflang/rti:latest -f rti.Dockerfile ../../../ +``` + +To push it to DockerHub, run: +```bash +docker push lflang/rti:latest +``` + +You may need to login first: +```bash +docker login -u [username] +``` + +To authenticate, request a PAT on [DockerHub](https://hub.docker.com/settings/security). 
diff --git a/core/federated/RTI/enclave.c b/core/federated/RTI/enclave.c deleted file mode 100644 index 9af2d60ac9..0000000000 --- a/core/federated/RTI/enclave.c +++ /dev/null @@ -1,308 +0,0 @@ -#include "enclave.h" - -/** - * Reference to enclave_rti_t instance. - */ -enclave_rti_t* _e_rti; - -// Global variables defined in tag.c: -extern instant_t start_time; - -// RTI mutex, which is the main lock -extern lf_mutex_t rti_mutex; - -// FIXME: For log and debug message in this file, what sould be kept: 'enclave', -// 'federate', or 'enlcave/federate'? Currently its is 'enclave/federate'. -// FIXME: Should enclaves tracing use the same mechanism as federates? -// It needs to account a federate having itself a number of enclaves. -// Currently, all calls to tracepoint_from_federate() and -// tracepoint_to_federate() are in rti_lib.c - -void initialize_enclave(enclave_t* e, uint16_t id) { - e->id = id; - e->completed = NEVER_TAG; - e->last_granted = NEVER_TAG; - e->last_provisionally_granted = NEVER_TAG; - e->next_event = NEVER_TAG; - e->state = NOT_CONNECTED; - e->upstream = NULL; - e->upstream_delay = NULL; - e->num_upstream = 0; - e->downstream = NULL; - e->num_downstream = 0; - e->mode = REALTIME; - - // Initialize the next event condition variable. - lf_cond_init(&e->next_event_condition, &rti_mutex); -} - -void logical_tag_complete(enclave_t* enclave, tag_t completed) { - // FIXME: Consolidate this message with NET to get NMR (Next Message Request). - // Careful with handling startup and shutdown. - lf_mutex_lock(&rti_mutex); - - enclave->completed = completed; - - LF_PRINT_LOG("RTI received from federate/enclave %d the Logical Tag Complete (LTC) " PRINTF_TAG ".", - enclave->id, enclave->completed.time - start_time, enclave->completed.microstep); - - // Check downstream enclaves to see whether they should now be granted a TAG. 
- for (int i = 0; i < enclave->num_downstream; i++) { - enclave_t *downstream = _e_rti->enclaves[enclave->downstream[i]]; - // Notify downstream enclave if appropriate. - notify_advance_grant_if_safe(downstream); - bool *visited = (bool *)calloc(_e_rti->number_of_enclaves, sizeof(bool)); // Initializes to 0. - // Notify enclaves downstream of downstream if appropriate. - notify_downstream_advance_grant_if_safe(downstream, visited); - free(visited); - } - - lf_mutex_unlock(&rti_mutex); -} - -tag_advance_grant_t tag_advance_grant_if_safe(enclave_t* e) { - tag_advance_grant_t result = {.tag = NEVER_TAG, .is_provisional = false}; - - // Find the earliest LTC of upstream enclaves (M). - tag_t min_upstream_completed = FOREVER_TAG; - - for (int j = 0; j < e->num_upstream; j++) { - enclave_t *upstream = _e_rti->enclaves[e->upstream[j]]; - - // Ignore this enclave if it no longer connected. - if (upstream->state == NOT_CONNECTED) continue; - - // Adjust by the "after" delay. - // Note that "no delay" is encoded as NEVER, - // whereas one microstep delay is encoded as 0LL. - tag_t candidate = lf_delay_strict(upstream->completed, e->upstream_delay[j]); - - if (lf_tag_compare(candidate, min_upstream_completed) < 0) { - min_upstream_completed = candidate; - } - } - LF_PRINT_LOG("Minimum upstream LTC for federate/enclave %d is " PRINTF_TAG - "(adjusted by after delay).", - e->id, - min_upstream_completed.time - start_time, min_upstream_completed.microstep); - if (lf_tag_compare(min_upstream_completed, e->last_granted) > 0 - && lf_tag_compare(min_upstream_completed, e->next_event) >= 0 // The enclave has to advance its tag - ) { - result.tag = min_upstream_completed; - return result; - } - - // Can't make progress based only on upstream LTCs. - // If all (transitive) upstream enclaves of the enclave - // have earliest event tags such that the - // enclave can now advance its tag, then send it a TAG message. 
- // Find the earliest event time of each such upstream enclave, - // adjusted by delays on the connections. - - // To handle cycles, need to create a boolean array to keep - // track of which upstream enclave have been visited. - bool *visited = (bool *)calloc(_e_rti->number_of_enclaves, sizeof(bool)); // Initializes to 0. - - // Find the tag of the earliest possible incoming message from - // upstream enclaves. - tag_t t_d_nonzero_delay = FOREVER_TAG; - // The tag of the earliest possible incoming message from a zero-delay connection. - // Delayed connections are not guarded from STP violations by the MLAA; this property is - // acceptable because delayed connections impose no deadlock risk and in some cases (startup) - // this property is necessary to avoid deadlocks. However, it requires some special care here - // when potentially sending a PTAG because we must not send a PTAG for a tag at which data may - // still be received over nonzero-delay connections. - tag_t t_d_zero_delay = FOREVER_TAG; - LF_PRINT_DEBUG("NOTE: FOREVER is displayed as " PRINTF_TAG " and NEVER as " PRINTF_TAG, - FOREVER_TAG.time - start_time, FOREVER_TAG.microstep, - NEVER_TAG.time - start_time, 0); - - for (int j = 0; j < e->num_upstream; j++) { - enclave_t *upstream = _e_rti->enclaves[e->upstream[j]]; - - // Ignore this enclave if it is no longer connected. - if (upstream->state == NOT_CONNECTED) continue; - - // Find the (transitive) next event tag upstream. - tag_t upstream_next_event = transitive_next_event( - upstream, upstream->next_event, visited); - - LF_PRINT_DEBUG("Earliest next event upstream of fed/encl %d at fed/encl %d has tag " PRINTF_TAG ".", - e->id, - upstream->id, - upstream_next_event.time - start_time, upstream_next_event.microstep); - - // Adjust by the "after" delay. - // Note that "no delay" is encoded as NEVER, - // whereas one microstep delay is encoded as 0LL. 
- tag_t candidate = lf_delay_strict(upstream_next_event, e->upstream_delay[j]); - - if (e->upstream_delay[j] == NEVER) { - if (lf_tag_compare(candidate, t_d_zero_delay) < 0) { - t_d_zero_delay = candidate; - } - } else { - if (lf_tag_compare(candidate, t_d_nonzero_delay) < 0) { - t_d_nonzero_delay = candidate; - } - } - } - free(visited); - tag_t t_d = (lf_tag_compare(t_d_zero_delay, t_d_nonzero_delay) < 0) ? t_d_zero_delay : t_d_nonzero_delay; - - LF_PRINT_LOG("Earliest next event upstream has tag " PRINTF_TAG ".", - t_d.time - start_time, t_d.microstep); - - if ( - lf_tag_compare(t_d, e->next_event) > 0 // The enclave has something to do. - && lf_tag_compare(t_d, e->last_provisionally_granted) >= 0 // The grant is not redundant - // (equal is important to override any previous - // PTAGs). - && lf_tag_compare(t_d, e->last_granted) > 0 // The grant is not redundant. - ) { - // All upstream enclaves have events with a larger tag than fed, so it is safe to send a TAG. - LF_PRINT_LOG("Earliest upstream message time for fed/encl %d is " PRINTF_TAG - "(adjusted by after delay). Granting tag advance for " PRINTF_TAG, - e->id, - t_d.time - lf_time_start(), t_d.microstep, - e->next_event.time - lf_time_start(), - e->next_event.microstep); - result.tag = e->next_event; - } else if ( - lf_tag_compare(t_d_zero_delay, e->next_event) == 0 // The enclave has something to do. - && lf_tag_compare(t_d_zero_delay, t_d_nonzero_delay) < 0 // The statuses of nonzero-delay connections are known at tag t_d_zero_delay - && lf_tag_compare(t_d_zero_delay, e->last_provisionally_granted) > 0 // The grant is not redundant. - && lf_tag_compare(t_d_zero_delay, e->last_granted) > 0 // The grant is not redundant. - ) { - // Some upstream enclaves has an event that has the same tag as fed's next event, so we can only provisionally - // grant a TAG (via a PTAG). - LF_PRINT_LOG("Earliest upstream message time for fed/encl %d is " PRINTF_TAG - " (adjusted by after delay). 
Granting provisional tag advance.", - e->id, - t_d_zero_delay.time - start_time, t_d_zero_delay.microstep); - result.tag = t_d_zero_delay; - result.is_provisional = true; - } - return result; -} - -void notify_downstream_advance_grant_if_safe(enclave_t* e, bool visited[]) { - visited[e->id] = true; - for (int i = 0; i < e->num_downstream; i++) { - enclave_t* downstream = _e_rti->enclaves[e->downstream[i]]; - if (visited[downstream->id]) continue; - notify_advance_grant_if_safe(downstream); - notify_downstream_advance_grant_if_safe(downstream, visited); - } -} - -void update_enclave_next_event_tag_locked(enclave_t* e, tag_t next_event_tag) { - e->next_event = next_event_tag; - - LF_PRINT_DEBUG( - "RTI: Updated the recorded next event tag for federate/enclave %d to " PRINTF_TAG, - e->id, - next_event_tag.time - lf_time_start(), - next_event_tag.microstep - ); - - // Check to see whether we can reply now with a tag advance grant. - // If the enclave has no upstream enclaves, then it does not wait for - // nor expect a reply. It just proceeds to advance time. - if (e->num_upstream > 0) { - notify_advance_grant_if_safe(e); - } - // Check downstream enclaves to see whether they should now be granted a TAG. - // To handle cycles, need to create a boolean array to keep - // track of which upstream enclaves have been visited. - bool *visited = (bool *)calloc(_e_rti->number_of_enclaves, sizeof(bool)); // Initializes to 0. - notify_downstream_advance_grant_if_safe(e, visited); - free(visited); -} - -tag_advance_grant_t next_event_tag(enclave_t* e, tag_t next_event_tag) { - tag_advance_grant_t result; - - // First, update the enclave data structure to record this next_event_tag, - // and notify any downstream enclaves, and unblock them if appropriate. - lf_mutex_lock(&rti_mutex); - - // FIXME: If last_granted is already greater than next_event_tag, return next_event_tag. 
- - tag_t previous_tag = e->last_granted; - tag_t previous_ptag = e->last_provisionally_granted; - - update_enclave_next_event_tag_locked(e, next_event_tag); - - while(true) { - // Determine whether the above call notified e of a TAG or PTAG. - // If so, return that value. - if (lf_tag_compare(previous_tag, e->last_granted) < 0) { - result.tag = e->last_granted; - result.is_provisional = false; - lf_mutex_unlock(&rti_mutex); - return result; - } - if (lf_tag_compare(previous_ptag, e->last_provisionally_granted) < 0) { - result.tag = e->last_provisionally_granted; - result.is_provisional = true; - lf_mutex_unlock(&rti_mutex); - return result; - } - - // If not, block. - lf_cond_wait(&e->next_event_condition); - } -} - -void notify_advance_grant_if_safe(enclave_t* e) { - tag_advance_grant_t grant = tag_advance_grant_if_safe(e); - if (lf_tag_compare(grant.tag, NEVER_TAG) != 0) { - if (grant.is_provisional) { - notify_provisional_tag_advance_grant(e, grant.tag); - } else { - notify_tag_advance_grant(e, grant.tag); - } - } -} - -tag_t transitive_next_event(enclave_t* e, tag_t candidate, bool visited[]) { - if (visited[e->id] || e->state == NOT_CONNECTED) { - // Enclave has stopped executing or we have visited it before. - // No point in checking upstream enclaves. - return candidate; - } - - visited[e->id] = true; - tag_t result = e->next_event; - - // If the candidate is less than this enclave's next_event, use the candidate. - if (lf_tag_compare(candidate, result) < 0) { - result = candidate; - } - - // The result cannot be earlier than the start time. - if (result.time < start_time) { - // Earliest next event cannot be before the start time. - result = (tag_t){.time = start_time, .microstep = 0u}; - } - - // Check upstream enclaves to see whether any of them might send - // an event that would result in an earlier next event. 
- for (int i = 0; i < e->num_upstream; i++) { - tag_t upstream_result = transitive_next_event( - _e_rti->enclaves[e->upstream[i]], result, visited); - - // Add the "after" delay of the connection to the result. - upstream_result = lf_delay_tag(upstream_result, e->upstream_delay[i]); - - // If the adjusted event time is less than the result so far, update the result. - if (lf_tag_compare(upstream_result, result) < 0) { - result = upstream_result; - } - } - if (lf_tag_compare(result, e->completed) < 0) { - result = e->completed; - } - return result; -} diff --git a/core/federated/RTI/enclave.h b/core/federated/RTI/enclave.h deleted file mode 100644 index 51e2cc0ee2..0000000000 --- a/core/federated/RTI/enclave.h +++ /dev/null @@ -1,263 +0,0 @@ -/** - * @file - * @author Edward A. Lee (eal@berkeley.edu) - * @author Soroush Bateni (soroush@utdallas.edu) - * @author Erling Jellum (erling.r.jellum@ntnu.no) - * @author Chadlia Jerad (chadlia.jerad@ensi-uma.tn) - * @copyright (c) 2020-2023, The University of California at Berkeley - * License in [BSD 2-clause](https://github.com/lf-lang/reactor-c/blob/main/LICENSE.md) - * @brief Declarations for runtime infrastructure (RTI) for scheduling enclaves and distributed Lingua Franca programs. - * This file declares RTI features that are used by scheduling enclaves as well as federated - * LF programs. - */ - -#ifndef ENCLAVE_H -#define ENCLAVE_H - -#include // Defines perror(), errno -#include -#include "platform.h" // Platform-specific types and functions -#include "util.h" // Defines print functions (e.g., lf_print). -#include "net_util.h" // Defines network functions. -#include "net_common.h" // Defines message types, etc. Includes and "reactor.h". -#include "tag.h" // Time-related types and functions. -#include "trace.h" // Tracing related functions - - -/** Mode of execution of a federate. */ -typedef enum execution_mode_t { - FAST, - REALTIME -} execution_mode_t; - -/** State of a enclave during execution. 
*/ -typedef enum fed_state_t { - NOT_CONNECTED, // The federate has not connected. - GRANTED, // Most recent MSG_TYPE_NEXT_EVENT_TAG has been granted. - PENDING // Waiting for upstream federates. -} fed_state_t; - -/** - * Information about enclave known to the RTI, including its runtime state, - * mode of execution, and connectivity with other enclaves. - * The list of upstream and downstream enclaves does not include - * those that are connected via a "physical" connection (one - * denoted with ~>) because those connections do not impose - * any scheduling constraints. - */ -typedef struct enclave_t { - uint16_t id; // ID of this enclave. - tag_t completed; // The largest logical tag completed by the federate (or NEVER if no LTC has been received). - tag_t last_granted; // The maximum TAG that has been granted so far (or NEVER if none granted) - tag_t last_provisionally_granted; // The maximum PTAG that has been provisionally granted (or NEVER if none granted) - tag_t next_event; // Most recent NET received from the federate (or NEVER if none received). - fed_state_t state; // State of the federate. - int* upstream; // Array of upstream federate ids. - interval_t* upstream_delay; // Minimum delay on connections from upstream federates. - // Here, NEVER encodes no delay. 0LL is a microstep delay. - int num_upstream; // Size of the array of upstream federates and delays. - int* downstream; // Array of downstream federate ids. - int num_downstream; // Size of the array of downstream federates. - execution_mode_t mode; // FAST or REALTIME. - lf_cond_t next_event_condition; // Condition variable used by enclaves to notify an enclave - // that it's call to next_event_tag() should unblock. -} enclave_t; - -/** - * Structure that an enclave RTI instance uses to keep track of its own and its - * corresponding enclaves'state. - * // **************** IMPORTANT!!! 
******************** - * // ** If you make any change to this struct, ** - * // ** you MUST also change federation_rti_t in ** - * // ** (rti_lib.h)! The change must exactly match. ** - * // ************************************************** - */ - -typedef struct enclave_rti_t { - // The enclaves. - enclave_t **enclaves; - - // Number of enclaves - int32_t number_of_enclaves; - - // RTI's decided stop tag for enclaves - tag_t max_stop_tag; - - // Number of enclaves handling stop - int num_enclaves_handling_stop; - - // Boolean indicating that tracing is enabled. - bool tracing_enabled; - - // Trace object - trace_t* trace; -} enclave_rti_t; - - -/** - * An enclave calls this function after it completed a tag. - * The function updates the completed tag and check if the downstream enclaves - * are eligible for receiving TAGs. - * - * @param enclave The enclave - * @param completed The completed tag of the enclave - */ -void logical_tag_complete(enclave_t* enclave, tag_t completed); - -typedef struct { - tag_t tag; // NEVER if there is no tag advance grant. - bool is_provisional; // True for PTAG, false for TAG. -} tag_advance_grant_t; - -/** - * Initialize the enclave with the specified ID. - * - * @param e The enclave - * @param id The enclave ID. - */ -void initialize_enclave(enclave_t* e, uint16_t id); - -/** - * For all enclaves downstream of the specified enclave, determine - * whether they should be notified of a TAG or PTAG and notify them if so. - * - * This assumes the caller holds the mutex. - * - * @param e The upstream enclave. - * @param visited An array of booleans used to determine whether an enclave has - * been visited (initially all false). - */ -void notify_downstream_advance_grant_if_safe(enclave_t* e, bool visited[]); - -/** - * Notify a tag advance grant (TAG) message to the specified federate. - * Do not notify it if a previously sent PTAG was greater or if a - * previously sent TAG was greater or equal. 
- * - * This function will keep a record of this TAG in the federate's last_granted - * field. - * - * This function assumes that the caller holds the mutex lock. - * - * FIXME: This needs two implementations, one for enclaves and one for federates. - * - * @param e The enclave. - * @param tag The tag to grant. - */ -void notify_tag_advance_grant(enclave_t* e, tag_t tag); - -/** - * @brief Either send to a federate or unblock an enclave to give it a tag. - * This function requires two different implementations, one for enclaves - * and one for federates. - * - * This assumes the caller holds the mutex. - * - * @param e The enclave. - */ -void notify_advance_grant_if_safe(enclave_t* e); - -/** - * Nontify a provisional tag advance grant (PTAG) message to the specified enclave. - * Do not notify it if a previously sent PTAG or TAG was greater or equal. - * - * This function will keep a record of this PTAG in the federate's last_provisionally_granted - * field. - * - * This function assumes that the caller holds the mutex lock. - * - * FIXME: This needs two implementations, one for enclaves and one for federates. - * - * @param e The enclave. - * @param tag The tag to grant. - */ -void notify_provisional_tag_advance_grant(enclave_t* e, tag_t tag); - -/** - * Determine whether the specified enclave is eligible for a tag advance grant, - * (TAG) and, if so, return the details. This is called upon receiving a LTC, NET - * or resign from an upstream enclave. - * - * This function calculates the minimum M over - * all upstream enclaves of the "after" delay plus the most recently - * received LTC from that enclave. If M is greater than the - * most recent TAG to e or greater than or equal to the most - * recent PTAG, then return TAG(M). - * - * If the above conditions do not result in returning a TAG, then find the - * minimum M of the earliest possible future message from upstream federates. 
- * This is calculated by transitively looking at the most recently received - * NET calls from upstream enclaves. - * If M is greater than the NET of e or the most recent PTAG to e, then - * return a TAG with tag equal to the NET of e or the PTAG. - * If M is equal to the NET of the federate, then return PTAG(M). - * - * This should be called whenever an immediately upstream federate sends to - * the RTI an LTC (Logical Tag Complete), or when a transitive upstream - * federate sends a NET (Next Event Tag) message. - * It is also called when an upstream federate resigns from the federation. - * - * This function assumes that the caller holds the mutex lock. - * - * @param e The enclave - * @return If granted, return the tag value and whether it is provisional. - * Otherwise, return the NEVER_TAG. - */ -tag_advance_grant_t tag_advance_grant_if_safe(enclave_t* e); - -/** - * @brief Get the tag to advance to. - * - * An enclave should call this function when it is ready to advance its tag, - * passing as the second argument the tag of the earliest event on its event queue. - * The returned tag may be less than or equal to the argument tag and is interpreted - * by the enclave as the tag to which it can advance. - * - * This will also notify downstream enclaves with a TAG or PTAG if appropriate, - * possibly unblocking their own calls to this same function. - * - * @param e The enclave. - * @param next_event_tag The next event tag for e. - * @return If granted, return the TAG and whether it is provisional or not. - * Otherwise, return the NEVER_TAG. - */ -tag_advance_grant_t next_event_tag(enclave_t* e, tag_t next_event_tag); - -/** - * @brief Update the next event tag of an enclave. - * - * This will notify downstream enclaves with a TAG or PTAG if appropriate. - * - * This function assumes that the caller is holding the rti_mutex. - * - * @param e The enclave. - * @param next_event_tag The next event tag for e. 
- */ -void update_enclave_next_event_tag_locked(enclave_t* e, tag_t next_event_tag); - -/** - * Find the earliest tag at which the specified federate may - * experience its next event. This is the least next event tag (NET) - * of the specified federate and (transitively) upstream federates - * (with delays of the connections added). For upstream federates, - * we assume (conservatively) that federate upstream of those - * may also send an event. The result will never be less than - * the completion time of the federate (which may be NEVER, - * if the federate has not yet completed a logical time). - * - * FIXME: This could be made less conservative by building - * at code generation time a causality interface table indicating - * which outputs can be triggered by which inputs. For now, we - * assume any output can be triggered by any input. - * - * @param e The enclave. - * @param candidate A candidate tag (for the first invocation, - * this should be fed->next_event). - * @param visited An array of booleans indicating which federates - * have been visited (for the first invocation, this should be - * an array of falses of size _RTI.number_of_federates). - * @return The earliest next event tag of the enclave e. - */ -tag_t transitive_next_event(enclave_t *e, tag_t candidate, bool visited[]); - -#endif // ENCLAVE_H \ No newline at end of file diff --git a/core/federated/RTI/enclave_impl.c b/core/federated/RTI/enclave_impl.c deleted file mode 100644 index 00c444a071..0000000000 --- a/core/federated/RTI/enclave_impl.c +++ /dev/null @@ -1,20 +0,0 @@ -#include "enclave.h" - -// References to the enclave RTI. 
-extern enclave_rti_t * _e_rti; - -void notify_tag_advance_grant(enclave_t* e, tag_t tag) { - if (e->state == NOT_CONNECTED - || lf_tag_compare(tag, e->last_granted) <= 0 - || lf_tag_compare(tag, e->last_provisionally_granted) < 0 - ) { - return; - } - if (_e_rti.tracing_enabled) { - tracepoint_rti_to_federate(_e_rti.trace, send_TAG, e->id, &tag); - } - e->last_granted = tag; - lf_cond_signal(&e->next_event_condition); -} - - diff --git a/core/federated/RTI/local_rti.cmake b/core/federated/RTI/local_rti.cmake new file mode 100644 index 0000000000..0d43fd312c --- /dev/null +++ b/core/federated/RTI/local_rti.cmake @@ -0,0 +1,13 @@ +# This adds the local RTI sources required for scheduling enclaves +# to the build. +set( + LOCAL_RTI_SOURCES + rti_common.c + rti_local.c +) + +# Prefix each file with its path as seen from core/CMakeLists.txt, which include()s this file. +list(TRANSFORM LOCAL_RTI_SOURCES PREPEND federated/RTI/) + +# Append to the shared source list; the reactor-c library is created from it after this include. +list(APPEND REACTORC_SOURCES ${LOCAL_RTI_SOURCES}) diff --git a/core/federated/RTI/main.c b/core/federated/RTI/main.c new file mode 100644 index 0000000000..700304aea9 --- /dev/null +++ b/core/federated/RTI/main.c @@ -0,0 +1,349 @@ +#if defined STANDALONE_RTI +/** + * @file + * @author Edward A. Lee (eal@berkeley.edu) + * @author Soroush Bateni + * + * @section LICENSE +Copyright (c) 2020, The University of California at Berkeley. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution.
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + * @section DESCRIPTION + * Runtime infrastructure for distributed Lingua Franca programs. + * + * This implementation creates one thread per federate so as to be able + * to take advantage of multiple cores. It may be more efficient, however, + * to use select() instead to read from the multiple socket connections + * to each federate. + * + * This implementation sends messages in little endian order + * because Intel, RISC V, and Arm processors are little endian. + * This is not what is normally considered "network order", + * but we control both ends, and hence, for commonly used + * processors, this will be more efficient since it won't have + * to swap bytes. + * + * This implementation of the RTI should be considered a reference + * implementation. In the future it might be re-implemented in Java or Kotlin. + * Or we could bootstrap and implement it using Lingua Franca. + */ + +#include "rti_remote.h" +#include "net_util.h" +#include <signal.h> // To trap ctrl-c and invoke a clean stop to save the trace file, if needed. +#include <string.h> + + +/** + * The tracing mechanism uses the number of workers variable `_lf_number_of_workers`. + * For RTI tracing, the number of workers is set as the number of federates.
+ */ +unsigned int _lf_number_of_workers = 0u; + +// The global RTI object. It is static, and can only be referenced in this file. +// A pointer is passed during initialization to rti_remote.c +static rti_remote_t rti; + +/** + * RTI trace file name + */ +const char *rti_trace_file_name = "rti.lft"; + +/** Indicator that normal termination of the RTI has occurred. */ +bool normal_termination = false; + +/** + * Send a failed signal to the specified federate. + */ +static void send_failed_signal(federate_info_t* fed) { + size_t bytes_to_write = 1; + unsigned char buffer[bytes_to_write]; + buffer[0] = MSG_TYPE_FAILED; + if (rti.base.tracing_enabled) { + tracepoint_rti_to_federate(rti.base.trace, send_FAILED, fed->enclave.id, NULL); + } + int failed = write_to_socket(fed->socket, bytes_to_write, &(buffer[0])); + if (failed == 0) { + LF_PRINT_LOG("RTI has sent failed signal to federate %d due to abnormal termination.", fed->enclave.id); + } else { + lf_print_error("RTI failed to send failed signal to federate %d on socket ID %d.", fed->enclave.id, fed->socket); + } +} + +/** + * @brief Function to run upon termination. + * This function will be invoked both after main() returns and when a signal + * that results in terminating the process, such as SIGINT. In the former + * case, it should do nothing. In the latter case, it will send a MSG_TYPE_FAILED + * signal to each federate and attempt to write the trace file, but without + * acquiring a mutex lock, so the resulting files may be incomplete or even + * corrupted. But this is better than just failing to write the data we have + * collected so far. 
+ */ +void termination() { + if (!normal_termination) { + for (int i = 0; rti.base.scheduling_nodes != NULL && i < rti.base.number_of_scheduling_nodes; i++) { /* main() may exit (e.g. bad arguments) before allocating the node array */ + federate_info_t *f = (federate_info_t*)rti.base.scheduling_nodes[i]; + if (!f || f->enclave.state == NOT_CONNECTED) continue; + send_failed_signal(f); + } + if (rti.base.tracing_enabled && rti.base.trace != NULL) { /* the trace object is only created after argument parsing succeeds */ + stop_trace_locked(rti.base.trace); + lf_print("RTI trace file saved."); + } + lf_print("RTI is exiting abnormally."); + } +} + +void usage(int argc, const char* argv[]) { + lf_print("\nCommand-line arguments: \n"); + lf_print(" -i, --id "); + lf_print(" The ID of the federation that this RTI will control.\n"); + lf_print(" -n, --number_of_federates "); + lf_print(" The number of federates in the federation that this RTI will control.\n"); + lf_print(" -p, --port "); + lf_print(" The port number to use for the RTI. Must be larger than 0 and smaller than %d. Default is %d.\n", UINT16_MAX, DEFAULT_PORT); + lf_print(" -c, --clock_sync [off|init|on] [period ] [exchanges-per-interval ]"); + lf_print(" The status of clock synchronization for this federate."); + lf_print(" - off: Clock synchronization is off."); + lf_print(" - init (default): Clock synchronization is done only during startup."); + lf_print(" - on: Clock synchronization is done both at startup and during the execution."); + lf_print(" Relevant parameters that can be set: "); + lf_print(" - period (in nanoseconds): Controls how often a clock synchronization attempt is made"); + lf_print(" (period in nanoseconds, default is 5 msec). Only applies to 'on'."); + lf_print(" - exchanges-per-interval : Controls the number of messages that are exchanged for each"); + lf_print(" clock sync attempt (default is 10).
Applies to 'init' and 'on'.\n"); + lf_print(" -a, --auth Turn on HMAC authentication options.\n"); + lf_print(" -t, --tracing Turn on tracing.\n"); + + lf_print("Command given:"); + for (int i = 0; i < argc; i++) { + lf_print("%s ", argv[i]); + } + lf_print("\n"); +} + +int process_clock_sync_args(int argc, const char* argv[]) { + for (int i = 0; i < argc; i++) { + if (strcmp(argv[i], "off") == 0) { + rti.clock_sync_global_status = clock_sync_off; + lf_print("RTI: Clock sync: off"); + } else if (strcmp(argv[i], "init") == 0 || strcmp(argv[i], "initial") == 0) { + rti.clock_sync_global_status = clock_sync_init; + lf_print("RTI: Clock sync: init"); + } else if (strcmp(argv[i], "on") == 0) { + rti.clock_sync_global_status = clock_sync_on; + lf_print("RTI: Clock sync: on"); + } else if (strcmp(argv[i], "period") == 0) { + if (rti.clock_sync_global_status != clock_sync_on) { + lf_print_error("clock sync period can only be set if --clock-sync is set to on."); + usage(argc, argv); + i++; + continue; // Try to parse the rest of the arguments as clock sync args. + } else if (argc < i + 2) { + lf_print_error("clock sync period needs a time (in nanoseconds) argument."); + usage(argc, argv); + continue; + } + i++; + long long period_ns = strtoll(argv[i], NULL, 10); + if (period_ns == 0LL || period_ns == LLONG_MAX || period_ns == LLONG_MIN) { + lf_print_error("clock sync period value is invalid."); + continue; // Try to parse the rest of the arguments as clock sync args. + } + rti.clock_sync_period_ns = (int64_t)period_ns; + lf_print("RTI: Clock sync period: %lld", (long long int)rti.clock_sync_period_ns); + } else if (strcmp(argv[i], "exchanges-per-interval") == 0) { + if (rti.clock_sync_global_status != clock_sync_on && rti.clock_sync_global_status != clock_sync_init) { + lf_print_error("clock sync exchanges-per-interval can only be set if\n" + "--clock-sync is set to on or init."); + usage(argc, argv); + continue; // Try to parse the rest of the arguments as clock sync args. 
+ } else if (argc < i + 2) { + lf_print_error("clock sync exchanges-per-interval needs an integer argument."); + usage(argc, argv); + continue; // Try to parse the rest of the arguments as clock sync args. + } + i++; + long exchanges = (long)strtol(argv[i], NULL, 10); + if (exchanges == 0L || exchanges == LONG_MAX || exchanges == LONG_MIN) { + lf_print_error("clock sync exchanges-per-interval value is invalid."); + continue; // Try to parse the rest of the arguments as clock sync args. + } + rti.clock_sync_exchanges_per_interval = (int32_t)exchanges; // FIXME: Loses numbers on 64-bit machines + lf_print("RTI: Clock sync exchanges per interval: %d", rti.clock_sync_exchanges_per_interval); + } else if (strcmp(argv[i], " ") == 0) { + // Tolerate spaces + continue; + } else { + // Either done with the clock sync args or there is an invalid + // character. In either case, let the parent function deal with + // the rest of the characters; + return i; + } + } + return argc; +} + +int process_args(int argc, const char* argv[]) { + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-i") == 0 || strcmp(argv[i], "--id") == 0) { + if (argc < i + 2) { + lf_print_error("--id needs a string argument."); + usage(argc, argv); + return 0; + } + i++; + lf_print("RTI: Federation ID: %s", argv[i]); + rti.federation_id = argv[i]; + } else if (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--number_of_federates") == 0) { + if (argc < i + 2) { + lf_print_error("--number_of_federates needs an integer argument."); + usage(argc, argv); + return 0; + } + i++; + long num_federates = strtol(argv[i], NULL, 10); + if (num_federates == 0L || num_federates == LONG_MAX || num_federates == LONG_MIN) { + lf_print_error("--number_of_federates needs a valid positive integer argument."); + usage(argc, argv); + return 0; + } + rti.base.number_of_scheduling_nodes = (int32_t)num_federates; // FIXME: Loses numbers on 64-bit machines + lf_print("RTI: Number of federates: %d", 
rti.base.number_of_scheduling_nodes); + } else if (strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "--port") == 0) { + if (argc < i + 2) { + lf_print_error( + "--port needs a short unsigned integer argument ( > 0 and < %d).", + UINT16_MAX + ); + usage(argc, argv); + return 0; + } + i++; + uint32_t RTI_port = (uint32_t)strtoul(argv[i], NULL, 10); + if (RTI_port <= 0 || RTI_port >= UINT16_MAX) { + lf_print_error( + "--port needs a short unsigned integer argument ( > 0 and < %d).", + UINT16_MAX + ); + usage(argc, argv); + return 0; + } + rti.user_specified_port = (uint16_t)RTI_port; + } else if (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--clock_sync") == 0) { + if (argc < i + 2) { + lf_print_error("--clock-sync needs off|init|on."); + usage(argc, argv); + return 0; + } + i++; + i += process_clock_sync_args((argc-i), &argv[i]); + } else if (strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "--auth") == 0) { + #ifndef __RTI_AUTH__ + lf_print_error("--auth requires the RTI to be built with the -DAUTH=ON option."); + usage(argc, argv); + return 0; + #endif + rti.authentication_enabled = true; + } else if (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--tracing") == 0) { + rti.base.tracing_enabled = true; + } else if (strcmp(argv[i], " ") == 0) { + // Tolerate spaces + continue; + } else { + lf_print_error("Unrecognized command-line argument: %s", argv[i]); + usage(argc, argv); + return 0; + } + } + if (rti.base.number_of_scheduling_nodes == 0) { + lf_print_error("--number_of_federates needs a valid positive integer argument."); + usage(argc, argv); + return 0; + } + return 1; +} +int main(int argc, const char* argv[]) { + + initialize_RTI(&rti); + + // Catch the Ctrl-C signal, for a clean exit that does not lose the trace information + signal(SIGINT, exit); +#ifdef SIGPIPE + // Ignore SIGPIPE errors, which terminate the entire application if + // socket write() fails because the reader has closed the socket. + // Instead, cause an EPIPE error to be set when write() fails. 
+ // NOTE: The reason for a broken socket causing a SIGPIPE signal + // instead of just having write() return an error is to robustly + // handle a foo | bar pipeline where bar crashes. The default behavior + // is for foo to also exit. + signal(SIGPIPE, SIG_IGN); +#endif // SIGPIPE + if (atexit(termination) != 0) { + lf_print_warning("Failed to register termination function!"); + } + + if (!process_args(argc, argv)) { + // Processing command-line arguments failed. + return -1; + } + + if (rti.base.tracing_enabled) { + _lf_number_of_workers = rti.base.number_of_scheduling_nodes; + rti.base.trace = trace_new(NULL, rti_trace_file_name); + LF_ASSERT(rti.base.trace, "Out of memory"); + start_trace(rti.base.trace); + lf_print("Tracing the RTI execution in %s file.", rti_trace_file_name); + } + + lf_print("Starting RTI for %d federates in federation ID %s.", rti.base.number_of_scheduling_nodes, rti.federation_id); + assert(rti.base.number_of_scheduling_nodes < UINT16_MAX); + + // Allocate memory for the federates + rti.base.scheduling_nodes = (scheduling_node_t**)calloc(rti.base.number_of_scheduling_nodes, sizeof(scheduling_node_t*)); + for (uint16_t i = 0; i < rti.base.number_of_scheduling_nodes; i++) { + federate_info_t *fed_info = (federate_info_t *) calloc(1, sizeof(federate_info_t)); + initialize_federate(fed_info, i); + rti.base.scheduling_nodes[i] = (scheduling_node_t *) fed_info; + } + + int socket_descriptor = start_rti_server(rti.user_specified_port); + if (socket_descriptor >= 0) { + wait_for_federates(socket_descriptor); + normal_termination = true; + if (rti.base.tracing_enabled) { + // No need for a mutex lock because all threads have exited. + stop_trace_locked(rti.base.trace); + lf_print("RTI trace file saved."); + } + } + + lf_print("RTI is exiting."); // Do this before freeing scheduling nodes.
+ free_scheduling_nodes(rti.base.scheduling_nodes, rti.base.number_of_scheduling_nodes); + + // Even if the RTI is exiting normally, it should report an error code if one of the + // federates has reported an error. + return (int)_lf_federate_reports_error; +} +#endif // STANDALONE_RTI + diff --git a/core/federated/RTI/message_record/message_record.c b/core/federated/RTI/message_record/message_record.c deleted file mode 100644 index 697b9ee867..0000000000 --- a/core/federated/RTI/message_record/message_record.c +++ /dev/null @@ -1,173 +0,0 @@ -/** - * @file message_record.c - * @author Soroush Bateni (soroush@berkeley.edu) - * @brief Record-keeping for in-transit messages. - * @version 0.1 - * @date 2022-06-02 - * - * @copyright Copyright (c) 2022, The University of California at Berkeley. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -***************/ - -#include "message_record.h" -#include "platform.h" -#include - -/** - * @brief Initialize the in-transit message record queue. - * - * @return in_transit_message_record_q - */ -in_transit_message_record_q_t* initialize_in_transit_message_q() { - in_transit_message_record_q_t* queue = - (in_transit_message_record_q_t*)calloc( - 1, - sizeof(in_transit_message_record_q_t) - ); - queue->main_queue = pqueue_init( - 10, - in_reverse_order, - get_message_record_index, - get_message_record_position, - set_message_record_position, - tags_match, - print_message_record - ); - - queue->transfer_queue = pqueue_init( - 10, - in_reverse_order, - get_message_record_index, - get_message_record_position, - set_message_record_position, - tags_match, - print_message_record - ); - - return queue; -} - -/** - * @brief Free the memory occupied by the `queue`. - * - * @param queue The queue to free. - */ -void free_in_transit_message_q(in_transit_message_record_q_t* queue) { - pqueue_free(queue->main_queue); - pqueue_free(queue->transfer_queue); - free(queue); -} - -/** - * @brief Add a record of the in-transit message. - * - * @param queue The queue to add to. - * @param tag The tag of the in-transit message. - * @return 0 on success. 
- */ -int add_in_transit_message_record(in_transit_message_record_q_t* queue, tag_t tag) { - in_transit_message_record_t* in_transit_record = malloc(sizeof(in_transit_message_record_t)); - in_transit_record->tag = tag; - return pqueue_insert( - queue->main_queue, - (void*)in_transit_record - ); -} - -/** - * @brief Clean the record of in-transit messages up to and including `tag`. - * - * @param queue The queue to clean. - * @param tag Will clean all messages with tags <= tag. - */ -void clean_in_transit_message_record_up_to_tag(in_transit_message_record_q_t* queue, tag_t tag) { - in_transit_message_record_t* head_of_in_transit_messages = (in_transit_message_record_t*)pqueue_peek(queue->main_queue); - while ( - head_of_in_transit_messages != NULL && // Queue is not empty - head_of_in_transit_messages->tag.time <= tag.time // The head message record has a time less than or equal to - // `tag.time`. - ) { - // Now compare the tags. The message record queue is ordered according to the `time` field, so we need to check - // all records with that `time` and find those that have smaller or equal full tags. - if (lf_tag_compare( - head_of_in_transit_messages->tag, - tag - ) <= 0 - ) { - LF_PRINT_DEBUG( - "RTI: Removed a message with tag (" PRINTF_TIME ", %u) from the list of in-transit messages.", - head_of_in_transit_messages->tag.time - lf_time_start(), - head_of_in_transit_messages->tag.microstep - ); - - free(pqueue_pop(queue->main_queue)); - } else { - // Add it to the transfer queue - pqueue_insert(queue->transfer_queue, pqueue_pop(queue->main_queue)); - } - head_of_in_transit_messages = (in_transit_message_record_t*)pqueue_peek(queue->main_queue); - } - // Empty the transfer queue (which holds messages with equal time but larger microstep) into the main queue. - pqueue_empty_into(&queue->main_queue, &queue->transfer_queue); -} - -/** - * @brief Get the minimum tag of all currently recorded in-transit messages. 
- * - * @param queue The queue to search in (of type `in_transit_message_record_q`). - * @return tag_t The minimum tag of all currently recorded in-transit messages. Return `FOREVER_TAG` if the queue is empty. - */ -tag_t get_minimum_in_transit_message_tag(in_transit_message_record_q_t* queue) { - tag_t minimum_tag = FOREVER_TAG; - - in_transit_message_record_t* head_of_in_transit_messages = (in_transit_message_record_t*)pqueue_peek(queue->main_queue); - while (head_of_in_transit_messages != NULL) { // Queue is not empty - // The message record queue is ordered according to the `time` field, so we need to check - // all records with the minimum `time` and find those that have the smallest tag. - if (lf_tag_compare( - head_of_in_transit_messages->tag, - minimum_tag - ) <= 0 - ) { - minimum_tag = head_of_in_transit_messages->tag; - } else if (head_of_in_transit_messages->tag.time > minimum_tag.time) { - break; - } - - // Add the head to the transfer queue. - pqueue_insert(queue->transfer_queue, pqueue_pop(queue->main_queue)); - - head_of_in_transit_messages = (in_transit_message_record_t*)pqueue_peek(queue->main_queue); - } - // Empty the transfer queue (which holds messages with equal time but larger microstep) into the main queue. - pqueue_empty_into(&queue->main_queue, &queue->transfer_queue); - - if (head_of_in_transit_messages != NULL) { - LF_PRINT_DEBUG( - "RTI: Minimum tag of all in-transit messages: " PRINTF_TAG, - head_of_in_transit_messages->tag.time - lf_time_start(), - head_of_in_transit_messages->tag.microstep - ); - } - - return minimum_tag; -} diff --git a/core/federated/RTI/message_record/message_record.h b/core/federated/RTI/message_record/message_record.h deleted file mode 100644 index b70e4ac1f4..0000000000 --- a/core/federated/RTI/message_record/message_record.h +++ /dev/null @@ -1,84 +0,0 @@ -/** - * @file message_record.h - * @author Soroush Bateni (soroush@berkeley.edu) - * @brief Record-keeping for in-transit messages. 
- * @version 0.1 - * @date 2022-06-02 - * - * @copyright Copyright (c) 2022, The University of California at Berkeley. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -***************/ - -#ifndef RTI_MESSAGE_RECORD_H -#define RTI_MESSAGE_RECORD_H - -#include "rti_pqueue_support.h" - -/** - * @brief Queue to keep a record of in-transit messages. - * - */ -typedef struct { - pqueue_t* main_queue; // The primary queue. - pqueue_t* transfer_queue; // Queue used for housekeeping. -} in_transit_message_record_q_t; - -/** - * @brief Initialize the in-transit message record queue. - * - * @return in_transit_message_record_q - */ -in_transit_message_record_q_t* initialize_in_transit_message_q(); - -/** - * @brief Free the memory occupied by the `queue`. 
- * - * @param queue The queue to free. - */ -void free_in_transit_message_q(in_transit_message_record_q_t* queue); - -/** - * @brief Add a record of the in-transit message. - * - * @param queue The queue to add to (of type `in_transit_message_record_q`). - * @param tag The tag of the in-transit message. - * @return 0 on success. - */ -int add_in_transit_message_record(in_transit_message_record_q_t* queue, tag_t tag); - -/** - * @brief Clean the record of in-transit messages up to and including `tag`. - * - * @param queue The queue to clean (of type `in_transit_message_record_q`). - * @param tag Will clean all messages with tags <= tag. - */ -void clean_in_transit_message_record_up_to_tag(in_transit_message_record_q_t* queue, tag_t tag); - -/** - * @brief Get the minimum tag of all currently recorded in-transit messages. - * - * @param queue The queue to search in (of type `in_transit_message_record_q`). - * @return tag_t The minimum tag of all currently recorded in-transit messages. Return `FOREVER_TAG` if the queue is empty. - */ -tag_t get_minimum_in_transit_message_tag(in_transit_message_record_q_t* queue); - -#endif // RTI_MESSAGE_RECORD_H diff --git a/core/federated/RTI/message_record/rti_pqueue_support.h b/core/federated/RTI/message_record/rti_pqueue_support.h deleted file mode 100644 index 6265f1f998..0000000000 --- a/core/federated/RTI/message_record/rti_pqueue_support.h +++ /dev/null @@ -1,98 +0,0 @@ -/** - * @file rti_pqueue_support.h - * @author Soroush Bateni (soroush@berkeley.edu) - * @brief Header-only support functions for pqueue (in the RTI). - * @version 0.1 - * @date 2022-06-02 - * - * @copyright Copyright (c) 2022, The University of California at Berkeley. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -***************/ - -#ifndef RTI_PQUEUE_SUPPORT_H -#define RTI_PQUEUE_SUPPORT_H - -#include "tag.h" -#include "utils/pqueue.h" -#include "utils/util.h" -#include "platform.h" - -// ********** Priority Queue Support Start -/** - * @brief Represent an in-transit message. - * - */ -typedef struct in_transit_message_record { - tag_t tag; // Tag of the in-transit message. - size_t pos; // Position in the priority queue. -} in_transit_message_record_t; - -/** - * Return whether or not the given `in_transit_message_record_t` types have the same tag. - */ -static int tags_match(void* next, void* curr) { - return (lf_tag_compare( - ((in_transit_message_record_t*)next)->tag, - ((in_transit_message_record_t*)curr)->tag - ) == 0); -} - -/** - * Report a priority equal to the time of the given in-transit message. - * Used for sorting pointers to in_transit_message_record_t structs. 
- */ -static pqueue_pri_t get_message_record_index(void *a) { - return (pqueue_pri_t)(((in_transit_message_record_t*) a)->tag.time); -} - -/** - * Return the given in_transit_message_record_t's position in the queue. - */ -static size_t get_message_record_position(void *a) { - return ((in_transit_message_record_t*) a)->pos; -} - -/** - * Set the given in_transit_message_record_t's position in the queue. - */ -static void set_message_record_position(void *a, size_t pos) { - ((in_transit_message_record_t*) a)->pos = pos; -} - -/** - * Print some information about the given in-transit message. - * - * DEBUG function only. - */ -static void print_message_record(void *message) { - in_transit_message_record_t *r = (in_transit_message_record_t*)message; - LF_PRINT_DEBUG( - "Tag of the in_transit_message_record_t: (" PRINTF_TIME ", %u). " - "Its position in the priority queue: %zu", - r->tag.time - lf_time_start(), - r->tag.microstep, - r->pos - ); -} - -// ********** Priority Queue Support End -#endif diff --git a/core/federated/RTI/rti.c b/core/federated/RTI/rti.c deleted file mode 100644 index 34826d7e7b..0000000000 --- a/core/federated/RTI/rti.c +++ /dev/null @@ -1,134 +0,0 @@ -/** - * @file - * @author Edward A. Lee (eal@berkeley.edu) - * @author Soroush Bateni - * - * @section LICENSE -Copyright (c) 2020, The University of California at Berkeley. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - * @section DESCRIPTION - * Runtime infrastructure for distributed Lingua Franca programs. - * - * This implementation creates one thread per federate so as to be able - * to take advantage of multiple cores. It may be more efficient, however, - * to use select() instead to read from the multiple socket connections - * to each federate. - * - * This implementation sends messages in little endian order - * because Intel, RISC V, and Arm processors are little endian. - * This is not what is normally considered "network order", - * but we control both ends, and hence, for commonly used - * processors, this will be more efficient since it won't have - * to swap bytes. - * - * This implementation of the RTI should be considered a reference - * implementation. In the future it might be re-implemented in Java or Kotlin. - * Or we could bootstrap and implement it using Lingua Franca. - */ - -#include "rti_lib.h" -#include // To trap ctrl-c and invoke a clean stop to save the trace file, if needed. - -/** - * References to the federation RTI and the enclave RTI. - * They both point to the same enclaves stuctures. In the case of federation RTI, - * however, enclaves are encapsulated in federates. 
- */ -extern enclave_rti_t * _e_rti; -extern federation_rti_t* _f_rti; - -/** - * The tracing mechanism uses the number of workers variable `_lf_number_of_workers`. - * For RTI tracing, the number of workers is set as the number of federates. - */ -unsigned int _lf_number_of_workers = 0u; - -extern lf_mutex_t rti_mutex; -extern lf_cond_t received_start_times; -extern lf_cond_t sent_start_time; - -/** - * RTI trace file name - */ -const char *rti_trace_file_name = "rti.lft"; - -/** - * @brief A clean termination of the RTI will write the trace file, if tracing is - * enabled, before exiting. - */ -void termination() { - if (_f_rti->tracing_enabled) { - stop_trace(_f_rti->trace); - trace_free(_f_rti->trace); - lf_print("RTI trace file saved."); - } - lf_print("RTI is exiting."); -} - -int main(int argc, const char* argv[]) { - - initialize_RTI(); - - lf_mutex_init(&rti_mutex); - lf_cond_init(&received_start_times, &rti_mutex); - lf_cond_init(&sent_start_time, &rti_mutex); - - // Catch the Ctrl-C signal, for a clean exit that does not lose the trace information - signal(SIGINT, exit); - if (atexit(termination) != 0) { - lf_print_warning("Failed to register termination function!"); - } - - - if (!process_args(argc, argv)) { - // Processing command-line arguments failed. 
- return -1; - } - if (_f_rti->tracing_enabled) { - _lf_number_of_workers = _f_rti->number_of_enclaves; - _f_rti->trace = trace_new(NULL, rti_trace_file_name); - - lf_assert(_f_rti->trace, "Out of memory"); - start_trace(_f_rti->trace); - - lf_print("Tracing the RTI execution in %s file.", rti_trace_file_name); - } - - lf_print("Starting RTI for %d federates in federation ID %s.", _f_rti->number_of_enclaves, _f_rti->federation_id); - assert(_f_rti->number_of_enclaves < UINT16_MAX); - - // Allocate memory for the federates - _f_rti->enclaves = (federate_t**)calloc(_f_rti->number_of_enclaves, sizeof(federate_t*)); - for (uint16_t i = 0; i < _f_rti->number_of_enclaves; i++) { - _f_rti->enclaves[i] = (federate_t *)malloc(sizeof(federate_t)); - initialize_federate(_f_rti->enclaves[i], i); - } - - // Initialize the RTI enclaves - _e_rti = (enclave_rti_t*)_f_rti; - - int socket_descriptor = start_rti_server(_f_rti->user_specified_port); - wait_for_federates(socket_descriptor); - lf_print("RTI is exiting."); - return 0; -} diff --git a/core/federated/RTI/rti_common.c b/core/federated/RTI/rti_common.c new file mode 100644 index 0000000000..33049db505 --- /dev/null +++ b/core/federated/RTI/rti_common.c @@ -0,0 +1,395 @@ +/** + * @file + * @author Edward A. Lee (eal@berkeley.edu) + * @author Soroush Bateni (soroush@utdallas.edu) + * @author Erling Jellum (erling.r.jellum@ntnu.no) + * @author Chadlia Jerad (chadlia.jerad@ensi-uma.tn) + * @copyright (c) 2020-2023, The University of California at Berkeley + * License in [BSD 2-clause](https://github.com/lf-lang/reactor-c/blob/main/LICENSE.md) + */ +#if defined STANDALONE_RTI || defined LF_ENCLAVES +#include "rti_common.h" + +/** + * Local reference to rti_common_t instance. 
+ */ +static rti_common_t* rti_common = NULL; + +// Global variables defined in tag.c: +extern instant_t start_time; + + +void initialize_rti_common(rti_common_t * _rti_common) { + rti_common = _rti_common; + rti_common->max_stop_tag = NEVER_TAG; + rti_common->number_of_scheduling_nodes = 0; + rti_common->num_scheduling_nodes_handling_stop = 0; +} + +// FIXME: Should scheduling_nodes tracing use the same mechanism as federates? +// It needs to account a federate having itself a number of scheduling_nodes. +// Currently, all calls to tracepoint_from_federate() and +// tracepoint_to_federate() are in rti_lib.c + +#define IS_IN_ZERO_DELAY_CYCLE 1 +#define IS_IN_CYCLE 2 + +void invalidate_min_delays_upstream(scheduling_node_t* node) { + if(node->min_delays != NULL) free(node->min_delays); + node->min_delays = NULL; + node->num_min_delays = 0; + node->flags = 0; // All flags cleared because they get set lazily. +} + +void initialize_scheduling_node(scheduling_node_t* e, uint16_t id) { + e->id = id; + e->completed = NEVER_TAG; + e->last_granted = NEVER_TAG; + e->last_provisionally_granted = NEVER_TAG; + e->next_event = NEVER_TAG; + e->state = NOT_CONNECTED; + e->upstream = NULL; + e->upstream_delay = NULL; + e->num_upstream = 0; + e->downstream = NULL; + e->num_downstream = 0; + e->mode = REALTIME; + invalidate_min_delays_upstream(e); +} + +void _logical_tag_complete(scheduling_node_t* enclave, tag_t completed) { + // FIXME: Consolidate this message with NET to get NMR (Next Message Request). + // Careful with handling startup and shutdown. + lf_mutex_lock(rti_common->mutex); + + enclave->completed = completed; + + LF_PRINT_LOG("RTI received from federate/enclave %d the latest tag complete (LTC) " PRINTF_TAG ".", + enclave->id, enclave->completed.time - start_time, enclave->completed.microstep); + + // Check downstream scheduling_nodes to see whether they should now be granted a TAG. 
+ for (int i = 0; i < enclave->num_downstream; i++) { + scheduling_node_t *downstream = rti_common->scheduling_nodes[enclave->downstream[i]]; + // Notify downstream enclave if appropriate. + notify_advance_grant_if_safe(downstream); + bool *visited = (bool *)calloc(rti_common->number_of_scheduling_nodes, sizeof(bool)); // Initializes to 0. + // Notify scheduling_nodes downstream of downstream if appropriate. + notify_downstream_advance_grant_if_safe(downstream, visited); + free(visited); + } + + lf_mutex_unlock(rti_common->mutex); +} + +tag_t earliest_future_incoming_message_tag(scheduling_node_t* e) { + // First, we need to find the shortest path (minimum delay) path to each upstream node + // and then find the minimum of the node's recorded NET plus the minimum path delay. + // Update the shortest paths, if necessary. + update_min_delays_upstream(e); + + // Next, find the tag of the earliest possible incoming message from upstream enclaves or + // federates, which will be the smallest upstream NET plus the least delay. + // This could be NEVER_TAG if the RTI has not seen a NET from some upstream node. + tag_t t_d = FOREVER_TAG; + for (int i = 0; i < e->num_min_delays; i++) { + // Node e->min_delays[i].id is upstream of e with min delay e->min_delays[i].min_delay. + scheduling_node_t* upstream = rti_common->scheduling_nodes[e->min_delays[i].id]; + // If we haven't heard from the upstream node, then assume it can send an event at the start time. + if (lf_tag_compare(upstream->next_event, NEVER_TAG) == 0) { + tag_t start_tag = {.time = start_time, .microstep = 0}; + upstream->next_event = start_tag; + } + // The min_delay here is a tag_t, not an interval_t because it may account for more than + // one connection. No delay at all is represented by (0,0). A delay of 0 is represented + // by (0,1). If the time part of the delay is greater than 0, then we want to ignore + // the microstep in upstream->next_event because that microstep will have been lost. 
+ // Otherwise, we want to preserve it and add to it. This is handled by lf_tag_add(). + tag_t earliest_tag_from_upstream = lf_tag_add(upstream->next_event, e->min_delays[i].min_delay); + + /* Following debug message is too verbose for normal use: + LF_PRINT_DEBUG("RTI: Earliest next event upstream of fed/encl %d at fed/encl %d has tag " PRINTF_TAG ".", + e->id, + upstream->id, + earliest_tag_from_upstream.time - start_time, earliest_tag_from_upstream.microstep); + */ + if (lf_tag_compare(earliest_tag_from_upstream, t_d) < 0) { + t_d = earliest_tag_from_upstream; + } + } + return t_d; +} + +tag_t eimt_strict(scheduling_node_t* e) { + // Find the tag of the earliest possible incoming message from immediately upstream + // enclaves or federates that are not part of a zero-delay cycle. + // This will be the smallest upstream NET plus the least delay. + // This could be NEVER_TAG if the RTI has not seen a NET from some upstream node. + tag_t t_d = FOREVER_TAG; + for (int i = 0; i < e->num_upstream; i++) { + scheduling_node_t* upstream = rti_common->scheduling_nodes[e->upstream[i]]; + // Skip this node if it is part of a zero-delay cycle. + if (is_in_zero_delay_cycle(upstream)) continue; + // If we haven't heard from the upstream node, then assume it can send an event at the start time. + if (lf_tag_compare(upstream->next_event, NEVER_TAG) == 0) { + tag_t start_tag = {.time = start_time, .microstep = 0}; + upstream->next_event = start_tag; + } + // Need to consider nodes that are upstream of the upstream node because those + // nodes may send messages to the upstream node. + tag_t earliest = earliest_future_incoming_message_tag(upstream); + // If the next event of the upstream node is earlier, then use that. 
+ if (lf_tag_compare(upstream->next_event, earliest) < 0) { + earliest = upstream->next_event; + } + tag_t earliest_tag_from_upstream = lf_delay_tag(earliest, e->upstream_delay[i]); + LF_PRINT_DEBUG("RTI: Strict EIMT of fed/encl %d at fed/encl %d has tag " PRINTF_TAG ".", + e->id, + upstream->id, + earliest_tag_from_upstream.time - start_time, earliest_tag_from_upstream.microstep); + if (lf_tag_compare(earliest_tag_from_upstream, t_d) < 0) { + t_d = earliest_tag_from_upstream; + } + } + return t_d; +} + +tag_advance_grant_t tag_advance_grant_if_safe(scheduling_node_t* e) { + tag_advance_grant_t result = {.tag = NEVER_TAG, .is_provisional = false}; + + // Find the earliest LTC of upstream scheduling_nodes (M). + tag_t min_upstream_completed = FOREVER_TAG; + + for (int j = 0; j < e->num_upstream; j++) { + scheduling_node_t *upstream = rti_common->scheduling_nodes[e->upstream[j]]; + + // Ignore this enclave/federate if it is not connected. + if (upstream->state == NOT_CONNECTED) continue; + + // Adjust by the "after" delay. + // Note that "no delay" is encoded as NEVER, + // whereas one microstep delay is encoded as 0LL. + tag_t candidate = lf_delay_strict(upstream->completed, e->upstream_delay[j]); + + if (lf_tag_compare(candidate, min_upstream_completed) < 0) { + min_upstream_completed = candidate; + } + } + LF_PRINT_LOG("RTI: Minimum upstream LTC for federate/enclave %d is " PRINTF_TAG + "(adjusted by after delay).", + e->id, + min_upstream_completed.time - start_time, min_upstream_completed.microstep); + if (lf_tag_compare(min_upstream_completed, e->last_granted) > 0 + && lf_tag_compare(min_upstream_completed, e->next_event) >= 0 // The enclave has to advance its tag + ) { + result.tag = min_upstream_completed; + return result; + } + + // Can't make progress based only on upstream LTCs. + // If all (transitive) upstream scheduling_nodes of the enclave + // have earliest event tags such that the + // enclave can now advance its tag, then send it a TAG message. 
+ // Find the tag of the earliest event that may be later received from an upstream enclave + // or federate (which includes any after delays on the connections). + tag_t t_d = earliest_future_incoming_message_tag(e); + // Non-ZDC version of the above. This is a tag that must be strictly greater than + // that of the next granted PTAG. + tag_t t_d_strict = eimt_strict(e); + + LF_PRINT_LOG("RTI: Earliest next event upstream of node %d has tag " PRINTF_TAG ".", + e->id, t_d.time - start_time, t_d.microstep); + + // Given an EIMT (earliest incoming message tag) there are these possible scenarios: + // 1) The EIMT is greater than the NET we want to advance to. Grant a TAG. + // 2) The EIMT is equal to the NET and the strict EIMT is greater than the net + // and the federate is part of a zero-delay cycle (ZDC). Grant a PTAG. + // 3) Otherwise, grant nothing and wait for further updates. + + if ( // Scenario (1) above + lf_tag_compare(t_d, e->next_event) > 0 // EIMT greater than NET + && lf_tag_compare(e->next_event, NEVER_TAG) > 0 // NET is not NEVER_TAG + && lf_tag_compare(t_d, e->last_provisionally_granted) >= 0 // The grant is not redundant + // (equal is important to override any previous + // PTAGs). + && lf_tag_compare(t_d, e->last_granted) > 0 // The grant is not redundant. + ) { + // No upstream node can send events that will be received with a tag less than or equal to + // e->next_event, so it is safe to send a TAG. + LF_PRINT_LOG("RTI: Earliest upstream message time for fed/encl %d is " PRINTF_TAG + "(adjusted by after delay). 
Granting tag advance (TAG) for " PRINTF_TAG, + e->id, + t_d.time - lf_time_start(), t_d.microstep, + e->next_event.time - lf_time_start(), + e->next_event.microstep); + result.tag = e->next_event; + } else if( // Scenario (2) above + lf_tag_compare(t_d, e->next_event) == 0 // EIMT equal to NET + && is_in_zero_delay_cycle(e) // The node is part of a ZDC + && lf_tag_compare(t_d_strict, e->next_event) > 0 // The strict EIMT is greater than the NET + && lf_tag_compare(t_d, e->last_provisionally_granted) > 0 // The grant is not redundant + && lf_tag_compare(t_d, e->last_granted) > 0 // The grant is not redundant. + ) { + // Some upstream node may send an event that has the same tag as this node's next event, + // so we can only grant a PTAG. + LF_PRINT_LOG("RTI: Earliest upstream message time for fed/encl %d is " PRINTF_TAG + " (adjusted by after delay). Granting provisional tag advance (PTAG) for " PRINTF_TAG, + e->id, + t_d.time - start_time, t_d.microstep, + e->next_event.time - lf_time_start(), + e->next_event.microstep); + result.tag = e->next_event; + result.is_provisional = true; + } + return result; +} + +void notify_downstream_advance_grant_if_safe(scheduling_node_t* e, bool visited[]) { + visited[e->id] = true; + for (int i = 0; i < e->num_downstream; i++) { + scheduling_node_t* downstream = rti_common->scheduling_nodes[e->downstream[i]]; + if (visited[downstream->id]) continue; + notify_advance_grant_if_safe(downstream); + notify_downstream_advance_grant_if_safe(downstream, visited); + } +} + +void update_scheduling_node_next_event_tag_locked(scheduling_node_t* e, tag_t next_event_tag) { + e->next_event = next_event_tag; + + LF_PRINT_DEBUG( + "RTI: Updated the recorded next event tag for federate/enclave %d to " PRINTF_TAG, + e->id, + next_event_tag.time - lf_time_start(), + next_event_tag.microstep + ); + + // Check to see whether we can reply now with a tag advance grant. 
+ // If the enclave has no upstream scheduling_nodes, then it does not wait for + // nor expect a reply. It just proceeds to advance time. + if (e->num_upstream > 0) { + notify_advance_grant_if_safe(e); + } else { + // Even though there was no grant, mark the tag as if there was. + e->last_granted = next_event_tag; + } + // Check downstream scheduling_nodes to see whether they should now be granted a TAG. + // To handle cycles, need to create a boolean array to keep + // track of which downstream scheduling_nodes have been visited. + bool *visited = (bool *)calloc(rti_common->number_of_scheduling_nodes, sizeof(bool)); // Initializes to 0. + notify_downstream_advance_grant_if_safe(e, visited); + free(visited); +} + +void notify_advance_grant_if_safe(scheduling_node_t* e) { + tag_advance_grant_t grant = tag_advance_grant_if_safe(e); + if (lf_tag_compare(grant.tag, NEVER_TAG) != 0) { + if (grant.is_provisional) { + notify_provisional_tag_advance_grant(e, grant.tag); + } else { + notify_tag_advance_grant(e, grant.tag); + } + } +} + +// Local function used recursively to find minimum delays upstream. +// Return in count the number of non-FOREVER_TAG entries in path_delays[]. +static void _update_min_delays_upstream( + scheduling_node_t* end, + scheduling_node_t* intermediate, + tag_t path_delays[], + size_t* count) { + // On first call, intermediate will be NULL, so the path delay is initialized to zero. + tag_t delay_from_intermediate_so_far = ZERO_TAG; + if (intermediate == NULL) { + intermediate = end; + } else { + // Not the first call, so intermediate is upstream of end. + delay_from_intermediate_so_far = path_delays[intermediate->id]; + } + if (intermediate->state == NOT_CONNECTED) { + // Enclave or federate is not connected. + // No point in checking upstream scheduling_nodes. + return; + } + // Check nodes upstream of intermediate (or end on first call). 
+ // NOTE: It would be better to iterate through these sorted by minimum delay, + // but for most programs, the gain might be negligible since there are relatively few + // upstream nodes. + for (int i = 0; i < intermediate->num_upstream; i++) { + // Add connection delay to path delay so far. + tag_t path_delay = lf_delay_tag(delay_from_intermediate_so_far, intermediate->upstream_delay[i]); + // If the path delay is less than the so-far recorded path delay from upstream, update upstream. + if (lf_tag_compare(path_delay, path_delays[intermediate->upstream[i]]) < 0) { + if (path_delays[intermediate->upstream[i]].time == FOREVER) { + // Found a finite path. + *count = *count + 1; + } + path_delays[intermediate->upstream[i]] = path_delay; + // Since the path delay to upstream has changed, recursively update those upstream of it. + // Do not do this, however, if the upstream node is the end node because this means we have + // completed a cycle. + if (end->id != intermediate->upstream[i]) { + _update_min_delays_upstream(end, rti_common->scheduling_nodes[intermediate->upstream[i]], path_delays, count); + } else { + // Found a cycle. + end->flags = end->flags | IS_IN_CYCLE; + // Is it a zero-delay cycle? + if (lf_tag_compare(path_delay, ZERO_TAG) == 0 && intermediate->upstream_delay[i] < 0) { + end->flags = end->flags | IS_IN_ZERO_DELAY_CYCLE; + } else { + // Clear the flag. + end->flags = end->flags & ~IS_IN_ZERO_DELAY_CYCLE; + } + } + } + } +} + +void update_min_delays_upstream(scheduling_node_t* node) { + // Check whether cached result is valid. + if (node->min_delays == NULL) { + + // This is not Dijkstra's algorithm, but rather one optimized for sparse upstream nodes. + // There must be a name for this algorithm. + + // Array of results on the stack: + tag_t path_delays[rti_common->number_of_scheduling_nodes]; + // This will be the number of non-FOREVER entries put into path_delays. 
+ size_t count = 0; + + for (int i = 0; i < rti_common->number_of_scheduling_nodes; i++) { + path_delays[i] = FOREVER_TAG; + } + _update_min_delays_upstream(node, NULL, path_delays, &count); + + // Put the results onto the node's struct. + node->num_min_delays = count; + node->min_delays = (minimum_delay_t*)calloc(count, sizeof(minimum_delay_t)); + LF_PRINT_DEBUG("++++ Node %hu is in ZDC: %d", node->id, is_in_zero_delay_cycle(node)); + int k = 0; + for (int i = 0; i < rti_common->number_of_scheduling_nodes; i++) { + if (lf_tag_compare(path_delays[i], FOREVER_TAG) < 0) { + // Node i is upstream. + if (k >= count) { + lf_print_error_and_exit("Internal error! Count of upstream nodes %zu for node %d is wrong!", count, i); + } + minimum_delay_t min_delay = {.id = i, .min_delay = path_delays[i]}; + node->min_delays[k++] = min_delay; + // N^2 debug statement could be a problem with large benchmarks. + // LF_PRINT_DEBUG("++++ Node %hu is upstream with delay" PRINTF_TAG "\n", i, path_delays[i].time, path_delays[i].microstep); + } + } + } +} + +bool is_in_zero_delay_cycle(scheduling_node_t* node) { + update_min_delays_upstream(node); + return node->flags & IS_IN_ZERO_DELAY_CYCLE; +} + +bool is_in_cycle(scheduling_node_t* node) { + update_min_delays_upstream(node); + return node->flags & IS_IN_CYCLE; +} + +#endif diff --git a/core/federated/RTI/rti_common.h b/core/federated/RTI/rti_common.h new file mode 100644 index 0000000000..770918d5b8 --- /dev/null +++ b/core/federated/RTI/rti_common.h @@ -0,0 +1,289 @@ +/** + * @file + * @author Edward A. 
Lee (eal@berkeley.edu) + * @author Soroush Bateni (soroush@utdallas.edu) + * @author Erling Jellum (erling.r.jellum@ntnu.no) + * @author Chadlia Jerad (chadlia.jerad@ensi-uma.tn) + * @copyright (c) 2020-2023, The University of California at Berkeley + * License in [BSD 2-clause](https://github.com/lf-lang/reactor-c/blob/main/LICENSE.md) + * @brief Common declarations for runtime infrastructure (RTI) for scheduling enclaves + * and distributed Lingua Franca programs. + */ +#if defined STANDALONE_RTI || defined LF_ENCLAVES +#ifndef RTI_COMMON_H +#define RTI_COMMON_H + +#include <errno.h> // Defines perror(), errno +#include <assert.h> +#include "platform.h" // Platform-specific types and functions +#include "util.h" // Defines print functions (e.g., lf_print). +#include "tag.h" // Time-related types and functions. +#include "trace.h" // Tracing related functions + +/** Mode of execution of a federate. */ +typedef enum execution_mode_t { + FAST, + REALTIME +} execution_mode_t; + +/** State of the scheduling node during execution. */ +typedef enum scheduling_node_state_t { + NOT_CONNECTED, // The scheduling node has not connected. + GRANTED, // Most recent MSG_TYPE_NEXT_EVENT_TAG has been granted. + PENDING // Waiting for upstream scheduling nodes. +} scheduling_node_state_t; + +/** Struct for minimum delays from upstream nodes. */ +typedef struct minimum_delay_t { + int id; // ID of the upstream node. + tag_t min_delay; // Minimum delay from upstream. +} minimum_delay_t; + +/** + * Information about the scheduling nodes coordinated by the RTI. + * The abstract scheduling node could either be an enclave or a federate. + * The information includes its runtime state, + * mode of execution, and connectivity with other scheduling nodes. + * The list of upstream and downstream scheduling nodes does not include + * those that are connected via a "physical" connection (one + * denoted with ~>) because those connections do not impose + * any scheduling constraints. 
+ */ +typedef struct scheduling_node_t { + uint16_t id; // ID of this scheduling node. + tag_t completed; // The largest logical tag completed by the scheduling node + // (or NEVER if no LTC has been received). + tag_t last_granted; // The maximum TAG that has been granted so far (or NEVER if none granted) + tag_t last_provisionally_granted; // The maximum PTAG that has been provisionally granted (or NEVER if none granted) + tag_t next_event; // Most recent NET received from the scheduling node (or NEVER if none received). + scheduling_node_state_t state; // State of the scheduling node. + int* upstream; // Array of upstream scheduling node ids. + interval_t* upstream_delay; // Minimum delay on connections from upstream scheduling nodes. + // Here, NEVER encodes no delay. 0LL is a microstep delay. + int num_upstream; // Size of the array of upstream scheduling nodes and delays. + int* downstream; // Array of downstream scheduling node ids. + int num_downstream; // Size of the array of downstream scheduling nodes. + execution_mode_t mode; // FAST or REALTIME. + minimum_delay_t* min_delays; // Array of minimum delays from upstream nodes, not including this node. + size_t num_min_delays; // Size of min_delays array. + int flags; // Or of IS_IN_ZERO_DELAY_CYCLE, IS_IN_CYCLE +} scheduling_node_t; + +/** + * Data structure which is common to both the remote standalone RTI and the local RTI used in enclaved execution. + * rti_remote_t and rti_local_t will "inherit" from this data structure. The first field is an array of pointers + * to scheduling nodes. These will be scheduling nodes for the local RTI and federates for the remote RTI + */ +typedef struct rti_common_t { + // The scheduling nodes. 
+ scheduling_node_t **scheduling_nodes; + + // Number of scheduling nodes + int32_t number_of_scheduling_nodes; + + // RTI's decided stop tag for the scheduling nodes + tag_t max_stop_tag; + + // Number of scheduling nodes handling stop + int num_scheduling_nodes_handling_stop; + + // Boolean indicating that tracing is enabled. + bool tracing_enabled; + + // Pointer to a tracing object + trace_t* trace; + + // The RTI mutex for making thread-safe access to the shared state. + lf_mutex_t* mutex; +} rti_common_t; + +typedef struct { + tag_t tag; // NEVER if there is no tag advance grant. + bool is_provisional; // True for PTAG, false for TAG. +} tag_advance_grant_t; + +/** + * @brief Initialize the fields of the rti_common struct. It also stores + * the pointer to the struct and uses it internally. + * + * @param rti_common The rti_common_t struct to initialize. + */ +void initialize_rti_common(rti_common_t * rti_common); + +/** + * @brief Update the completed tag for the specified node. + * + * This checks whether any downstream nodes become eligible to receive TAG + * or PTAG, and sends those signals if appropriate. This function acquires and releases the RTI mutex itself. + * + * The function is prepended with an underscore because a function called + * `logical_tag_complete` is code-generated by the compiler. + * + * @param e The scheduling node. + * @param completed The completed tag of the scheduling node. + */ +void _logical_tag_complete(scheduling_node_t* e, tag_t completed); + +/** + * Initialize the scheduling node with the specified ID. + * + * @param e The scheduling node. + * @param id The scheduling node ID. + */ +void initialize_scheduling_node(scheduling_node_t* e, uint16_t id); + +/** + * For all scheduling nodes downstream of the specified node, determine + * whether they should be notified of a TAG or PTAG and notify them if so. + * + * This assumes the caller holds the RTI mutex. + * + * @param e The upstream node. 
+ * @param visited An array of booleans used to determine whether a node has + * been visited (initially all false). + */ +void notify_downstream_advance_grant_if_safe(scheduling_node_t* e, bool visited[]); + +/** + * Notify a tag advance grant (TAG) message to the specified scheduling node. + * Do not notify it if a previously sent PTAG was greater or if a + * previously sent TAG was greater or equal. + * + * This function will keep a record of this TAG in the node's last_granted + * field. + * + * This function assumes that the caller holds the RTI mutex. + * + * @param e The scheduling node. + * @param tag The tag to grant. + */ +void notify_tag_advance_grant(scheduling_node_t* e, tag_t tag); + +/** + * @brief Either send to a federate or unblock an enclave to give it a tag. + * This function requires two different implementations, one for enclaves + * and one for federates. + * + * This assumes the caller holds the RTI mutex. + * + * @param e The scheduling node. + */ +void notify_advance_grant_if_safe(scheduling_node_t* e); + +/** + * Notify a provisional tag advance grant (PTAG) message to the specified scheduling node. + * Do not notify it if a previously sent PTAG or TAG was greater or equal. + * + * This function will keep a record of this PTAG in the node's last_provisionally_granted + * field. + * + * This function assumes that the caller holds the RTI mutex. + * + * @param e The scheduling node. + * @param tag The tag to grant. + */ +void notify_provisional_tag_advance_grant(scheduling_node_t* e, tag_t tag); + +/** + * Determine whether the specified scheduling node is eligible for a tag advance grant, + * (TAG) and, if so, return the details. This is called upon receiving a LTC, NET + * or resign from an upstream node. + * + * This function calculates the minimum M over + * all upstream scheduling nodes of the "after" delay plus the most recently + * received LTC from that node. 
If M is greater than the + * most recent TAG to e or greater than or equal to the most + * recent PTAG, then return TAG(M). + * + * If the above conditions do not result in returning a TAG, then find the + * minimum M of the earliest possible future message from upstream federates. + * This is calculated by transitively looking at the most recently received + * NET calls from upstream scheduling nodes. + * If M is greater than the NET of e or the most recent PTAG to e, then + * return a TAG with tag equal to the NET of e or the PTAG. + * If M is equal to the NET of the federate, then return PTAG(M). + * + * This should be called whenever an immediately upstream federate sends to + * the RTI an LTC (latest tag complete), or when a transitive upstream + * federate sends a NET (Next Event Tag) message. + * It is also called when an upstream federate resigns from the federation. + * + * This function assumes that the caller holds the RTI mutex. + * + * @param e The scheduling node. + * @return If granted, return the tag value and whether it is provisional. + * Otherwise, return a grant whose tag is NEVER_TAG. + */ +tag_advance_grant_t tag_advance_grant_if_safe(scheduling_node_t* e); + +/** + * @brief Update the next event tag of a scheduling node. + * + * This will notify downstream scheduling nodes with a TAG or PTAG if appropriate. + * + * This function assumes that the caller is holding the RTI mutex. + * + * @param e The scheduling node. + * @param next_event_tag The next event tag for e. + */ +void update_scheduling_node_next_event_tag_locked(scheduling_node_t* e, tag_t next_event_tag); + +/** + * Given a node (enclave or federate), find the tag of the earliest possible incoming + * message (EIMT) from upstream enclaves or federates, which will be the smallest upstream NET + * plus the least delay. This could be NEVER_TAG if the RTI has not seen a NET from some + * upstream node. + * @param e The target node. + * @return The earliest possible incoming message tag. 
+ */ +tag_t earliest_future_incoming_message_tag(scheduling_node_t* e); + +/** + * Given a node (enclave or federate), find the earliest incoming message tag (EIMT) from + * any immediately upstream node that is not part of a zero-delay cycle (ZDC). + * These tags are treated strictly by the RTI when deciding whether to grant a PTAG. + * Since the upstream node is not part of a ZDC, there is no need to block on the input + * from that node since we can simply wait for it to complete its tag without chance of + * introducing a deadlock. This will return FOREVER_TAG if there are no non-ZDC upstream nodes. + * @param e The target node. + * @return The earliest possible incoming message tag from a non-ZDC upstream node. + */ +tag_t eimt_strict(scheduling_node_t* e); + +/** + * Return true if the node is in a zero-delay cycle. + * @param node The node. + */ +bool is_in_zero_delay_cycle(scheduling_node_t* node); + +/** + * Return true if the node is in a cycle (possibly a zero-delay cycle). + * @param node The node. + */ +bool is_in_cycle(scheduling_node_t* node); + +/** + * For the given scheduling node (enclave or federate), if necessary, update the `min_delays`, + * `num_min_delays`, and the fields that indicate cycles. These fields will be + * updated only if they have not been previously updated or if invalidate_min_delays_upstream + * has been called since they were last updated. + * @param node The node. + */ +void update_min_delays_upstream(scheduling_node_t* node); + +/** + * For the given scheduling node (enclave or federate), invalidate the `min_delays`, + * `num_min_delays`, and the fields that indicate cycles. + * This should be called whenever the structure of the connections upstream of the + * given node has changed. + * @param node The node. + */ +void invalidate_min_delays_upstream(scheduling_node_t* node); + +/** + * Free dynamically allocated memory on the scheduling nodes and the scheduling node array itself.
+ */ +void free_scheduling_nodes(scheduling_node_t** scheduling_nodes, uint16_t number_of_scheduling_nodes); + +#endif // RTI_COMMON_H +#endif // STANDALONE_RTI || LF_ENCLAVES diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c deleted file mode 100644 index 4cab7bc15a..0000000000 --- a/core/federated/RTI/rti_lib.c +++ /dev/null @@ -1,1800 +0,0 @@ -/** - * @file - * @author Edward A. Lee - * @author Soroush Bateni - * @copyright (c) 2020-2023, The University of California at Berkeley - * License in [BSD 2-clause](https://github.com/lf-lang/reactor-c/blob/main/LICENSE.md) - * @brief Runtime infrastructure (RTI) for distributed Lingua Franca programs. - * - * This implementation creates one thread per federate so as to be able - * to take advantage of multiple cores. It may be more efficient, however, - * to use select() instead to read from the multiple socket connections - * to each federate. - * - * This implementation sends messages in little endian order - * because Intel, RISC V, and Arm processors are little endian. - * This is not what is normally considered "network order", - * but we control both ends, and hence, for commonly used - * processors, this will be more efficient since it won't have - * to swap bytes. - * - * This implementation of the RTI should be considered a reference - * implementation. In the future it might be re-implemented in Java or Kotlin. - * Or we could bootstrap and implement it using Lingua Franca. - */ - -#include "rti_lib.h" -#include - -// Global variables defined in tag.c: -extern instant_t start_time; - -/** - * Reference to federate_rti_t instance. 
- */ -federation_rti_t *_f_rti; - -lf_mutex_t rti_mutex; -lf_cond_t received_start_times; -lf_cond_t sent_start_time; - -extern int lf_critical_section_enter(environment_t* env) { - return lf_mutex_lock(&rti_mutex); -} - -extern int lf_critical_section_exit(environment_t* env) { - return lf_mutex_unlock(&rti_mutex); -} - -int create_server(int32_t specified_port, uint16_t port, socket_type_t socket_type) { - // Timeout time for the communications of the server - struct timeval timeout_time = {.tv_sec = TCP_TIMEOUT_TIME / BILLION, .tv_usec = (TCP_TIMEOUT_TIME % BILLION) / 1000}; - // Create an IPv4 socket for TCP (not UDP) communication over IP (0). - int socket_descriptor = -1; - if (socket_type == TCP) { - socket_descriptor = create_real_time_tcp_socket_errexit(); - } else if (socket_type == UDP) { - socket_descriptor = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - // Set the appropriate timeout time - timeout_time = (struct timeval){.tv_sec = UDP_TIMEOUT_TIME / BILLION, .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000}; - } - if (socket_descriptor < 0) { - lf_print_error_and_exit("Failed to create RTI socket."); - } - - // Set the option for this socket to reuse the same address - int true_variable = 1; // setsockopt() requires a reference to the value assigned to an option - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEADDR, &true_variable, sizeof(int32_t)) < 0) { - lf_print_error("RTI failed to set SO_REUSEADDR option on the socket: %s.", strerror(errno)); - } - // Set the timeout on the socket so that read and write operations don't block for too long - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_RCVTIMEO, (const char*)&timeout_time, sizeof(timeout_time)) < 0) { - lf_print_error("RTI failed to set SO_RCVTIMEO option on the socket: %s.", strerror(errno)); - } - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_SNDTIMEO, (const char*)&timeout_time, sizeof(timeout_time)) < 0) { - lf_print_error("RTI failed to set SO_SNDTIMEO option on the socket: %s.", 
strerror(errno)); - } - - /* - * The following used to permit reuse of a port that an RTI has previously - * used that has not been released. We no longer do this, but instead - * increment the port number until an available port is found. - - // SO_REUSEPORT (since Linux 3.9) - // Permits multiple AF_INET or AF_INET6 sockets to be bound to an - // identical socket address. This option must be set on each - // socket (including the first socket) prior to calling bind(2) - // on the socket. To prevent port hijacking, all of the - // processes binding to the same address must have the same - // effective UID. This option can be employed with both TCP and - // UDP sockets. - - int reuse = 1; - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEADDR, - (const char*)&reuse, sizeof(reuse)) < 0) { - perror("setsockopt(SO_REUSEADDR) failed"); - } - - #ifdef SO_REUSEPORT - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEPORT, - (const char*)&reuse, sizeof(reuse)) < 0) { - perror("setsockopt(SO_REUSEPORT) failed"); - } - #endif - */ - - // Server file descriptor. - struct sockaddr_in server_fd; - // Zero out the server address structure. - bzero((char *) &server_fd, sizeof(server_fd)); - - server_fd.sin_family = AF_INET; // IPv4 - server_fd.sin_addr.s_addr = INADDR_ANY; // All interfaces, 0.0.0.0. - // Convert the port number from host byte order to network byte order. - server_fd.sin_port = htons(port); - - int result = bind( - socket_descriptor, - (struct sockaddr *) &server_fd, - sizeof(server_fd)); - - // If the binding fails with this port and no particular port was specified - // in the LF program, then try the next few ports in sequence. - while (result != 0 - && specified_port == 0 - && port >= STARTING_PORT - && port <= STARTING_PORT + PORT_RANGE_LIMIT) { - lf_print("RTI failed to get port %d. 
Trying %d.", port, port + 1); - port++; - server_fd.sin_port = htons(port); - result = bind( - socket_descriptor, - (struct sockaddr *) &server_fd, - sizeof(server_fd)); - } - if (result != 0) { - if (specified_port == 0) { - lf_print_error_and_exit("Failed to bind the RTI socket. Cannot find a usable port. " - "Consider increasing PORT_RANGE_LIMIT in net_common.h."); - } else { - lf_print_error_and_exit("Failed to bind the RTI socket. Specified port is not available. " - "Consider leaving the port unspecified"); - } - } - char* type = "TCP"; - if (socket_type == UDP) { - type = "UDP"; - } - lf_print("RTI using %s port %d for federation %s.", type, port, _f_rti->federation_id); - - if (socket_type == TCP) { - _f_rti->final_port_TCP = port; - // Enable listening for socket connections. - // The second argument is the maximum number of queued socket requests, - // which according to the Mac man page is limited to 128. - listen(socket_descriptor, 128); - } else if (socket_type == UDP) { - _f_rti->final_port_UDP = port; - // No need to listen on the UDP socket - } - - return socket_descriptor; -} - -void notify_tag_advance_grant(enclave_t* e, tag_t tag) { - if (e->state == NOT_CONNECTED - || lf_tag_compare(tag, e->last_granted) <= 0 - || lf_tag_compare(tag, e->last_provisionally_granted) < 0 - ) { - return; - } - // Need to make sure that the destination federate's thread has already - // sent the starting MSG_TYPE_TIMESTAMP message. - while (e->state == PENDING) { - // Need to wait here. 
- lf_cond_wait(&sent_start_time); - } - size_t message_length = 1 + sizeof(int64_t) + sizeof(uint32_t); - unsigned char buffer[message_length]; - buffer[0] = MSG_TYPE_TAG_ADVANCE_GRANT; - encode_int64(tag.time, &(buffer[1])); - encode_int32((int32_t)tag.microstep, &(buffer[1 + sizeof(int64_t)])); - - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_TAG, e->id, &tag); - } - // This function is called in notify_advance_grant_if_safe(), which is a long - // function. During this call, the socket might close, causing the following write_to_socket - // to fail. Consider a failure here a soft failure and update the federate's status. - ssize_t bytes_written = write_to_socket(((federate_t*)e)->socket, message_length, buffer); - if (bytes_written < (ssize_t)message_length) { - lf_print_error("RTI failed to send tag advance grant to federate %d.", e->id); - if (bytes_written < 0) { - e->state = NOT_CONNECTED; - // FIXME: We need better error handling, but don't stop other execution here. - } - } else { - e->last_granted = tag; - LF_PRINT_LOG("RTI sent to federate %d the tag advance grant (TAG) " PRINTF_TAG ".", - e->id, tag.time - start_time, tag.microstep); - } -} - -void notify_provisional_tag_advance_grant(enclave_t* e, tag_t tag) { - if (e->state == NOT_CONNECTED - || lf_tag_compare(tag, e->last_granted) <= 0 - || lf_tag_compare(tag, e->last_provisionally_granted) <= 0 - ) { - return; - } - // Need to make sure that the destination federate's thread has already - // sent the starting MSG_TYPE_TIMESTAMP message. - while (e->state == PENDING) { - // Need to wait here. 
- lf_cond_wait(&sent_start_time); - } - size_t message_length = 1 + sizeof(int64_t) + sizeof(uint32_t); - unsigned char buffer[message_length]; - buffer[0] = MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT; - encode_int64(tag.time, &(buffer[1])); - encode_int32((int32_t)tag.microstep, &(buffer[1 + sizeof(int64_t)])); - - if (_f_rti->tracing_enabled){ - tracepoint_rti_to_federate(_f_rti->trace, send_PTAG, e->id, &tag); - } - // This function is called in notify_advance_grant_if_safe(), which is a long - // function. During this call, the socket might close, causing the following write_to_socket - // to fail. Consider a failure here a soft failure and update the federate's status. - ssize_t bytes_written = write_to_socket(((federate_t*)e)->socket, message_length, buffer); - - if (bytes_written < (ssize_t)message_length) { - lf_print_error("RTI failed to send tag advance grant to federate %d.", e->id); - if (bytes_written < 0) { - e->state = NOT_CONNECTED; - // FIXME: We need better error handling, but don't stop other execution here. - } - } else { - e->last_provisionally_granted = tag; - LF_PRINT_LOG("RTI sent to federate %d the Provisional Tag Advance Grant (PTAG) " PRINTF_TAG ".", - e->id, tag.time - start_time, tag.microstep); - - // Send PTAG to all upstream federates, if they have not had - // a later or equal PTAG or TAG sent previously and if their transitive - // NET is greater than or equal to the tag. - // NOTE: This could later be replaced with a TNET mechanism once - // we have an available encoding of causality interfaces. - // That might be more efficient. - for (int j = 0; j < e->num_upstream; j++) { - federate_t* upstream = _f_rti->enclaves[e->upstream[j]]; - - // Ignore this federate if it has resigned. - if (upstream->enclave.state == NOT_CONNECTED) continue; - // To handle cycles, need to create a boolean array to keep - // track of which upstream federates have been visited. 
- bool* visited = (bool*)calloc(_f_rti->number_of_enclaves, sizeof(bool)); // Initializes to 0. - - // Find the (transitive) next event tag upstream. - tag_t upstream_next_event = transitive_next_event( - &(upstream->enclave), upstream->enclave.next_event, visited); - free(visited); - // If these tags are equal, then - // a TAG or PTAG should have already been granted, - // in which case, another will not be sent. But it - // may not have been already granted. - if (lf_tag_compare(upstream_next_event, tag) >= 0) { - notify_provisional_tag_advance_grant(&(upstream->enclave), tag); - } - - } - } -} - -void update_federate_next_event_tag_locked(uint16_t federate_id, tag_t next_event_tag) { - federate_t* fed = _f_rti->enclaves[federate_id]; - tag_t min_in_transit_tag = get_minimum_in_transit_message_tag(fed->in_transit_message_tags); - if (lf_tag_compare( - min_in_transit_tag, - next_event_tag - ) < 0 - ) { - next_event_tag = min_in_transit_tag; - } - update_enclave_next_event_tag_locked(&(fed->enclave), next_event_tag); -} - -void handle_port_absent_message(federate_t* sending_federate, unsigned char* buffer) { - size_t message_size = sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int64_t) + sizeof(uint32_t); - - read_from_socket_errexit(sending_federate->socket, message_size, &(buffer[1]), - " RTI failed to read port absent message from federate %u.", - sending_federate->enclave.id); - - uint16_t reactor_port_id = extract_uint16(&(buffer[1])); - uint16_t federate_id = extract_uint16(&(buffer[1 + sizeof(uint16_t)])); - tag_t tag = extract_tag(&(buffer[1 + 2 * sizeof(uint16_t)])); - - if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_PORT_ABS, sending_federate->enclave.id, &tag); - } - - // Need to acquire the mutex lock to ensure that the thread handling - // messages coming from the socket connected to the destination does not - // issue a TAG before this message has been forwarded. 
- lf_mutex_lock(&rti_mutex); - - // If the destination federate is no longer connected, issue a warning - // and return. - federate_t* fed = (federate_t*) _f_rti->enclaves[federate_id]; - if (fed->enclave.state == NOT_CONNECTED) { - lf_mutex_unlock(&rti_mutex); - lf_print_warning("RTI: Destination federate %d is no longer connected. Dropping message.", - federate_id); - LF_PRINT_LOG("Fed status: next_event (" PRINTF_TIME ", %d), " - "completed (" PRINTF_TIME ", %d), " - "last_granted (" PRINTF_TIME ", %d), " - "last_provisionally_granted (" PRINTF_TIME ", %d).", - fed->enclave.next_event.time - start_time, - fed->enclave.next_event.microstep, - fed->enclave.completed.time - start_time, - fed->enclave.completed.microstep, - fed->enclave.last_granted.time - start_time, - fed->enclave.last_granted.microstep, - fed->enclave.last_provisionally_granted.time - start_time, - fed->enclave.last_provisionally_granted.microstep - ); - return; - } - - LF_PRINT_LOG("RTI forwarding port absent message for port %u to federate %u.", - reactor_port_id, - federate_id); - - // Need to make sure that the destination federate's thread has already - // sent the starting MSG_TYPE_TIMESTAMP message. - while (fed->enclave.state == PENDING) { - // Need to wait here. - lf_cond_wait(&sent_start_time); - } - - // Forward the message. - int destination_socket = fed->socket; - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_PORT_ABS, federate_id, &tag); - } - write_to_socket_errexit(destination_socket, message_size + 1, buffer, - "RTI failed to forward message to federate %d.", federate_id); - - lf_mutex_unlock(&rti_mutex); -} - -void handle_timed_message(federate_t* sending_federate, unsigned char* buffer) { - size_t header_size = 1 + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t) + sizeof(int64_t) + sizeof(uint32_t); - // Read the header, minus the first byte which has already been read. 
- read_from_socket_errexit(sending_federate->socket, header_size - 1, &(buffer[1]), "RTI failed to read the timed message header from remote federate."); - // Extract the header information. of the sender - uint16_t reactor_port_id; - uint16_t federate_id; - size_t length; - tag_t intended_tag; - // Extract information from the header. - extract_timed_header(&(buffer[1]), &reactor_port_id, &federate_id, &length, &intended_tag); - - size_t total_bytes_to_read = length + header_size; - size_t bytes_to_read = length; - - if (FED_COM_BUFFER_SIZE < header_size + 1) { - lf_print_error_and_exit("Buffer size (%d) is not large enough to " - "read the header plus one byte.", - FED_COM_BUFFER_SIZE); - } - - // Cut up the payload in chunks. - if (bytes_to_read > FED_COM_BUFFER_SIZE - header_size) { - bytes_to_read = FED_COM_BUFFER_SIZE - header_size; - } - - LF_PRINT_LOG("RTI received message from federate %d for federate %u port %u with intended tag " - PRINTF_TAG ". Forwarding.", - sending_federate->enclave.id, federate_id, reactor_port_id, - intended_tag.time - lf_time_start(), intended_tag.microstep); - - read_from_socket_errexit(sending_federate->socket, bytes_to_read, &(buffer[header_size]), - "RTI failed to read timed message from federate %d.", federate_id); - size_t bytes_read = bytes_to_read + header_size; - // Following only works for string messages. - // LF_PRINT_DEBUG("Message received by RTI: %s.", buffer + header_size); - - if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_TAGGED_MSG, sending_federate->enclave.id, &intended_tag); - } - - // Need to acquire the mutex lock to ensure that the thread handling - // messages coming from the socket connected to the destination does not - // issue a TAG before this message has been forwarded. - lf_mutex_lock(&rti_mutex); - - // If the destination federate is no longer connected, issue a warning - // and return. 
- federate_t *fed = _f_rti->enclaves[federate_id]; - if (fed->enclave.state == NOT_CONNECTED) { - lf_mutex_unlock(&rti_mutex); - lf_print_warning("RTI: Destination federate %d is no longer connected. Dropping message.", - federate_id); - LF_PRINT_LOG("Fed status: next_event (" PRINTF_TIME ", %d), " - "completed (" PRINTF_TIME ", %d), " - "last_granted (" PRINTF_TIME ", %d), " - "last_provisionally_granted (" PRINTF_TIME ", %d).", - fed->enclave.next_event.time - start_time, - fed->enclave.next_event.microstep, - fed->enclave.completed.time - start_time, - fed->enclave.completed.microstep, - fed->enclave.last_granted.time - start_time, - fed->enclave.last_granted.microstep, - fed->enclave.last_provisionally_granted.time - start_time, - fed->enclave.last_provisionally_granted.microstep - ); - return; - } - - // Forward the message or message chunk. - int destination_socket = fed->socket; - - LF_PRINT_DEBUG( - "RTI forwarding message to port %d of federate %hu of length %zu.", - reactor_port_id, - federate_id, - length - ); - - // Record this in-transit message in federate's in-transit message queue. - if (lf_tag_compare(fed->enclave.completed, intended_tag) < 0) { - // Add a record of this message to the list of in-transit messages to this federate. - add_in_transit_message_record( - fed->in_transit_message_tags, - intended_tag - ); - LF_PRINT_DEBUG( - "RTI: Adding a message with tag " PRINTF_TAG " to the list of in-transit messages for federate %d.", - intended_tag.time - lf_time_start(), - intended_tag.microstep, - federate_id - ); - } else { - lf_print_error( - "RTI: Federate %d has already completed tag " PRINTF_TAG - ", but there is an in-transit message with tag " PRINTF_TAG " from federate %hu. 
" - "This is going to cause an STP violation under centralized coordination.", - federate_id, - fed->enclave.completed.time - lf_time_start(), - fed->enclave.completed.microstep, - intended_tag.time - lf_time_start(), - intended_tag.microstep, - sending_federate->enclave.id - ); - // FIXME: Drop the federate? - } - - // Need to make sure that the destination federate's thread has already - // sent the starting MSG_TYPE_TIMESTAMP message. - while (fed->enclave.state == PENDING) { - // Need to wait here. - lf_cond_wait(&sent_start_time); - } - - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_TAGGED_MSG, federate_id, &intended_tag); - } - - write_to_socket_errexit(destination_socket, bytes_read, buffer, - "RTI failed to forward message to federate %d.", federate_id); - - // The message length may be longer than the buffer, - // in which case we have to handle it in chunks. - size_t total_bytes_read = bytes_read; - while (total_bytes_read < total_bytes_to_read) { - LF_PRINT_DEBUG("Forwarding message in chunks."); - bytes_to_read = total_bytes_to_read - total_bytes_read; - if (bytes_to_read > FED_COM_BUFFER_SIZE) { - bytes_to_read = FED_COM_BUFFER_SIZE; - } - read_from_socket_errexit(sending_federate->socket, bytes_to_read, buffer, - "RTI failed to read message chunks."); - total_bytes_read += bytes_to_read; - - // FIXME: a mutex needs to be held for this so that other threads - // do not write to destination_socket and cause interleaving. However, - // holding the rti_mutex might be very expensive. Instead, each outgoing - // socket should probably have its own mutex. 
- write_to_socket_errexit(destination_socket, bytes_to_read, buffer, - "RTI failed to send message chunks."); - } - - update_federate_next_event_tag_locked(federate_id, intended_tag); - - lf_mutex_unlock(&rti_mutex); -} - -void handle_logical_tag_complete(federate_t* fed) { - unsigned char buffer[sizeof(int64_t) + sizeof(uint32_t)]; - read_from_socket_errexit(fed->socket, sizeof(int64_t) + sizeof(uint32_t), buffer, - "RTI failed to read the content of the logical tag complete from federate %d.", fed->enclave.id); - tag_t completed = extract_tag(buffer); - if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_LTC, fed->enclave.id, &completed); - } - logical_tag_complete(&(fed->enclave), completed); - - // FIXME: Should this function be in the enclave version? - lf_mutex_lock(&rti_mutex); - // See if we can remove any of the recorded in-transit messages for this. - clean_in_transit_message_record_up_to_tag(fed->in_transit_message_tags, fed->enclave.completed); - lf_mutex_unlock(&rti_mutex); -} - -void handle_next_event_tag(federate_t* fed) { - unsigned char buffer[sizeof(int64_t) + sizeof(uint32_t)]; - read_from_socket_errexit(fed->socket, sizeof(int64_t) + sizeof(uint32_t), buffer, - "RTI failed to read the content of the next event tag from federate %d.", fed->enclave.id); - - // Acquire a mutex lock to ensure that this state does not change while a - // message is in transport or being used to determine a TAG. - lf_mutex_lock(&rti_mutex); // FIXME: Instead of using a mutex, - // it might be more efficient to use a - // select() mechanism to read and process - // federates' buffers in an orderly fashion. 
- - - tag_t intended_tag = extract_tag(buffer); - if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_NET, fed->enclave.id, &intended_tag); - } - LF_PRINT_LOG("RTI received from federate %d the Next Event Tag (NET) " PRINTF_TAG, - fed->enclave.id, intended_tag.time - start_time, - intended_tag.microstep); - update_federate_next_event_tag_locked( - fed->enclave.id, - intended_tag - ); - lf_mutex_unlock(&rti_mutex); -} - -/////////////////// STOP functions //////////////////// - -/** - * Boolean used to prevent the RTI from sending the - * MSG_TYPE_STOP_GRANTED message multiple times. - */ -bool _lf_rti_stop_granted_already_sent_to_federates = false; - -/** - * Once the RTI has seen proposed tags from all connected federates, - * it will broadcast a MSG_TYPE_STOP_GRANTED carrying the _RTI.max_stop_tag. - * This function also checks the most recently received NET from - * each federate and resets that be no greater than the _RTI.max_stop_tag. - * - * This function assumes the caller holds the _RTI.rti_mutex lock. - */ -void _lf_rti_broadcast_stop_time_to_federates_locked() { - if (_lf_rti_stop_granted_already_sent_to_federates == true) { - return; - } - // Reply with a stop granted to all federates - unsigned char outgoing_buffer[MSG_TYPE_STOP_GRANTED_LENGTH]; - ENCODE_STOP_GRANTED(outgoing_buffer, _f_rti->max_stop_tag.time, _f_rti->max_stop_tag.microstep); - - // Iterate over federates and send each the message. - for (int i = 0; i < _f_rti->number_of_enclaves; i++) { - federate_t *fed = _f_rti->enclaves[i]; - if (fed->enclave.state == NOT_CONNECTED) { - continue; - } - if (lf_tag_compare(fed->enclave.next_event, _f_rti->max_stop_tag) >= 0) { - // Need the next_event to be no greater than the stop tag. 
- fed->enclave.next_event = _f_rti->max_stop_tag; - } - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_STOP_GRN, fed->enclave.id, &_f_rti->max_stop_tag); - } - write_to_socket_errexit(fed->socket, MSG_TYPE_STOP_GRANTED_LENGTH, outgoing_buffer, - "RTI failed to send MSG_TYPE_STOP_GRANTED message to federate %d.", fed->enclave.id); - } - - LF_PRINT_LOG("RTI sent to federates MSG_TYPE_STOP_GRANTED with tag (" PRINTF_TIME ", %u).", - _f_rti->max_stop_tag.time - start_time, - _f_rti->max_stop_tag.microstep); - _lf_rti_stop_granted_already_sent_to_federates = true; -} - -void mark_federate_requesting_stop(federate_t* fed) { - if (!fed->requested_stop) { - // Assume that the federate - // has requested stop - _f_rti->num_enclaves_handling_stop++; - fed->requested_stop = true; - } - if (_f_rti->num_enclaves_handling_stop == _f_rti->number_of_enclaves) { - // We now have information about the stop time of all - // federates. - _lf_rti_broadcast_stop_time_to_federates_locked(); - } -} - -void handle_stop_request_message(federate_t* fed) { - LF_PRINT_DEBUG("RTI handling stop_request from federate %d.", fed->enclave.id); - - size_t bytes_to_read = MSG_TYPE_STOP_REQUEST_LENGTH - 1; - unsigned char buffer[bytes_to_read]; - read_from_socket_errexit(fed->socket, bytes_to_read, buffer, - "RTI failed to read the MSG_TYPE_STOP_REQUEST payload from federate %d.", fed->enclave.id); - - // Acquire a mutex lock to ensure that this state does change while a - // message is in transport or being used to determine a TAG. 
- lf_mutex_lock(&rti_mutex); - - // Check whether we have already received a stop_tag - // from this federate - if (fed->requested_stop) { - // Ignore this request - lf_mutex_unlock(&rti_mutex); - return; - } - - // Extract the proposed stop tag for the federate - tag_t proposed_stop_tag = extract_tag(buffer); - - if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_STOP_REQ, fed->enclave.id, &proposed_stop_tag); - } - - // Update the maximum stop tag received from federates - if (lf_tag_compare(proposed_stop_tag, _f_rti->max_stop_tag) > 0) { - _f_rti->max_stop_tag = proposed_stop_tag; - } - - LF_PRINT_LOG("RTI received from federate %d a MSG_TYPE_STOP_REQUEST message with tag " PRINTF_TAG ".", - fed->enclave.id, proposed_stop_tag.time - start_time, proposed_stop_tag.microstep); - - // If this federate has not already asked - // for a stop, add it to the tally. - mark_federate_requesting_stop(fed); - - if (_f_rti->num_enclaves_handling_stop == _f_rti->number_of_enclaves) { - // We now have information about the stop time of all - // federates. This is extremely unlikely, but it can occur - // all federates call lf_request_stop() at the same tag. - lf_mutex_unlock(&rti_mutex); - return; - } - // Forward the stop request to all other federates that have not - // also issued a stop request. - unsigned char stop_request_buffer[MSG_TYPE_STOP_REQUEST_LENGTH]; - ENCODE_STOP_REQUEST(stop_request_buffer, _f_rti->max_stop_tag.time, _f_rti->max_stop_tag.microstep); - - // Iterate over federates and send each the MSG_TYPE_STOP_REQUEST message - // if we do not have a stop_time already for them. Do not do this more than once. 
- if (_f_rti->stop_in_progress) { - lf_mutex_unlock(&rti_mutex); - return; - } - _f_rti->stop_in_progress = true; - for (int i = 0; i < _f_rti->number_of_enclaves; i++) { - federate_t *f = _f_rti->enclaves[i]; - if (f->enclave.id != fed->enclave.id && f->requested_stop == false) { - if (f->enclave.state == NOT_CONNECTED) { - mark_federate_requesting_stop(f); - continue; - } - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_STOP_REQ, f->enclave.id, &_f_rti->max_stop_tag); - } - write_to_socket_errexit(f->socket, MSG_TYPE_STOP_REQUEST_LENGTH, stop_request_buffer, - "RTI failed to forward MSG_TYPE_STOP_REQUEST message to federate %d.", f->enclave.id); - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_STOP_REQ, f->enclave.id, &_f_rti->max_stop_tag); - } - } - } - LF_PRINT_LOG("RTI forwarded to federates MSG_TYPE_STOP_REQUEST with tag (" PRINTF_TIME ", %u).", - _f_rti->max_stop_tag.time - start_time, - _f_rti->max_stop_tag.microstep); - lf_mutex_unlock(&rti_mutex); -} - -void handle_stop_request_reply(federate_t* fed) { - size_t bytes_to_read = MSG_TYPE_STOP_REQUEST_REPLY_LENGTH - 1; - unsigned char buffer_stop_time[bytes_to_read]; - read_from_socket_errexit(fed->socket, bytes_to_read, buffer_stop_time, - "RTI failed to read the reply to MSG_TYPE_STOP_REQUEST message from federate %d.", fed->enclave.id); - - tag_t federate_stop_tag = extract_tag(buffer_stop_time); - - if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_STOP_REQ_REP, fed->enclave.id, &federate_stop_tag); - } - - LF_PRINT_LOG("RTI received from federate %d STOP reply tag " PRINTF_TAG ".", fed->enclave.id, - federate_stop_tag.time - start_time, - federate_stop_tag.microstep); - - // Acquire the mutex lock so that we can change the state of the RTI - lf_mutex_lock(&rti_mutex); - // If the federate has not requested stop before, count the reply - if (lf_tag_compare(federate_stop_tag, _f_rti->max_stop_tag) > 0) { 
- _f_rti->max_stop_tag = federate_stop_tag; - } - mark_federate_requesting_stop(fed); - lf_mutex_unlock(&rti_mutex); -} - -////////////////////////////////////////////////// - -void handle_address_query(uint16_t fed_id) { - federate_t *fed = _f_rti->enclaves[fed_id]; - // Use buffer both for reading and constructing the reply. - // The length is what is needed for the reply. - unsigned char buffer[sizeof(int32_t)]; - ssize_t bytes_read = read_from_socket(fed->socket, sizeof(uint16_t), (unsigned char*)buffer); - if (bytes_read == 0) { - lf_print_error_and_exit("Failed to read address query."); - } - uint16_t remote_fed_id = extract_uint16(buffer); - - if (_f_rti->tracing_enabled){ - tracepoint_rti_from_federate(_f_rti->trace, receive_ADR_QR, fed_id, NULL); - } - - LF_PRINT_DEBUG("RTI received address query from %d for %d.", fed_id, remote_fed_id); - - // NOTE: server_port initializes to -1, which means the RTI does not know - // the port number because it has not yet received an MSG_TYPE_ADDRESS_ADVERTISEMENT message - // from this federate. In that case, it will respond by sending -1. - - // Encode the port number. - federate_t *remote_fed = _f_rti->enclaves[remote_fed_id]; - encode_int32(remote_fed->server_port, (unsigned char*)buffer); - // Send the port number (which could be -1). - write_to_socket_errexit(fed->socket, sizeof(int32_t), (unsigned char*)buffer, - "Failed to write port number to socket of federate %d.", fed_id); - - // Send the server IP address to federate. 
- write_to_socket_errexit(fed->socket, sizeof(remote_fed->server_ip_addr), - (unsigned char *)&remote_fed->server_ip_addr, - "Failed to write ip address to socket of federate %d.", fed_id); - - if (remote_fed->server_port != -1) { - LF_PRINT_DEBUG("Replied to address query from federate %d with address %s:%d.", - fed_id, remote_fed->server_hostname, remote_fed->server_port); - } -} - -void handle_address_ad(uint16_t federate_id) { - federate_t *fed = _f_rti->enclaves[federate_id]; - // Read the port number of the federate that can be used for physical - // connections to other federates - int32_t server_port = -1; - unsigned char buffer[sizeof(int32_t)]; - ssize_t bytes_read = read_from_socket(fed->socket, sizeof(int32_t), (unsigned char *)buffer); - - if (bytes_read < (ssize_t)sizeof(int32_t)) { - LF_PRINT_DEBUG("Error reading port data from federate %d.", federate_id); - // Leave the server port at -1, which means "I don't know". - return; - } - - server_port = extract_int32(buffer); - - assert(server_port < 65536); - - lf_mutex_lock(&rti_mutex); - fed->server_port = server_port; - if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_ADR_AD, federate_id, NULL); - } - LF_PRINT_LOG("Received address advertisement from federate %d.", federate_id); - lf_mutex_unlock(&rti_mutex); -} - -void handle_timestamp(federate_t *my_fed) { - unsigned char buffer[sizeof(int64_t)]; - // Read bytes from the socket. We need 8 bytes. 
- ssize_t bytes_read = read_from_socket(my_fed->socket, sizeof(int64_t), (unsigned char*)&buffer); - if (bytes_read < (ssize_t)sizeof(int64_t)) { - lf_print_error("ERROR reading timestamp from federate %d.", my_fed->enclave.id); - } - - int64_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(&buffer))); - if (_f_rti->tracing_enabled) { - tag_t tag = {.time = timestamp, .microstep = 0}; - tracepoint_rti_from_federate(_f_rti->trace, receive_TIMESTAMP, my_fed->enclave.id, &tag); - } - LF_PRINT_DEBUG("RTI received timestamp message with time: " PRINTF_TIME ".", timestamp); - - lf_mutex_lock(&rti_mutex); - _f_rti->num_feds_proposed_start++; - if (timestamp > _f_rti->max_start_time) { - _f_rti->max_start_time = timestamp; - } - if (_f_rti->num_feds_proposed_start == _f_rti->number_of_enclaves) { - // All federates have proposed a start time. - lf_cond_broadcast(&received_start_times); - } else { - // Some federates have not yet proposed a start time. - // wait for a notification. - while (_f_rti->num_feds_proposed_start < _f_rti->number_of_enclaves) { - // FIXME: Should have a timeout here? - lf_cond_wait(&received_start_times); - } - } - - lf_mutex_unlock(&rti_mutex); - - // Send back to the federate the maximum time plus an offset on a TIMESTAMP - // message. - unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_LENGTH]; - start_time_buffer[0] = MSG_TYPE_TIMESTAMP; - // Add an offset to this start time to get everyone starting together. 
- start_time = _f_rti->max_start_time + DELAY_START; - encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); - - if (_f_rti->tracing_enabled) { - tag_t tag = {.time = start_time, .microstep = 0}; - tracepoint_rti_to_federate(_f_rti->trace, send_TIMESTAMP, my_fed->enclave.id, &tag); - } - ssize_t bytes_written = write_to_socket( - my_fed->socket, MSG_TYPE_TIMESTAMP_LENGTH, - start_time_buffer - ); - if (bytes_written < MSG_TYPE_TIMESTAMP_LENGTH) { - lf_print_error("Failed to send the starting time to federate %d.", my_fed->enclave.id); - } - - lf_mutex_lock(&rti_mutex); - // Update state for the federate to indicate that the MSG_TYPE_TIMESTAMP - // message has been sent. That MSG_TYPE_TIMESTAMP message grants time advance to - // the federate to the start time. - my_fed->enclave.state = GRANTED; - lf_cond_broadcast(&sent_start_time); - LF_PRINT_LOG("RTI sent start time " PRINTF_TIME " to federate %d.", start_time, my_fed->enclave.id); - lf_mutex_unlock(&rti_mutex); -} - -void send_physical_clock(unsigned char message_type, federate_t* fed, socket_type_t socket_type) { - if (fed->enclave.state == NOT_CONNECTED) { - lf_print_warning("Clock sync: RTI failed to send physical time to federate %d. Socket not connected.", - fed->enclave.id); - return; - } - unsigned char buffer[sizeof(int64_t) + 1]; - buffer[0] = message_type; - int64_t current_physical_time = lf_time_physical(); - encode_int64(current_physical_time, &(buffer[1])); - - // Send the message - if (socket_type == UDP) { - // FIXME: UDP_addr is never initialized. 
- LF_PRINT_DEBUG("Clock sync: RTI sending UDP message type %u.", buffer[0]); - ssize_t bytes_written = sendto(_f_rti->socket_descriptor_UDP, buffer, 1 + sizeof(int64_t), 0, - (struct sockaddr*)&fed->UDP_addr, sizeof(fed->UDP_addr)); - if (bytes_written < (ssize_t)sizeof(int64_t) + 1) { - lf_print_warning("Clock sync: RTI failed to send physical time to federate %d: %s", - fed->enclave.id, - strerror(errno)); - return; - } - } else if (socket_type == TCP) { - LF_PRINT_DEBUG("Clock sync: RTI sending TCP message type %u.", buffer[0]); - write_to_socket_errexit(fed->socket, 1 + sizeof(int64_t), buffer, - "Clock sync: RTI failed to send physical time to federate %d: %s.", - fed->enclave.id, - strerror(errno)); - } - LF_PRINT_DEBUG("Clock sync: RTI sent PHYSICAL_TIME_SYNC_MESSAGE with timestamp " PRINTF_TIME " to federate %d.", - current_physical_time, - fed->enclave.id); -} - -void handle_physical_clock_sync_message(federate_t* my_fed, socket_type_t socket_type) { - // Lock the mutex to prevent interference between sending the two - // coded probe messages. - lf_mutex_lock(&rti_mutex); - // Reply with a T4 type message - send_physical_clock(MSG_TYPE_CLOCK_SYNC_T4, my_fed, socket_type); - // Send the corresponding coded probe immediately after, - // but only if this is a UDP channel. - if (socket_type == UDP) { - send_physical_clock(MSG_TYPE_CLOCK_SYNC_CODED_PROBE, my_fed, socket_type); - } - lf_mutex_unlock(&rti_mutex); -} - -void* clock_synchronization_thread(void* noargs) { - - // Wait until all federates have been notified of the start time. - // FIXME: Use lf_ version of this when merged with master. - lf_mutex_lock(&rti_mutex); - while (_f_rti->num_feds_proposed_start < _f_rti->number_of_enclaves) { - lf_cond_wait(&received_start_times); - } - lf_mutex_unlock(&rti_mutex); - - // Wait until the start time before starting clock synchronization. - // The above wait ensures that start_time has been set. 
- interval_t ns_to_wait = start_time - lf_time_physical(); - - if (ns_to_wait > 0LL) { - lf_sleep(ns_to_wait); - } - - // Initiate a clock synchronization every _f_rti->clock_sync_period_ns - // Initiate a clock synchronization every _f_rti->clock_sync_period_ns - struct timespec sleep_time = {(time_t) _f_rti->clock_sync_period_ns / BILLION, - _f_rti->clock_sync_period_ns % BILLION}; - struct timespec remaining_time; - - bool any_federates_connected = true; - while (any_federates_connected) { - // Sleep - lf_sleep(_f_rti->clock_sync_period_ns); // Can be interrupted - any_federates_connected = false; - for (int fed_id = 0; fed_id < _f_rti->number_of_enclaves; fed_id++) { - federate_t* fed = _f_rti->enclaves[fed_id]; - if (fed->enclave.state == NOT_CONNECTED) { - // FIXME: We need better error handling here, but clock sync failure - // should not stop execution. - lf_print_error("Clock sync failed with federate %d. Not connected.", fed_id); - continue; - } else if (!fed->clock_synchronization_enabled) { - continue; - } - // Send the RTI's current physical time to the federate - // Send on UDP. - LF_PRINT_DEBUG("RTI sending T1 message to initiate clock sync round."); - send_physical_clock(MSG_TYPE_CLOCK_SYNC_T1, fed, UDP); - - // Listen for reply message, which should be T3. - size_t message_size = 1 + sizeof(int32_t); - unsigned char buffer[message_size]; - // Maximum number of messages that we discard before giving up on this cycle. - // If the T3 message from this federate does not arrive and we keep receiving - // other messages, then give up on this federate and move to the next federate. - int remaining_attempts = 5; - while (remaining_attempts > 0) { - remaining_attempts--; - int bytes_read = read_from_socket(_f_rti->socket_descriptor_UDP, message_size, buffer); - // If any errors occur, either discard the message or the clock sync round. 
- if (bytes_read == message_size) { - if (buffer[0] == MSG_TYPE_CLOCK_SYNC_T3) { - int32_t fed_id_2 = extract_int32(&(buffer[1])); - // Check that this message came from the correct federate. - if (fed_id_2 != fed->enclave.id) { - // Message is from the wrong federate. Discard the message. - lf_print_warning("Clock sync: Received T3 message from federate %d, " - "but expected one from %d. Discarding message.", - fed_id_2, fed->enclave.id); - continue; - } - LF_PRINT_DEBUG("Clock sync: RTI received T3 message from federate %d.", fed_id_2); - handle_physical_clock_sync_message(_f_rti->enclaves[fed_id_2], UDP); - break; - } else { - // The message is not a T3 message. Discard the message and - // continue waiting for the T3 message. This is possibly a message - // from a previous cycle that was discarded. - lf_print_warning("Clock sync: Unexpected UDP message %u. Expected %u from federate %d. " - "Discarding message.", - buffer[0], - MSG_TYPE_CLOCK_SYNC_T3, - fed->enclave.id); - continue; - } - } else { - lf_print_warning("Clock sync: Read from UDP socket failed: %s. " - "Skipping clock sync round for federate %d.", - strerror(errno), - fed->enclave.id); - remaining_attempts = -1; - } - } - if (remaining_attempts > 0) { - any_federates_connected = true; - } - } - } - return NULL; -} - -void handle_federate_resign(federate_t *my_fed) { - // Nothing more to do. Close the socket and exit. - lf_mutex_lock(&rti_mutex); - if (_f_rti->tracing_enabled) { - // Extract the tag, for tracing purposes - size_t header_size = 1 + sizeof(tag_t); - unsigned char buffer[header_size]; - // Read the header, minus the first byte which has already been read. 
- read_from_socket_errexit(my_fed->socket, header_size - 1, &(buffer[1]), - "RTI failed to read the timed message header from remote federate."); - // Extract the tag sent by the resigning federate - tag_t tag = extract_tag(&(buffer[1])); - tracepoint_rti_from_federate(_f_rti->trace, receive_RESIGN, my_fed->enclave.id, &tag); - } - - my_fed->enclave.state = NOT_CONNECTED; - - // Indicate that there will no further events from this federate. - my_fed->enclave.next_event = FOREVER_TAG; - - // According to this: https://stackoverflow.com/questions/4160347/close-vs-shutdown-socket, - // the close should happen when receiving a 0 length message from the other end. - // Here, we just signal the other side that no further writes to the socket are - // forthcoming, which should result in the other end getting a zero-length reception. - shutdown(my_fed->socket, SHUT_WR); - // Do not close because this results in an error on the other side rather than - // an orderly shutdown. - // close(my_fed->socket); // from unistd.h - - lf_print("Federate %d has resigned.", my_fed->enclave.id); - - // Check downstream federates to see whether they should now be granted a TAG. - // To handle cycles, need to create a boolean array to keep - // track of which upstream federates have been visited. - bool* visited = (bool*)calloc(_f_rti->number_of_enclaves, sizeof(bool)); // Initializes to 0. - notify_downstream_advance_grant_if_safe(&(my_fed->enclave), visited); - free(visited); - - lf_mutex_unlock(&rti_mutex); -} - -void* federate_thread_TCP(void* fed) { - federate_t* my_fed = (federate_t*)fed; - - // Buffer for incoming messages. - // This does not constrain the message size because messages - // are forwarded piece by piece. - unsigned char buffer[FED_COM_BUFFER_SIZE]; - - // Listen for messages from the federate. - while (my_fed->enclave.state != NOT_CONNECTED) { - // Read no more than one byte to get the message type. 
- ssize_t bytes_read = read_from_socket(my_fed->socket, 1, buffer); - if (bytes_read < 1) { - // Socket is closed - lf_print_warning("RTI: Socket to federate %d is closed. Exiting the thread.", my_fed->enclave.id); - my_fed->enclave.state = NOT_CONNECTED; - my_fed->socket = -1; - // FIXME: We need better error handling here, but do not stop execution here. - break; - } - LF_PRINT_DEBUG("RTI: Received message type %u from federate %d.", buffer[0], my_fed->enclave.id); - switch(buffer[0]) { - case MSG_TYPE_TIMESTAMP: - handle_timestamp(my_fed); - break; - case MSG_TYPE_ADDRESS_QUERY: - handle_address_query(my_fed->enclave.id); - break; - case MSG_TYPE_ADDRESS_ADVERTISEMENT: - handle_address_ad(my_fed->enclave.id); - break; - case MSG_TYPE_TAGGED_MESSAGE: - handle_timed_message(my_fed, buffer); - break; - case MSG_TYPE_RESIGN: - handle_federate_resign(my_fed); - return NULL; - break; - case MSG_TYPE_NEXT_EVENT_TAG: - handle_next_event_tag(my_fed); - break; - case MSG_TYPE_LOGICAL_TAG_COMPLETE: - handle_logical_tag_complete(my_fed); - break; - case MSG_TYPE_STOP_REQUEST: - handle_stop_request_message(my_fed); // FIXME: Reviewed until here. - // Need to also look at - // notify_advance_grant_if_safe() - // and notify_downstream_advance_grant_if_safe() - break; - case MSG_TYPE_STOP_REQUEST_REPLY: - handle_stop_request_reply(my_fed); - break; - case MSG_TYPE_PORT_ABSENT: - handle_port_absent_message(my_fed, buffer); - break; - default: - lf_print_error("RTI received from federate %d an unrecognized TCP message type: %u.", my_fed->enclave.id, buffer[0]); - if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_UNIDENTIFIED, my_fed->enclave.id, NULL); - } - } - } - - // Nothing more to do. Close the socket and exit. 
- close(my_fed->socket); // from unistd.h - - return NULL; -} - -void send_reject(int socket_id, unsigned char error_code) { - LF_PRINT_DEBUG("RTI sending MSG_TYPE_REJECT."); - unsigned char response[2]; - response[0] = MSG_TYPE_REJECT; - response[1] = error_code; - // NOTE: Ignore errors on this response. - write_to_socket_errexit(socket_id, 2, response, "RTI failed to write MSG_TYPE_REJECT message on the socket."); - // Close the socket. - close(socket_id); -} - -int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* client_fd) { - // Buffer for message ID, federate ID, and federation ID length. - size_t length = 1 + sizeof(uint16_t) + 1; // Message ID, federate ID, length of fedration ID. - unsigned char buffer[length]; - - // Read bytes from the socket. We need 4 bytes. - // FIXME: This should not exit with error but rather should just reject the connection. - read_from_socket_errexit(socket_id, length, buffer, "RTI failed to read from accepted socket."); - - uint16_t fed_id = _f_rti->number_of_enclaves; // Initialize to an invalid value. - - // First byte received is the message type. - if (buffer[0] != MSG_TYPE_FED_IDS) { - if(buffer[0] == MSG_TYPE_P2P_SENDING_FED_ID || buffer[0] == MSG_TYPE_P2P_TAGGED_MESSAGE) { - // The federate is trying to connect to a peer, not to the RTI. - // It has connected to the RTI instead. - // FIXME: This should not happen, but apparently has been observed. - // It should not happen because the peers get the port and IP address - // of the peer they want to connect to from the RTI. - // If the connection is a peer-to-peer connection between two - // federates, reject the connection with the WRONG_SERVER error. - send_reject(socket_id, WRONG_SERVER); - } else { - send_reject(socket_id, UNEXPECTED_MESSAGE); - } - if (_f_rti->tracing_enabled){ - tracepoint_rti_to_federate(_f_rti->trace, send_REJECT, fed_id, NULL); - } - lf_print_error("RTI expected a MSG_TYPE_FED_IDS message. 
Got %u (see net_common.h).", buffer[0]); - return -1; - } else { - // Received federate ID. - fed_id = extract_uint16(buffer + 1); - LF_PRINT_DEBUG("RTI received federate ID: %d.", fed_id); - - // Read the federation ID. First read the length, which is one byte. - size_t federation_id_length = (size_t)buffer[sizeof(uint16_t) + 1]; - char federation_id_received[federation_id_length + 1]; // One extra for null terminator. - // Next read the actual federation ID. - // FIXME: This should not exit on error, but rather just reject the connection. - read_from_socket_errexit(socket_id, federation_id_length, - (unsigned char*)federation_id_received, - "RTI failed to read federation id from federate %d.", fed_id); - - // Terminate the string with a null. - federation_id_received[federation_id_length] = 0; - - LF_PRINT_DEBUG("RTI received federation ID: %s.", federation_id_received); - - if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_FED_ID, fed_id, NULL); - } - // Compare the received federation ID to mine. - if (strncmp(_f_rti->federation_id, federation_id_received, federation_id_length) != 0) { - // Federation IDs do not match. Send back a MSG_TYPE_REJECT message. - lf_print_error("WARNING: Federate from another federation %s attempted to connect to RTI in federation %s.", - federation_id_received, - _f_rti->federation_id); - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_REJECT, fed_id, NULL); - } - send_reject(socket_id, FEDERATION_ID_DOES_NOT_MATCH); - return -1; - } else { - if (fed_id >= _f_rti->number_of_enclaves) { - // Federate ID is out of range. 
- lf_print_error("RTI received federate ID %d, which is out of range.", fed_id); - if (_f_rti->tracing_enabled){ - tracepoint_rti_to_federate(_f_rti->trace, send_REJECT, fed_id, NULL); - } - send_reject(socket_id, FEDERATE_ID_OUT_OF_RANGE); - return -1; - } else { - if ((_f_rti->enclaves[fed_id])->enclave.state != NOT_CONNECTED) { - lf_print_error("RTI received duplicate federate ID: %d.", fed_id); - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_REJECT, fed_id, NULL); - } - send_reject(socket_id, FEDERATE_ID_IN_USE); - return -1; - } - } - } - } - federate_t* fed = _f_rti->enclaves[fed_id]; - // The MSG_TYPE_FED_IDS message has the right federation ID. - // Assign the address information for federate. - // The IP address is stored here as an in_addr struct (in .server_ip_addr) that can be useful - // to create sockets and can be efficiently sent over the network. - // First, convert the sockaddr structure into a sockaddr_in that contains an internet address. - struct sockaddr_in* pV4_addr = client_fd; - // Then extract the internet address (which is in IPv4 format) and assign it as the federate's socket server - fed->server_ip_addr = pV4_addr->sin_addr; - -#if LOG_LEVEL >= LOG_LEVEL_DEBUG - // Create the human readable format and copy that into - // the .server_hostname field of the federate. - char str[INET_ADDRSTRLEN]; - inet_ntop( AF_INET, &fed->server_ip_addr, str, INET_ADDRSTRLEN ); - strncpy (fed->server_hostname, str, INET_ADDRSTRLEN); - - LF_PRINT_DEBUG("RTI got address %s from federate %d.", fed->server_hostname, fed_id); -#endif - fed->socket = socket_id; - - // Set the federate's state as pending - // because it is waiting for the start time to be - // sent by the RTI before beginning its execution. - fed->enclave.state = PENDING; - - LF_PRINT_DEBUG("RTI responding with MSG_TYPE_ACK to federate %d.", fed_id); - // Send an MSG_TYPE_ACK message. 
- unsigned char ack_message = MSG_TYPE_ACK; - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_ACK, fed_id, NULL); - } - write_to_socket_errexit(socket_id, 1, &ack_message, - "RTI failed to write MSG_TYPE_ACK message to federate %d.", fed_id); - - return (int32_t)fed_id; -} - -int receive_connection_information(int socket_id, uint16_t fed_id) { - LF_PRINT_DEBUG("RTI waiting for MSG_TYPE_NEIGHBOR_STRUCTURE from federate %d.", fed_id); - unsigned char connection_info_header[MSG_TYPE_NEIGHBOR_STRUCTURE_HEADER_SIZE]; - read_from_socket_errexit( - socket_id, - MSG_TYPE_NEIGHBOR_STRUCTURE_HEADER_SIZE, - connection_info_header, - "RTI failed to read MSG_TYPE_NEIGHBOR_STRUCTURE message header from federate %d.", - fed_id - ); - - if (connection_info_header[0] != MSG_TYPE_NEIGHBOR_STRUCTURE) { - lf_print_error("RTI was expecting a MSG_TYPE_UDP_PORT message from federate %d. Got %u instead. " - "Rejecting federate.", fed_id, connection_info_header[0]); - send_reject(socket_id, UNEXPECTED_MESSAGE); - return 0; - } else { - federate_t* fed = _f_rti->enclaves[fed_id]; - // Read the number of upstream and downstream connections - fed->enclave.num_upstream = extract_int32(&(connection_info_header[1])); - fed->enclave.num_downstream = extract_int32(&(connection_info_header[1 + sizeof(int32_t)])); - LF_PRINT_DEBUG( - "RTI got %d upstreams and %d downstreams from federate %d.", - fed->enclave.num_upstream, - fed->enclave.num_downstream, - fed_id); - - // Allocate memory for the upstream and downstream pointers - fed->enclave.upstream = (int*)malloc(sizeof(uint16_t) * fed->enclave.num_upstream); - fed->enclave.downstream = (int*)malloc(sizeof(uint16_t) * fed->enclave.num_downstream); - - // Allocate memory for the upstream delay pointers - fed->enclave.upstream_delay = - (interval_t*)malloc( - sizeof(interval_t) * fed->enclave.num_upstream - ); - - size_t connections_info_body_size = ((sizeof(uint16_t) + sizeof(int64_t)) * - fed->enclave.num_upstream) + 
(sizeof(uint16_t) * fed->enclave.num_downstream); - unsigned char* connections_info_body = (unsigned char*)malloc(connections_info_body_size); - read_from_socket_errexit( - socket_id, - connections_info_body_size, - connections_info_body, - "RTI failed to read MSG_TYPE_NEIGHBOR_STRUCTURE message body from federate %d.", - fed_id - ); - - // Keep track of where we are in the buffer - size_t message_head = 0; - // First, read the info about upstream federates - for (int i=0; ienclave.num_upstream; i++) { - fed->enclave.upstream[i] = extract_uint16(&(connections_info_body[message_head])); - message_head += sizeof(uint16_t); - fed->enclave.upstream_delay[i] = extract_int64(&(connections_info_body[message_head])); - message_head += sizeof(int64_t); - } - - // Next, read the info about downstream federates - for (int i=0; ienclave.num_downstream; i++) { - fed->enclave.downstream[i] = extract_uint16(&(connections_info_body[message_head])); - message_head += sizeof(uint16_t); - } - - free(connections_info_body); - return 1; - } -} - -int receive_udp_message_and_set_up_clock_sync(int socket_id, uint16_t fed_id) { - // Read the MSG_TYPE_UDP_PORT message from the federate regardless of the status of - // clock synchronization. This message will tell the RTI whether the federate - // is doing clock synchronization, and if it is, what port to use for UDP. - LF_PRINT_DEBUG("RTI waiting for MSG_TYPE_UDP_PORT from federate %d.", fed_id); - unsigned char response[1 + sizeof(uint16_t)]; - read_from_socket_errexit(socket_id, 1 + sizeof(uint16_t) , response, - "RTI failed to read MSG_TYPE_UDP_PORT message from federate %d.", fed_id); - if (response[0] != MSG_TYPE_UDP_PORT) { - lf_print_error("RTI was expecting a MSG_TYPE_UDP_PORT message from federate %d. Got %u instead. 
" - "Rejecting federate.", fed_id, response[0]); - send_reject(socket_id, UNEXPECTED_MESSAGE); - return 0; - } else { - federate_t *fed = _f_rti->enclaves[fed_id]; - if (_f_rti->clock_sync_global_status >= clock_sync_init) {// If no initial clock sync, no need perform initial clock sync. - uint16_t federate_UDP_port_number = extract_uint16(&(response[1])); - - LF_PRINT_DEBUG("RTI got MSG_TYPE_UDP_PORT %u from federate %d.", federate_UDP_port_number, fed_id); - - // A port number of UINT16_MAX means initial clock sync should not be performed. - if (federate_UDP_port_number != UINT16_MAX) { - // Perform the initialization clock synchronization with the federate. - // Send the required number of messages for clock synchronization - for (int i=0; i < _f_rti->clock_sync_exchanges_per_interval; i++) { - // Send the RTI's current physical time T1 to the federate. - send_physical_clock(MSG_TYPE_CLOCK_SYNC_T1, fed, TCP); - - // Listen for reply message, which should be T3. - size_t message_size = 1 + sizeof(int32_t); - unsigned char buffer[message_size]; - read_from_socket_errexit(socket_id, message_size, buffer, - "Socket to federate %d unexpectedly closed.", fed_id); - if (buffer[0] == MSG_TYPE_CLOCK_SYNC_T3) { - int32_t fed_id = extract_int32(&(buffer[1])); - assert(fed_id > -1); - assert(fed_id < 65536); - LF_PRINT_DEBUG("RTI received T3 clock sync message from federate %d.", fed_id); - handle_physical_clock_sync_message(fed, TCP); - } else { - lf_print_error("Unexpected message %u from federate %d.", buffer[0], fed_id); - send_reject(socket_id, UNEXPECTED_MESSAGE); - return 0; - } - } - LF_PRINT_DEBUG("RTI finished initial clock synchronization with federate %d.", fed_id); - } - if (_f_rti->clock_sync_global_status >= clock_sync_on) { // If no runtime clock sync, no need to set up the UDP port. 
- if (federate_UDP_port_number > 0) { - // Initialize the UDP_addr field of the federate struct - fed->UDP_addr.sin_family = AF_INET; - fed->UDP_addr.sin_port = htons(federate_UDP_port_number); - fed->UDP_addr.sin_addr = fed->server_ip_addr; - } - } else { - // Disable clock sync after initial round. - fed->clock_synchronization_enabled = false; - } - } else { // No clock synchronization at all. - // Clock synchronization is universally disabled via the clock-sync command-line parameter - // (-c off was passed to the RTI). - // Note that the federates are still going to send a MSG_TYPE_UDP_PORT message but with a payload (port) of -1. - fed->clock_synchronization_enabled = false; - } - } - return 1; -} - -#ifdef __RTI_AUTH__ -bool authenticate_federate(int socket) { - // Wait for MSG_TYPE_FED_NONCE from federate. - size_t fed_id_length = sizeof(uint16_t); - unsigned char buffer[1 + fed_id_length + NONCE_LENGTH]; - read_from_socket_errexit(socket, 1 + fed_id_length + NONCE_LENGTH, buffer, - "Failed to read MSG_TYPE_FED_NONCE"); - if (buffer[0] != MSG_TYPE_FED_NONCE) { - lf_print_error_and_exit( - "Received unexpected response %u from the FED (see net_common.h).", - buffer[0]); - } - unsigned int hmac_length = SHA256_HMAC_LENGTH; - size_t federation_id_length = strnlen(_f_rti->federation_id, 255); - // HMAC tag is created with MSG_TYPE, federate ID, received federate nonce. - unsigned char mac_buf[1 + fed_id_length + NONCE_LENGTH]; - mac_buf[0] = MSG_TYPE_RTI_RESPONSE; - memcpy(&mac_buf[1], &buffer[1], fed_id_length); - memcpy(&mac_buf[1 + fed_id_length], &buffer[1 + fed_id_length], NONCE_LENGTH); - unsigned char hmac_tag[hmac_length]; - unsigned char * ret = HMAC(EVP_sha256(), _f_rti->federation_id, - federation_id_length, mac_buf, 1 + fed_id_length + NONCE_LENGTH, - hmac_tag, &hmac_length); - if (ret == NULL) { - lf_print_error_and_exit("HMAC construction failed for MSG_TYPE_RTI_RESPONSE."); - } - // Make buffer for message type, RTI's nonce, and HMAC tag. 
- unsigned char sender[1 + NONCE_LENGTH + hmac_length]; - sender[0] = MSG_TYPE_RTI_RESPONSE; - unsigned char rti_nonce[NONCE_LENGTH]; - RAND_bytes(rti_nonce, NONCE_LENGTH); - memcpy(&sender[1], rti_nonce, NONCE_LENGTH); - memcpy(&sender[1 + NONCE_LENGTH], hmac_tag, hmac_length); - write_to_socket(socket, 1 + NONCE_LENGTH + hmac_length, sender); - - // Wait for MSG_TYPE_FED_RESPONSE - unsigned char received[1 + hmac_length]; - read_from_socket_errexit(socket, 1 + hmac_length, received, - "Failed to read federate response."); - if (received[0] != MSG_TYPE_FED_RESPONSE) { - lf_print_error_and_exit( - "Received unexpected response %u from the federate (see net_common.h).", - received[0]); - return false; - } - // HMAC tag is created with MSG_TYPE_FED_RESPONSE and RTI's nonce. - unsigned char mac_buf2[1 + NONCE_LENGTH]; - mac_buf2[0] = MSG_TYPE_FED_RESPONSE; - memcpy(&mac_buf2[1], rti_nonce, NONCE_LENGTH); - unsigned char rti_tag[hmac_length]; - ret = HMAC(EVP_sha256(), _f_rti->federation_id, federation_id_length, - mac_buf2, 1 + NONCE_LENGTH, rti_tag, &hmac_length); - if (ret == NULL) { - lf_print_error_and_exit("HMAC construction failed for MSG_TYPE_FED_RESPONSE."); - } - // Compare received tag and created tag. - if (memcmp(&received[1], rti_tag, hmac_length) != 0) { - // Federation IDs do not match. Send back a HMAC_DOES_NOT_MATCH message. - lf_print_warning("HMAC authentication failed. Rejecting the federate."); - send_reject(socket, HMAC_DOES_NOT_MATCH); - return false; - } else { - LF_PRINT_LOG("Federate's HMAC verified."); - return true; - } -} -#endif - -void connect_to_federates(int socket_descriptor) { - for (int i = 0; i < _f_rti->number_of_enclaves; i++) { - // Wait for an incoming connection request. - struct sockaddr client_fd; - uint32_t client_length = sizeof(client_fd); - // The following blocks until a federate connects. 
- int socket_id = -1; - while(1) { - socket_id = accept(_f_rti->socket_descriptor_TCP, &client_fd, &client_length); - if (socket_id >= 0) { - // Got a socket - break; - } else if (socket_id < 0 && (errno != EAGAIN || errno != EWOULDBLOCK)) { - lf_print_error_and_exit("RTI failed to accept the socket. %s.", strerror(errno)); - } else { - // Try again - lf_print_warning("RTI failed to accept the socket. %s. Trying again.", strerror(errno)); - continue; - } - } - - // Wait for the first message from the federate when RTI -a option is on. - #ifdef __RTI_AUTH__ - if (_f_rti->authentication_enabled) { - if (!authenticate_federate(socket_id)) { - lf_print_warning("RTI failed to authenticate the incoming federate."); - // Ignore the federate that failed authentication. - i--; - continue; - } - } - #endif - - // The first message from the federate should contain its ID and the federation ID. - int32_t fed_id = receive_and_check_fed_id_message(socket_id, (struct sockaddr_in*)&client_fd); - if (fed_id >= 0 - && receive_connection_information(socket_id, (uint16_t)fed_id) - && receive_udp_message_and_set_up_clock_sync(socket_id, (uint16_t)fed_id)) { - - // Create a thread to communicate with the federate. - // This has to be done after clock synchronization is finished - // or that thread may end up attempting to handle incoming clock - // synchronization messages. - federate_t *fed = _f_rti->enclaves[fed_id]; - lf_thread_create(&(fed->thread_id), federate_thread_TCP, fed); - - } else { - // Received message was rejected. Try again. - i--; - } - } - // All federates have connected. - LF_PRINT_DEBUG("All federates have connected to RTI."); - - if (_f_rti->clock_sync_global_status >= clock_sync_on) { - // Create the thread that performs periodic PTP clock synchronization sessions - // over the UDP channel, but only if the UDP channel is open and at least one - // federate is performing runtime clock synchronization. 
- bool clock_sync_enabled = false; - for (int i = 0; i < _f_rti->number_of_enclaves; i++) { - if ((_f_rti->enclaves[i])->clock_synchronization_enabled) { - clock_sync_enabled = true; - break; - } - } - if (_f_rti->final_port_UDP != UINT16_MAX && clock_sync_enabled) { - lf_thread_create(&_f_rti->clock_thread, clock_synchronization_thread, NULL); - } - } -} - -void* respond_to_erroneous_connections(void* nothing) { - while (true) { - // Wait for an incoming connection request. - struct sockaddr client_fd; - uint32_t client_length = sizeof(client_fd); - // The following will block until either a federate attempts to connect - // or close(_f_rti->socket_descriptor_TCP) is called. - int socket_id = accept(_f_rti->socket_descriptor_TCP, &client_fd, &client_length); - if (socket_id < 0) return NULL; - - if (_f_rti->all_federates_exited) { - return NULL; - } - - lf_print_error("RTI received an unexpected connection request. Federation is running."); - unsigned char response[2]; - response[0] = MSG_TYPE_REJECT; - response[1] = FEDERATION_ID_DOES_NOT_MATCH; - // Ignore errors on this response. - write_to_socket_errexit(socket_id, 2, response, - "RTI failed to write FEDERATION_ID_DOES_NOT_MATCH to erroneous incoming connection."); - // Close the socket. - close(socket_id); - } - return NULL; -} - -void initialize_federate(federate_t* fed, uint16_t id) { - initialize_enclave(&(fed->enclave), id); - fed->requested_stop = false; - fed->socket = -1; // No socket. - fed->clock_synchronization_enabled = true; - fed->in_transit_message_tags = initialize_in_transit_message_q(); - strncpy(fed->server_hostname ,"localhost", INET_ADDRSTRLEN); - fed->server_ip_addr.s_addr = 0; - fed->server_port = -1; -} - -int32_t start_rti_server(uint16_t port) { - int32_t specified_port = port; - if (port == 0) { - // Use the default starting port. 
- port = STARTING_PORT; - } - _lf_initialize_clock(); - // Create the TCP socket server - _f_rti->socket_descriptor_TCP = create_server(specified_port, port, TCP); - lf_print("RTI: Listening for federates."); - // Create the UDP socket server - // Try to get the _f_rti->final_port_TCP + 1 port - if (_f_rti->clock_sync_global_status >= clock_sync_on) { - _f_rti->socket_descriptor_UDP = create_server(specified_port, _f_rti->final_port_TCP + 1, UDP); - } - return _f_rti->socket_descriptor_TCP; -} - -void wait_for_federates(int socket_descriptor) { - // Wait for connections from federates and create a thread for each. - connect_to_federates(socket_descriptor); - - // All federates have connected. - lf_print("RTI: All expected federates have connected. Starting execution."); - - // The socket server will not continue to accept connections after all the federates - // have joined. - // In case some other federation's federates are trying to join the wrong - // federation, need to respond. Start a separate thread to do that. - lf_thread_t responder_thread; - lf_thread_create(&responder_thread, respond_to_erroneous_connections, NULL); - - // Wait for federate threads to exit. - void* thread_exit_status; - for (int i = 0; i < _f_rti->number_of_enclaves; i++) { - federate_t* fed = _f_rti->enclaves[i]; - lf_print("RTI: Waiting for thread handling federate %d.", fed->enclave.id); - lf_thread_join(fed->thread_id, &thread_exit_status); - free_in_transit_message_q(fed->in_transit_message_tags); - lf_print("RTI: Federate %d thread exited.", fed->enclave.id); - } - - _f_rti->all_federates_exited = true; - - // Shutdown and close the socket so that the accept() call in - // respond_to_erroneous_connections returns. That thread should then - // check _f_rti->all_federates_exited and it should exit. 
- if (shutdown(socket_descriptor, SHUT_RDWR)) { - LF_PRINT_LOG("On shut down TCP socket, received reply: %s", strerror(errno)); - } - // NOTE: In all common TCP/IP stacks, there is a time period, - // typically between 30 and 120 seconds, called the TIME_WAIT period, - // before the port is released after this close. This is because - // the OS is preventing another program from accidentally receiving - // duplicated packets intended for this program. - close(socket_descriptor); - - if (_f_rti->socket_descriptor_UDP > 0) { - if (shutdown(_f_rti->socket_descriptor_UDP, SHUT_RDWR)) { - LF_PRINT_LOG("On shut down UDP socket, received reply: %s", strerror(errno)); - } - close(_f_rti->socket_descriptor_UDP); - } -} - -void usage(int argc, const char* argv[]) { - lf_print("\nCommand-line arguments: "); - lf_print(" -i, --id "); - lf_print(" The ID of the federation that this RTI will control."); - lf_print(" -n, --number_of_federates "); - lf_print(" The number of federates in the federation that this RTI will control."); - lf_print(" -p, --port "); - lf_print(" The port number to use for the RTI. Must be larger than 0 and smaller than %d. Default is %d.", UINT16_MAX, STARTING_PORT); - lf_print(" -c, --clock_sync [off|init|on] [period ] [exchanges-per-interval ]"); - lf_print(" The status of clock synchronization for this federate."); - lf_print(" - off: Clock synchronization is off."); - lf_print(" - init (default): Clock synchronization is done only during startup."); - lf_print(" - on: Clock synchronization is done both at startup and during the execution."); - lf_print(" Relevant parameters that can be set: "); - lf_print(" - period (in nanoseconds): Controls how often a clock synchronization attempt is made"); - lf_print(" (period in nanoseconds, default is 5 msec). Only applies to 'on'."); - lf_print(" - exchanges-per-interval : Controls the number of messages that are exchanged for each"); - lf_print(" clock sync attempt (default is 10). 
Applies to 'init' and 'on'."); - lf_print(" -a, --auth Turn on HMAC authentication options."); - lf_print(" -t, --tracing Turn on tracing."); - - lf_print("Command given:"); - for (int i = 0; i < argc; i++) { - lf_print("%s ", argv[i]); - } -} - -int process_clock_sync_args(int argc, const char* argv[]) { - for (int i = 0; i < argc; i++) { - if (strcmp(argv[i], "off") == 0) { - _f_rti->clock_sync_global_status = clock_sync_off; - lf_print("RTI: Clock sync: off"); - } else if (strcmp(argv[i], "init") == 0 || strcmp(argv[i], "initial") == 0) { - _f_rti->clock_sync_global_status = clock_sync_init; - lf_print("RTI: Clock sync: init"); - } else if (strcmp(argv[i], "on") == 0) { - _f_rti->clock_sync_global_status = clock_sync_on; - lf_print("RTI: Clock sync: on"); - } else if (strcmp(argv[i], "period") == 0) { - if (_f_rti->clock_sync_global_status != clock_sync_on) { - lf_print_error("clock sync period can only be set if --clock-sync is set to on."); - usage(argc, argv); - i++; - continue; // Try to parse the rest of the arguments as clock sync args. - } else if (argc < i + 2) { - lf_print_error("clock sync period needs a time (in nanoseconds) argument."); - usage(argc, argv); - continue; - } - i++; - long long period_ns = strtoll(argv[i], NULL, 10); - if (period_ns == 0LL || period_ns == LLONG_MAX || period_ns == LLONG_MIN) { - lf_print_error("clock sync period value is invalid."); - continue; // Try to parse the rest of the arguments as clock sync args. 
- } - _f_rti->clock_sync_period_ns = (int64_t)period_ns; - lf_print("RTI: Clock sync period: %lld", (long long int)_f_rti->clock_sync_period_ns); - } else if (strcmp(argv[i], "exchanges-per-interval") == 0) { - if (_f_rti->clock_sync_global_status != clock_sync_on && _f_rti->clock_sync_global_status != clock_sync_init) { - lf_print_error("clock sync exchanges-per-interval can only be set if" - "--clock-sync is set to on or init."); - usage(argc, argv); - continue; // Try to parse the rest of the arguments as clock sync args. - } else if (argc < i + 2) { - lf_print_error("clock sync exchanges-per-interval needs an integer argument."); - usage(argc, argv); - continue; // Try to parse the rest of the arguments as clock sync args. - } - i++; - long exchanges = (long)strtol(argv[i], NULL, 10); - if (exchanges == 0L || exchanges == LONG_MAX || exchanges == LONG_MIN) { - lf_print_error("clock sync exchanges-per-interval value is invalid."); - continue; // Try to parse the rest of the arguments as clock sync args. - } - _f_rti->clock_sync_exchanges_per_interval = (int32_t)exchanges; // FIXME: Loses numbers on 64-bit machines - lf_print("RTI: Clock sync exchanges per interval: %d", _f_rti->clock_sync_exchanges_per_interval); - } else if (strcmp(argv[i], " ") == 0) { - // Tolerate spaces - continue; - } else { - // Either done with the clock sync args or there is an invalid - // character. 
In either case, let the parent function deal with - // the rest of the characters; - return i; - } - } - return argc; -} - -int process_args(int argc, const char* argv[]) { - for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "-i") == 0 || strcmp(argv[i], "--id") == 0) { - if (argc < i + 2) { - lf_print_error("--id needs a string argument."); - usage(argc, argv); - return 0; - } - i++; - lf_print("RTI: Federation ID: %s", argv[i]); - _f_rti->federation_id = argv[i]; - } else if (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--number_of_federates") == 0) { - if (argc < i + 2) { - lf_print_error("--number_of_federates needs an integer argument."); - usage(argc, argv); - return 0; - } - i++; - long num_federates = strtol(argv[i], NULL, 10); - if (num_federates == 0L || num_federates == LONG_MAX || num_federates == LONG_MIN) { - lf_print_error("--number_of_federates needs a valid positive integer argument."); - usage(argc, argv); - return 0; - } - _f_rti->number_of_enclaves = (int32_t)num_federates; // FIXME: Loses numbers on 64-bit machines - lf_print("RTI: Number of federates: %d", _f_rti->number_of_enclaves); - } else if (strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "--port") == 0) { - if (argc < i + 2) { - lf_print_error( - "--port needs a short unsigned integer argument ( > 0 and < %d).", - UINT16_MAX - ); - usage(argc, argv); - return 0; - } - i++; - uint32_t RTI_port = (uint32_t)strtoul(argv[i], NULL, 10); - if (RTI_port <= 0 || RTI_port >= UINT16_MAX) { - lf_print_error( - "--port needs a short unsigned integer argument ( > 0 and < %d).", - UINT16_MAX - ); - usage(argc, argv); - return 0; - } - _f_rti->user_specified_port = (uint16_t)RTI_port; - } else if (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--clock_sync") == 0) { - if (argc < i + 2) { - lf_print_error("--clock-sync needs off|init|on."); - usage(argc, argv); - return 0; - } - i++; - i += process_clock_sync_args((argc-i), &argv[i]); - } else if (strcmp(argv[i], "-a") == 0 || strcmp(argv[i], 
"--auth") == 0) { - #ifndef __RTI_AUTH__ - lf_print_error("--auth requires the RTI to be built with the -DAUTH=ON option."); - usage(argc, argv); - return 0; - #endif - _f_rti->authentication_enabled = true; - } else if (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--tracing") == 0) { - _f_rti->tracing_enabled = true; - } else if (strcmp(argv[i], " ") == 0) { - // Tolerate spaces - continue; - } else { - lf_print_error("Unrecognized command-line argument: %s", argv[i]); - usage(argc, argv); - return 0; - } - } - if (_f_rti->number_of_enclaves == 0) { - lf_print_error("--number_of_federates needs a valid positive integer argument."); - usage(argc, argv); - return 0; - } - return 1; -} - -void initialize_RTI(){ - _f_rti = (federation_rti_t *)malloc(sizeof(federation_rti_t)); - // enclave_rti related initializations - _f_rti->max_stop_tag = NEVER_TAG, - _f_rti->number_of_enclaves = 0, - _f_rti->num_enclaves_handling_stop = 0, - // federation_rti related initializations - _f_rti->max_start_time = 0LL, - _f_rti->num_feds_proposed_start = 0, - _f_rti->all_federates_exited = false, - _f_rti->federation_id = "Unidentified Federation", - _f_rti->user_specified_port = 0, - _f_rti->final_port_TCP = 0, - _f_rti->socket_descriptor_TCP = -1, - _f_rti->final_port_UDP = UINT16_MAX, - _f_rti->socket_descriptor_UDP = -1, - _f_rti->clock_sync_global_status = clock_sync_init, - _f_rti->clock_sync_period_ns = MSEC(10), - _f_rti->clock_sync_exchanges_per_interval = 10, - _f_rti->authentication_enabled = false, - _f_rti->tracing_enabled = false; - _f_rti->stop_in_progress = false; -} \ No newline at end of file diff --git a/core/federated/RTI/rti_local.c b/core/federated/RTI/rti_local.c new file mode 100644 index 0000000000..c756054266 --- /dev/null +++ b/core/federated/RTI/rti_local.c @@ -0,0 +1,200 @@ +/** + * @file + * @author Erling Jellum (erling.r.jellum@ntnu.no) + * @author Edward A. 
Lee (eal@berkeley.edu) + * @author Chadlia Jerad (chadlia.jerad@ensi-uma.tn) + * @author Soroush Bateni (soroush@utdallas.edu) + * @copyright (c) 2020-2023, The University of California at Berkeley + * License in [BSD 2-clause](https://github.com/lf-lang/reactor-c/blob/main/LICENSE.md) + * + * This file implements the enclave coordination logic. + * Here we are dealing with multiple mutexes. To avoid deadlocking we follow the + * following rules: + * 1) Mutexes are always locked in the following order: + * Enclave mutexes followed by RTI mutex. + * This means that we never lock an enclave mutex while holding the RTI mutex. + * 2) Mutexes are always unlocked in the following order: + * RTI mutex followed by Enclave mutex. + * 3) If the coordination logic might block, we unlock the enclave mutex while + * blocking, using a condition variable to unblock. + * 4) When blocking on the coordination logic, never hold the RTI mutex. + */ + +#ifdef LF_ENCLAVES +#include "rti_local.h" +#include "rti_common.h" +#include "util.h" +#include "platform.h" +#include "environment.h" +#include "trace.h" +#include "reactor.h" + +// Static global pointer to the RTI object. +static rti_local_t * rti_local; + +// The RTI mutex.
A pointer to this mutex will be put on the rti_local struct +lf_mutex_t rti_mutex; + +void initialize_local_rti(environment_t *envs, int num_envs) { + rti_local = (rti_local_t*)calloc(1, sizeof(rti_local_t)); + LF_ASSERT(rti_local, "Out of memory"); + + initialize_rti_common(&rti_local->base); + LF_ASSERT(lf_mutex_init(&rti_mutex) == 0, "Could not create mutex"); + rti_local->base.mutex = &rti_mutex; + rti_local->base.number_of_scheduling_nodes = num_envs; + rti_local->base.tracing_enabled = (envs[0].trace != NULL); + + // Allocate memory for the enclave_info objects + rti_local->base.scheduling_nodes = (scheduling_node_t**)calloc(num_envs, sizeof(scheduling_node_t*)); + for (int i = 0; i < num_envs; i++) { + enclave_info_t *enclave_info = (enclave_info_t *) calloc(1, sizeof(enclave_info_t)); + initialize_enclave_info(enclave_info, i, &envs[i]); + rti_local->base.scheduling_nodes[i] = (scheduling_node_t *) enclave_info; + + // Encode the connection topology into the enclave_info object. + enclave_info->base.num_downstream = _lf_get_downstream_of(i, &enclave_info->base.downstream); + enclave_info->base.num_upstream = _lf_get_upstream_of(i, &enclave_info->base.upstream); + _lf_get_upstream_delay_of(i, &enclave_info->base.upstream_delay); + + enclave_info->base.state = GRANTED; + } +} + +void free_local_rti() { + free_scheduling_nodes(rti_local->base.scheduling_nodes, rti_local->base.number_of_scheduling_nodes); + free(rti_local); +} + +void initialize_enclave_info(enclave_info_t* enclave, int idx, environment_t * env) { + initialize_scheduling_node(&enclave->base, idx); + + env->enclave_info = enclave; + enclave->env = env; + + // Initialize the next event condition variable. 
+ LF_ASSERT(lf_cond_init(&enclave->next_event_condition, &rti_mutex) == 0, "Could not create cond var"); +} + +tag_t rti_next_event_tag_locked(enclave_info_t* e, tag_t next_event_tag) { + LF_PRINT_LOG("RTI: enclave %u sends NET of " PRINTF_TAG " ", + e->base.id, next_event_tag.time - lf_time_start(), next_event_tag.microstep); + + // Return early if there are only a single enclave in the program. + if (rti_local->base.number_of_scheduling_nodes == 1) { + return next_event_tag; + } + // This is called from a critical section within the source enclave. Leave + // this critical section and acquire the RTI mutex. + LF_ASSERT(lf_mutex_unlock(&e->env->mutex) == 0, "Could not unlock mutex"); + LF_ASSERT(lf_mutex_lock(rti_local->base.mutex) == 0, "Could not lock mutex"); + tracepoint_federate_to_rti(e->env->trace, send_NET, e->base.id, &next_event_tag); + // First, update the enclave data structure to record this next_event_tag, + // and notify any downstream scheduling_nodes, and unblock them if appropriate. + tag_advance_grant_t result; + + tag_t previous_tag = e->base.last_granted; + tag_t previous_ptag = e->base.last_provisionally_granted; + + update_scheduling_node_next_event_tag_locked(&e->base, next_event_tag); + + // Return early if we already have been granted past the NET. + if (lf_tag_compare(e->base.last_granted, next_event_tag) >= 0) { + LF_PRINT_LOG("RTI: enclave %u has already been granted a TAG to" PRINTF_TAG ". Returning with a TAG to" PRINTF_TAG " ", + e->base.id, e->base.last_granted.time - lf_time_start(), e->base.last_granted.microstep, + next_event_tag.time - lf_time_start(), next_event_tag.microstep); + tracepoint_federate_from_rti(e->env->trace, receive_TAG, e->base.id, &next_event_tag); + // Release RTI mutex and re-enter the critical section of the source enclave before returning. 
+ LF_ASSERT(lf_mutex_unlock(rti_local->base.mutex) == 0, "Could not unlock mutex"); + LF_ASSERT(lf_mutex_lock(&e->env->mutex) == 0, "Could not lock mutex"); + return next_event_tag; + } + + // If this enclave has no upstream, then we give a TAG till forever straight away. + if (e->base.num_upstream == 0) { + LF_PRINT_LOG("RTI: enclave %u has no upstream. Giving it a to the NET", e->base.id); + e->base.last_granted = next_event_tag; + } + + while(true) { + // Determine whether the above call notified a TAG. + // If so, return that value. Note that we dont care about PTAGs as we + // have disallowed zero-delay enclave loops. + if (lf_tag_compare(previous_tag, e->base.last_granted) < 0) { + result.tag = e->base.last_granted; + result.is_provisional = false; + break; + } + // If not, block. + LF_PRINT_LOG("RTI: enclave %u sleeps waiting for TAG to" PRINTF_TAG " ", + e->base.id, e->base.next_event.time - lf_time_start(), e->base.next_event.microstep); + LF_ASSERT(lf_cond_wait(&e->next_event_condition) == 0, "Could not wait for cond var"); + } + + // At this point we have gotten a new TAG. + LF_PRINT_LOG("RTI: enclave %u returns with TAG to" PRINTF_TAG " ", + e->base.id, e->base.next_event.time - lf_time_start(), e->base.next_event.microstep); + tracepoint_federate_from_rti(e->env->trace, receive_TAG, e->base.id, &result.tag); + // Release RTI mutex and re-enter the critical section of the source enclave. + LF_ASSERT(lf_mutex_unlock(rti_local->base.mutex) == 0, "Could not unlock mutex"); + LF_ASSERT(lf_mutex_lock(&e->env->mutex) == 0, "Could not lock mutex"); + return result.tag; +} + +void rti_logical_tag_complete_locked(enclave_info_t* enclave, tag_t completed) { + if (rti_local->base.number_of_scheduling_nodes == 1) { + return; + } + // Release the enclave mutex while doing the local RTI work. 
+ LF_ASSERT(lf_mutex_unlock(&enclave->env->mutex) == 0, "Could not unlock mutex"); + tracepoint_federate_to_rti(enclave->env->trace, send_LTC, enclave->base.id, &completed); + _logical_tag_complete(&enclave->base, completed); + // Acquire the enclave mutex again before returning. + LF_ASSERT(lf_mutex_lock(&enclave->env->mutex) == 0, "Could not lock mutex"); +} + +void rti_update_other_net_locked(enclave_info_t* src, enclave_info_t * target, tag_t net) { + // Here we do NOT leave the critical section of the target enclave before we + // acquire the RTI mutex. This means that we cannot block within this function. + LF_ASSERT(lf_mutex_lock(rti_local->base.mutex) == 0, "Could not lock mutex"); + tracepoint_federate_to_federate(src->env->trace, send_TAGGED_MSG, src->base.id, target->base.id, &net); + + // If our proposed NET is less than the current NET, update it. + if (lf_tag_compare(net, target->base.next_event) < 0) { + target->base.next_event = net; + } + LF_ASSERT(lf_mutex_unlock(rti_local->base.mutex) == 0, "Could not unlock mutex"); +} + +/////////////////////////////////////////////////////////////////////////////// +// The local RTIs implementation of the notify functions +/////////////////////////////////////////////////////////////////////////////// + +void notify_tag_advance_grant(scheduling_node_t* e, tag_t tag) { + if (e->state == NOT_CONNECTED + || lf_tag_compare(tag, e->last_granted) <= 0 + || lf_tag_compare(tag, e->last_provisionally_granted) < 0 + ) { + return; + } + if (rti_local->base.tracing_enabled) { + tracepoint_rti_to_federate(e->env->trace, send_TAG, e->id, &tag); + } + e->last_granted = tag; + // TODO: Here we can consider adding a flag to the RTI struct and only signal the cond var if we have + // sleeping enclaves. + LF_ASSERT(lf_cond_signal(&((enclave_info_t *)e)->next_event_condition) == 0, "Could not signal cond var"); +} + +// We currently ignore the PTAGs, because they are only relevant with zero +// delay enclave loops. 
+void notify_provisional_tag_advance_grant(scheduling_node_t* e, tag_t tag) { + LF_PRINT_LOG("RTI: enclave %u callback with PTAG " PRINTF_TAG " ", + e->id, tag.time - lf_time_start(), tag.microstep); +} + +void free_scheduling_nodes(scheduling_node_t** scheduling_nodes, uint16_t number_of_scheduling_nodes) { + // Nothing to do here. +} + +#endif //LF_ENCLAVES diff --git a/core/federated/RTI/rti_local.h b/core/federated/RTI/rti_local.h new file mode 100644 index 0000000000..8960ad1b1f --- /dev/null +++ b/core/federated/RTI/rti_local.h @@ -0,0 +1,92 @@ +#ifndef RTI_LOCAL_H +#define RTI_LOCAL_H + +#ifdef LF_ENCLAVES + + +#include "lf_types.h" +#include "rti_common.h" + +/** + * @brief Structure holding information about each enclave in the program. + * The first field is the generic scheduling_node_t struct. + * + */ +typedef struct enclave_info_t { + scheduling_node_t base; + environment_t * env; // A pointer to the environment of the enclave + lf_cond_t next_event_condition; // Condition variable used by scheduling_nodes to notify an enclave + // that its call to next_event_tag() should unblock. +} enclave_info_t; + +/** + * @brief Structure holding information about the local RTI + * + */ +typedef struct { + rti_common_t base; +} rti_local_t; + +/** + * @brief Dynamically create and initialize the local RTI. + */ +void initialize_local_rti(environment_t* envs, int num_envs); + +/** + * @brief Free memory associated with the local RTI and the local RTI itself. + */ +void free_local_rti(); + +/** + * @brief Initialize the enclave object. + * + * @param enclave + */ +void initialize_enclave_info(enclave_info_t* enclave, int idx, environment_t *env); + +/** + * @brief Notify the local RTI of a next event tag (NET). + * This function call may block. A call to this function serves two purposes. + * 1) It is a promise that, unless receiving events from other enclaves, this + * enclave will not produce any event until the next_event_tag (NET) argument.
+ * 2) It is a request for permission to advance the logical tag of the enclave + * until the NET. + * + * This function call will block until the enclave has been granted a TAG, + * which might not be the tag requested. + * + * This assumes the caller is holding the environment mutex of the source enclave. + * + * @param enclave The enclave requesting to advance to the NET. + * @param next_event_tag The tag of the next event in the enclave + * @return tag_t A tag which the enclave can safely advance its time to. It + * might be smaller than the requested tag. + */ +tag_t rti_next_event_tag_locked(enclave_info_t* enclave, tag_t next_event_tag); + +/** + * @brief This function informs the local RTI that `enclave` has completed tag + * `completed`. This will update the data structures and can release other + * enclaves waiting on a TAG. + * + * This assumes the caller is holding the environment mutex of the source enclave. + * + * @param enclave The enclave + * @param completed The tag just completed by the enclave. + */ +void rti_logical_tag_complete_locked(enclave_info_t* enclave, tag_t completed); + +/** + * @brief This function is called after scheduling an event onto the event queue + * of another enclave. The source enclave must call this function to potentially update + * the NET of the target enclave. + * + * This assumes the caller is holding the environment mutex of the target enclave. + * + * @param target The enclave whose NET we want to update + * @param net The proposed next event tag + */ +void rti_update_other_net_locked(enclave_info_t* src, enclave_info_t* target, tag_t net); + +#endif // LF_ENCLAVES +#endif // RTI_LOCAL_H diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c new file mode 100644 index 0000000000..373a109df0 --- /dev/null +++ b/core/federated/RTI/rti_remote.c @@ -0,0 +1,1837 @@ +#if defined STANDALONE_RTI +/** + * @file + * @author Edward A.
Lee + * @author Soroush Bateni + * @author Erling Jellum + * @author Chadlia Jerad + * @copyright (c) 2020-2023, The University of California at Berkeley + * License in [BSD 2-clause](https://github.com/lf-lang/reactor-c/blob/main/LICENSE.md) + * @brief Runtime infrastructure (RTI) for distributed Lingua Franca programs. + * + * This implementation creates one thread per federate so as to be able + * to take advantage of multiple cores. It may be more efficient, however, + * to use select() instead to read from the multiple socket connections + * to each federate. + * + * This implementation sends messages in little endian order + * because Intel, RISC V, and Arm processors are little endian. + * This is not what is normally considered "network order", + * but we control both ends, and hence, for commonly used + * processors, this will be more efficient since it won't have + * to swap bytes. + * + * This implementation of the RTI should be considered a reference + * implementation. In the future it might be re-implemented in Java or Kotlin. + * Or we could bootstrap and implement it using Lingua Franca. + */ + +#include "rti_remote.h" +#include "net_util.h" +#include + +// Global variables defined in tag.c: +extern instant_t start_time; + +/** + * Local reference to the rti_remote object + */ +static rti_remote_t *rti_remote; + +bool _lf_federate_reports_error = false; + +// A convenient macro for getting the `federate_info_t *` at index `_idx` +// and casting it. +#define GET_FED_INFO(_idx) (federate_info_t *)rti_remote->base.scheduling_nodes[_idx] + +lf_mutex_t rti_mutex; +lf_cond_t received_start_times; +lf_cond_t sent_start_time; + +extern int lf_critical_section_enter(environment_t *env) { + return lf_mutex_lock(&rti_mutex); +} + +extern int lf_critical_section_exit(environment_t *env) { + return lf_mutex_unlock(&rti_mutex); +} + +/** + * Create a server and enable listening for socket connections. 
+ * If the specified port is non-zero, it will attempt to acquire that port. + * If it fails, it will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times with + * a delay of PORT_BIND_RETRY_INTERVAL in between. If the specified port is + * zero, then it will attempt to acquire DEFAULT_PORT first. If this fails, then it + * will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times, incrementing the port + * number between attempts, with no delay between attempts. Once it has incremented + * the port number MAX_NUM_PORT_ADDRESSES times, it will cycle around and begin again + * with DEFAULT_PORT. + * + * @param port The port number to use or 0 to start trying at DEFAULT_PORT. + * @param socket_type The type of the socket for the server (TCP or UDP). + * @return The socket descriptor on which to accept connections. + */ +static int create_rti_server(uint16_t port, socket_type_t socket_type) { + // Timeout time for the communications of the server + struct timeval timeout_time = { + .tv_sec = TCP_TIMEOUT_TIME / BILLION, + .tv_usec = (TCP_TIMEOUT_TIME % BILLION) / 1000 + }; + // Create an IPv4 socket for TCP or UDP communication, depending on socket_type.
+ int socket_descriptor = -1; + if (socket_type == TCP) { + socket_descriptor = create_real_time_tcp_socket_errexit(); + } else if (socket_type == UDP) { + socket_descriptor = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + // Set the appropriate timeout time + timeout_time = (struct timeval){ + .tv_sec = UDP_TIMEOUT_TIME / BILLION, + .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000 + }; + } + if (socket_descriptor < 0) { + lf_print_error_system_failure("Failed to create RTI socket."); + } + + // Set the option for this socket to reuse the same address + int true_variable = 1; // setsockopt() requires a reference to the value assigned to an option + if (setsockopt( + socket_descriptor, + SOL_SOCKET, + SO_REUSEADDR, + &true_variable, + sizeof(int32_t)) < 0) { + lf_print_error("RTI failed to set SO_REUSEADDR option on the socket: %s.", strerror(errno)); + } + // Set the timeout on the socket so that read and write operations don't block for too long + if (setsockopt( + socket_descriptor, + SOL_SOCKET, + SO_RCVTIMEO, + (const char *)&timeout_time, + sizeof(timeout_time)) < 0) { + lf_print_error("RTI failed to set SO_RCVTIMEO option on the socket: %s.", strerror(errno)); + } + if (setsockopt( + socket_descriptor, + SOL_SOCKET, + SO_SNDTIMEO, + (const char *)&timeout_time, + sizeof(timeout_time)) < 0) { + lf_print_error("RTI failed to set SO_SNDTIMEO option on the socket: %s.", strerror(errno)); + } + + /* + * The following used to permit reuse of a port that an RTI has previously + * used that has not been released. We no longer do this, and instead retry + * some number of times after waiting. + + // SO_REUSEPORT (since Linux 3.9) + // Permits multiple AF_INET or AF_INET6 sockets to be bound to an + // identical socket address. This option must be set on each + // socket (including the first socket) prior to calling bind(2) + // on the socket. To prevent port hijacking, all of the + // processes binding to the same address must have the same + // effective UID. 
This option can be employed with both TCP and + // UDP sockets. + + int reuse = 1; + #ifdef SO_REUSEPORT + if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEPORT, + (const char*)&reuse, sizeof(reuse)) < 0) { + perror("setsockopt(SO_REUSEPORT) failed"); + } + #endif + */ + + // Server file descriptor. + struct sockaddr_in server_fd; + // Zero out the server address structure. + bzero((char *)&server_fd, sizeof(server_fd)); + + uint16_t specified_port = port; + if (specified_port == 0) port = DEFAULT_PORT; + + server_fd.sin_family = AF_INET; // IPv4 + server_fd.sin_addr.s_addr = INADDR_ANY; // All interfaces, 0.0.0.0. + // Convert the port number from host byte order to network byte order. + server_fd.sin_port = htons(port); + + int result = bind( + socket_descriptor, + (struct sockaddr *)&server_fd, + sizeof(server_fd)); + + // Try repeatedly to bind to a port. If no specific port is specified, then + // increment the port number each time. + + int count = 1; + while (result != 0 && count++ < PORT_BIND_RETRY_LIMIT) { + if (specified_port == 0) { + lf_print_warning("RTI failed to get port %d.", port); + port++; + if (port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) port = DEFAULT_PORT; + lf_print_warning("RTI will try again with port %d.", port); + server_fd.sin_port = htons(port); + // Do not sleep. + } else { + lf_print("RTI failed to get port %d. Will try again.", port); + lf_sleep(PORT_BIND_RETRY_INTERVAL); + } + result = bind( + socket_descriptor, + (struct sockaddr *)&server_fd, + sizeof(server_fd)); + } + if (result != 0) { + lf_print_error_and_exit("Failed to bind the RTI socket. Port %d is not available. ", port); + } + char *type = "TCP"; + if (socket_type == UDP) { + type = "UDP"; + } + lf_print("RTI using %s port %d for federation %s.", type, port, rti_remote->federation_id); + + if (socket_type == TCP) { + rti_remote->final_port_TCP = port; + // Enable listening for socket connections. 
+ // The second argument is the maximum number of queued socket requests, + // which according to the Mac man page is limited to 128. + listen(socket_descriptor, 128); + } else if (socket_type == UDP) { + rti_remote->final_port_UDP = port; + // No need to listen on the UDP socket + } + + return socket_descriptor; +} + +void notify_tag_advance_grant(scheduling_node_t *e, tag_t tag) { + if (e->state == NOT_CONNECTED + || lf_tag_compare(tag, e->last_granted) <= 0 + || lf_tag_compare(tag, e->last_provisionally_granted) < 0) { + return; + } + // Need to make sure that the destination federate's thread has already + // sent the starting MSG_TYPE_TIMESTAMP message. + while (e->state == PENDING) { + // Need to wait here. + lf_cond_wait(&sent_start_time); + } + size_t message_length = 1 + sizeof(int64_t) + sizeof(uint32_t); + unsigned char buffer[message_length]; + buffer[0] = MSG_TYPE_TAG_ADVANCE_GRANT; + encode_int64(tag.time, &(buffer[1])); + encode_int32((int32_t)tag.microstep, &(buffer[1 + sizeof(int64_t)])); + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_TAG, e->id, &tag); + } + // This function is called in notify_advance_grant_if_safe(), which is a long + // function. During this call, the socket might close, causing the following write_to_socket + // to fail. Consider a failure here a soft failure and update the federate's status. 
+ if (write_to_socket(((federate_info_t *)e)->socket, message_length, buffer)) { + lf_print_error("RTI failed to send tag advance grant to federate %d.", e->id); + e->state = NOT_CONNECTED; + } else { + e->last_granted = tag; + LF_PRINT_LOG("RTI sent to federate %d the tag advance grant (TAG) " PRINTF_TAG ".", + e->id, tag.time - start_time, tag.microstep); + } +} + +void notify_provisional_tag_advance_grant(scheduling_node_t *e, tag_t tag) { + if (e->state == NOT_CONNECTED + || lf_tag_compare(tag, e->last_granted) <= 0 + || lf_tag_compare(tag, e->last_provisionally_granted) <= 0) { + return; + } + // Need to make sure that the destination federate's thread has already + // sent the starting MSG_TYPE_TIMESTAMP message. + while (e->state == PENDING) { + // Need to wait here. + lf_cond_wait(&sent_start_time); + } + size_t message_length = 1 + sizeof(int64_t) + sizeof(uint32_t); + unsigned char buffer[message_length]; + buffer[0] = MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT; + encode_int64(tag.time, &(buffer[1])); + encode_int32((int32_t)tag.microstep, &(buffer[1 + sizeof(int64_t)])); + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_PTAG, e->id, &tag); + } + // This function is called in notify_advance_grant_if_safe(), which is a long + // function. During this call, the socket might close, causing the following write_to_socket + // to fail. Consider a failure here a soft failure and update the federate's status. 
+ if (write_to_socket(((federate_info_t *)e)->socket, message_length, buffer)) { + lf_print_error("RTI failed to send tag advance grant to federate %d.", e->id); + e->state = NOT_CONNECTED; + } else { + e->last_provisionally_granted = tag; + LF_PRINT_LOG("RTI sent to federate %d the Provisional Tag Advance Grant (PTAG) " PRINTF_TAG ".", + e->id, tag.time - start_time, tag.microstep); + + // Send PTAG to all upstream federates, if they have not had + // a later or equal PTAG or TAG sent previously and if their transitive + // NET is greater than or equal to the tag. + // This is needed to stimulate absent messages from upstream and break deadlocks. + // The scenario this deals with is illustrated in `test/C/src/federated/FeedbackDelay2.lf` + // and `test/C/src/federated/FeedbackDelay4.lf`. + // Note that this is transitive. + // NOTE: This is not needed for enclaves because zero-delay loops are prohibited. + // It's only needed for federates, which is why this is implemented here. + for (int j = 0; j < e->num_upstream; j++) { + scheduling_node_t *upstream = rti_remote->base.scheduling_nodes[e->upstream[j]]; + + // Ignore this federate if it has resigned. + if (upstream->state == NOT_CONNECTED) + continue; + + tag_t earliest = earliest_future_incoming_message_tag(upstream); + tag_t strict_earliest = eimt_strict(upstream); // Non-ZDC version. + + // If these tags are equal, then a TAG or PTAG should have already been granted, + // in which case, another will not be sent. But it may not have been already granted. 
+ if (lf_tag_compare(earliest, tag) > 0) { + notify_tag_advance_grant(upstream, tag); + } else if (lf_tag_compare(earliest, tag) == 0 && lf_tag_compare(strict_earliest, tag) > 0) { + notify_provisional_tag_advance_grant(upstream, tag); + } + } + } +} + +void update_federate_next_event_tag_locked(uint16_t federate_id, tag_t next_event_tag) { + federate_info_t *fed = GET_FED_INFO(federate_id); + tag_t min_in_transit_tag = pqueue_tag_peek_tag(fed->in_transit_message_tags); + if (lf_tag_compare(min_in_transit_tag, next_event_tag) < 0) { + next_event_tag = min_in_transit_tag; + } + update_scheduling_node_next_event_tag_locked(&(fed->enclave), next_event_tag); +} + +void handle_port_absent_message(federate_info_t *sending_federate, unsigned char *buffer) { + size_t message_size = sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int64_t) + sizeof(uint32_t); + + read_from_socket_fail_on_error( + &sending_federate->socket, message_size, &(buffer[1]), NULL, + " RTI failed to read port absent message from federate %u.", + sending_federate->enclave.id); + + uint16_t reactor_port_id = extract_uint16(&(buffer[1])); + uint16_t federate_id = extract_uint16(&(buffer[1 + sizeof(uint16_t)])); + tag_t tag = extract_tag(&(buffer[1 + 2 * sizeof(uint16_t)])); + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_PORT_ABS, sending_federate->enclave.id, &tag); + } + + // Need to acquire the mutex lock to ensure that the thread handling + // messages coming from the socket connected to the destination does not + // issue a TAG before this message has been forwarded. + LF_MUTEX_LOCK(rti_mutex); + + // If the destination federate is no longer connected, issue a warning + // and return. + federate_info_t *fed = GET_FED_INFO(federate_id); + if (fed->enclave.state == NOT_CONNECTED) { + LF_MUTEX_UNLOCK(rti_mutex); + lf_print_warning("RTI: Destination federate %d is no longer connected. 
Dropping message.", + federate_id); + LF_PRINT_LOG("Fed status: next_event (" PRINTF_TIME ", %d), " + "completed (" PRINTF_TIME ", %d), " + "last_granted (" PRINTF_TIME ", %d), " + "last_provisionally_granted (" PRINTF_TIME ", %d).", + fed->enclave.next_event.time - start_time, + fed->enclave.next_event.microstep, + fed->enclave.completed.time - start_time, + fed->enclave.completed.microstep, + fed->enclave.last_granted.time - start_time, + fed->enclave.last_granted.microstep, + fed->enclave.last_provisionally_granted.time - start_time, + fed->enclave.last_provisionally_granted.microstep); + return; + } + + LF_PRINT_LOG("RTI forwarding port absent message for port %u to federate %u.", + reactor_port_id, + federate_id); + + // Need to make sure that the destination federate's thread has already + // sent the starting MSG_TYPE_TIMESTAMP message. + while (fed->enclave.state == PENDING) { + // Need to wait here. + lf_cond_wait(&sent_start_time); + } + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_PORT_ABS, federate_id, &tag); + } + + // Forward the message. + write_to_socket_fail_on_error(&fed->socket, message_size + 1, buffer, &rti_mutex, + "RTI failed to forward message to federate %d.", federate_id); + + LF_MUTEX_UNLOCK(rti_mutex); +} + +void handle_timed_message(federate_info_t *sending_federate, unsigned char *buffer) { + size_t header_size = 1 + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t) + + sizeof(int64_t) + sizeof(uint32_t); + // Read the header, minus the first byte which has already been read. + read_from_socket_fail_on_error( + &sending_federate->socket, header_size - 1, &(buffer[1]), NULL, + "RTI failed to read the timed message header from remote federate."); + // Fields of the header sent by the sending federate. + uint16_t reactor_port_id; + uint16_t federate_id; + size_t length; + tag_t intended_tag; + // Extract information from the header.
+ extract_timed_header(&(buffer[1]), &reactor_port_id, &federate_id, &length, &intended_tag); + + size_t total_bytes_to_read = length + header_size; + size_t bytes_to_read = length; + + if (FED_COM_BUFFER_SIZE < header_size + 1) { + lf_print_error_and_exit("Buffer size (%d) is not large enough to " + "read the header plus one byte.", + FED_COM_BUFFER_SIZE); + } + + // Cut up the payload in chunks. + if (bytes_to_read > FED_COM_BUFFER_SIZE - header_size) { + bytes_to_read = FED_COM_BUFFER_SIZE - header_size; + } + + LF_PRINT_LOG("RTI received message from federate %d for federate %u port %u with intended tag " PRINTF_TAG ". Forwarding.", + sending_federate->enclave.id, federate_id, reactor_port_id, + intended_tag.time - lf_time_start(), intended_tag.microstep); + + read_from_socket_fail_on_error( + &sending_federate->socket, bytes_to_read, &(buffer[header_size]), NULL, + "RTI failed to read timed message from federate %d.", federate_id); + size_t bytes_read = bytes_to_read + header_size; + // Following only works for string messages. + // LF_PRINT_DEBUG("Message received by RTI: %s.", buffer + header_size); + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_TAGGED_MSG, sending_federate->enclave.id, &intended_tag); + } + + // Need to acquire the mutex lock to ensure that the thread handling + // messages coming from the socket connected to the destination does not + // issue a TAG before this message has been forwarded. + LF_MUTEX_LOCK(rti_mutex); + + // If the destination federate is no longer connected, issue a warning + // and return. + federate_info_t *fed = GET_FED_INFO(federate_id); + if (fed->enclave.state == NOT_CONNECTED) { + LF_MUTEX_UNLOCK(rti_mutex); + lf_print_warning("RTI: Destination federate %d is no longer connected. 
Dropping message.", + federate_id); + LF_PRINT_LOG("Fed status: next_event (" PRINTF_TIME ", %d), " + "completed (" PRINTF_TIME ", %d), " + "last_granted (" PRINTF_TIME ", %d), " + "last_provisionally_granted (" PRINTF_TIME ", %d).", + fed->enclave.next_event.time - start_time, + fed->enclave.next_event.microstep, + fed->enclave.completed.time - start_time, + fed->enclave.completed.microstep, + fed->enclave.last_granted.time - start_time, + fed->enclave.last_granted.microstep, + fed->enclave.last_provisionally_granted.time - start_time, + fed->enclave.last_provisionally_granted.microstep); + return; + } + + LF_PRINT_DEBUG( + "RTI forwarding message to port %d of federate %hu of length %zu.", + reactor_port_id, + federate_id, + length); + + // Need to make sure that the destination federate's thread has already + // sent the starting MSG_TYPE_TIMESTAMP message. + while (fed->enclave.state == PENDING) { + // Need to wait here. + lf_cond_wait(&sent_start_time); + } + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_TAGGED_MSG, federate_id, &intended_tag); + } + + write_to_socket_fail_on_error(&fed->socket, bytes_read, buffer, &rti_mutex, + "RTI failed to forward message to federate %d.", federate_id); + + // The message length may be longer than the buffer, + // in which case we have to handle it in chunks. + size_t total_bytes_read = bytes_read; + while (total_bytes_read < total_bytes_to_read) { + LF_PRINT_DEBUG("Forwarding message in chunks."); + bytes_to_read = total_bytes_to_read - total_bytes_read; + if (bytes_to_read > FED_COM_BUFFER_SIZE) { + bytes_to_read = FED_COM_BUFFER_SIZE; + } + read_from_socket_fail_on_error(&sending_federate->socket, bytes_to_read, buffer, NULL, + "RTI failed to read message chunks."); + total_bytes_read += bytes_to_read; + + // FIXME: a mutex needs to be held for this so that other threads + // do not write to destination_socket and cause interleaving. 
However, + // holding the rti_mutex might be very expensive. Instead, each outgoing + // socket should probably have its own mutex. + write_to_socket_fail_on_error(&fed->socket, bytes_to_read, buffer, &rti_mutex, + "RTI failed to send message chunks."); + } + + // Record this in-transit message in federate's in-transit message queue. + if (lf_tag_compare(fed->enclave.completed, intended_tag) < 0) { + // Add a record of this message to the list of in-transit messages to this federate. + pqueue_tag_insert_if_no_match( + fed->in_transit_message_tags, + intended_tag); + LF_PRINT_DEBUG( + "RTI: Adding a message with tag " PRINTF_TAG " to the list of in-transit messages for federate %d.", + intended_tag.time - lf_time_start(), + intended_tag.microstep, + federate_id); + } else { + lf_print_error( + "RTI: Federate %d has already completed tag " PRINTF_TAG + ", but there is an in-transit message with tag " PRINTF_TAG " from federate %hu. " + "This is going to cause an STP violation under centralized coordination.", + federate_id, + fed->enclave.completed.time - lf_time_start(), + fed->enclave.completed.microstep, + intended_tag.time - lf_time_start(), + intended_tag.microstep, + sending_federate->enclave.id); + // FIXME: Drop the federate? + } + + // If the message tag is less than the most recently received NET from the federate, + // then update the federate's next event tag to match the message tag. 
+ if (lf_tag_compare(intended_tag, fed->enclave.next_event) < 0) { + update_federate_next_event_tag_locked(federate_id, intended_tag); + } + + LF_MUTEX_UNLOCK(rti_mutex); +} + +void handle_latest_tag_complete(federate_info_t *fed) { + unsigned char buffer[sizeof(int64_t) + sizeof(uint32_t)]; + read_from_socket_fail_on_error(&fed->socket, sizeof(int64_t) + sizeof(uint32_t), buffer, NULL, + "RTI failed to read the content of the logical tag complete from federate %d.", + fed->enclave.id); + tag_t completed = extract_tag(buffer); + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_LTC, fed->enclave.id, &completed); + } + _logical_tag_complete(&(fed->enclave), completed); + + // FIXME: Should this function be in the enclave version? + LF_MUTEX_LOCK(rti_mutex); + // See if we can remove any of the recorded in-transit messages for this. + pqueue_tag_remove_up_to(fed->in_transit_message_tags, completed); + LF_MUTEX_UNLOCK(rti_mutex); +} + +void handle_next_event_tag(federate_info_t *fed) { + unsigned char buffer[sizeof(int64_t) + sizeof(uint32_t)]; + read_from_socket_fail_on_error(&fed->socket, sizeof(int64_t) + sizeof(uint32_t), buffer, NULL, + "RTI failed to read the content of the next event tag from federate %d.", + fed->enclave.id); + + // Acquire a mutex lock to ensure that this state does not change while a + // message is in transport or being used to determine a TAG. + LF_MUTEX_LOCK(rti_mutex); // FIXME: Instead of using a mutex, it might be more efficient to use a + // select() mechanism to read and process federates' buffers in an orderly fashion. 
+ + tag_t intended_tag = extract_tag(buffer); + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_NET, fed->enclave.id, &intended_tag); + } + LF_PRINT_LOG("RTI received from federate %d the Next Event Tag (NET) " PRINTF_TAG, + fed->enclave.id, intended_tag.time - start_time, + intended_tag.microstep); + update_federate_next_event_tag_locked( + fed->enclave.id, + intended_tag); + LF_MUTEX_UNLOCK(rti_mutex); +} + +/////////////////// STOP functions //////////////////// + +/** + * Boolean used to prevent the RTI from sending the + * MSG_TYPE_STOP_GRANTED message multiple times. + */ +bool stop_granted_already_sent_to_federates = false; + +/** + * Once the RTI has seen proposed tags from all connected federates, + * it will broadcast a MSG_TYPE_STOP_GRANTED carrying rti_remote->base.max_stop_tag. + * This function also checks the most recently received NET from + * each federate and resets it to be no greater than rti_remote->base.max_stop_tag. + * + * This function assumes the caller holds the rti_mutex lock. + */ +static void broadcast_stop_time_to_federates_locked() { + if (stop_granted_already_sent_to_federates == true) { + return; + } + stop_granted_already_sent_to_federates = true; + + // Reply with a stop granted to all federates + unsigned char outgoing_buffer[MSG_TYPE_STOP_GRANTED_LENGTH]; + ENCODE_STOP_GRANTED(outgoing_buffer, rti_remote->base.max_stop_tag.time, rti_remote->base.max_stop_tag.microstep); + + // Iterate over federates and send each the message. + for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { + federate_info_t *fed = GET_FED_INFO(i); + if (fed->enclave.state == NOT_CONNECTED) { + continue; + } + if (lf_tag_compare(fed->enclave.next_event, rti_remote->base.max_stop_tag) >= 0) { + // Need the next_event to be no greater than the stop tag.
+ fed->enclave.next_event = rti_remote->base.max_stop_tag; + } + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_STOP_GRN, fed->enclave.id, &rti_remote->base.max_stop_tag); + } + write_to_socket_fail_on_error( + &fed->socket, MSG_TYPE_STOP_GRANTED_LENGTH, outgoing_buffer, &rti_mutex, + "RTI failed to send MSG_TYPE_STOP_GRANTED message to federate %d.", fed->enclave.id); + } + + LF_PRINT_LOG("RTI sent to federates MSG_TYPE_STOP_GRANTED with tag " PRINTF_TAG, + rti_remote->base.max_stop_tag.time - start_time, + rti_remote->base.max_stop_tag.microstep); +} + +/** + * Mark a federate requesting stop. If the number of federates handling stop reaches the + * total number of scheduling nodes, broadcast MSG_TYPE_STOP_GRANTED to every federate. + * This function assumes the rti_mutex is already locked. + * @param fed The federate that has requested a stop. + * @return 1 if stop time has been sent to all federates and 0 otherwise. + */ +static int mark_federate_requesting_stop(federate_info_t *fed) { + if (!fed->requested_stop) { + rti_remote->base.num_scheduling_nodes_handling_stop++; + fed->requested_stop = true; + } + if (rti_remote->base.num_scheduling_nodes_handling_stop + == rti_remote->base.number_of_scheduling_nodes) { + // We now have information about the stop time of all + // federates. + broadcast_stop_time_to_federates_locked(); + return 1; + } + return 0; +} + +/** + * Thread to time out if federates do not reply to stop request. + */ +static void* wait_for_stop_request_reply(void* args) { + // Divide the time into small chunks and check periodically. + interval_t chunk = MAX_TIME_FOR_REPLY_TO_STOP_REQUEST/30; + int count = 0; + while (count++ < 30) { + if (stop_granted_already_sent_to_federates) return NULL; + lf_sleep(chunk); + } + // If we reach here, then error out. + lf_print_error_and_exit("Received only %d stop request replies within timeout " + PRINTF_TIME "ns.
RTI is exiting.", + rti_remote->base.num_scheduling_nodes_handling_stop, + MAX_TIME_FOR_REPLY_TO_STOP_REQUEST + ); + return NULL; +} + +void handle_stop_request_message(federate_info_t *fed) { + LF_PRINT_DEBUG("RTI handling stop_request from federate %d.", fed->enclave.id); + + size_t bytes_to_read = MSG_TYPE_STOP_REQUEST_LENGTH - 1; + unsigned char buffer[bytes_to_read]; + read_from_socket_fail_on_error(&fed->socket, bytes_to_read, buffer, NULL, + "RTI failed to read the MSG_TYPE_STOP_REQUEST payload from federate %d.", + fed->enclave.id); + + // Extract the proposed stop tag for the federate + tag_t proposed_stop_tag = extract_tag(buffer); + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_STOP_REQ, fed->enclave.id, &proposed_stop_tag); + } + + LF_PRINT_LOG("RTI received from federate %d a MSG_TYPE_STOP_REQUEST message with tag " PRINTF_TAG ".", + fed->enclave.id, proposed_stop_tag.time - start_time, proposed_stop_tag.microstep); + + // Acquire a mutex lock to ensure that this state does not change while a + // message is in transport or being used to determine a TAG. + LF_MUTEX_LOCK(rti_mutex); + + // Check whether we have already received a stop_tag + // from this federate + if (fed->requested_stop) { + // If stop request messages have already been broadcast, treat this as if it were a reply. + if (rti_remote->stop_in_progress) { + mark_federate_requesting_stop(fed); + } + LF_MUTEX_UNLOCK(rti_mutex); + return; + } + + // Update the maximum stop tag received from federates + if (lf_tag_compare(proposed_stop_tag, rti_remote->base.max_stop_tag) > 0) { + rti_remote->base.max_stop_tag = proposed_stop_tag; + } + + // If all federates have replied, send stop request granted. + if (mark_federate_requesting_stop(fed)) { + // Have sent stop request granted to all federates. Nothing more to do.
+ LF_MUTEX_UNLOCK(rti_mutex); + return; + } + + // Forward the stop request to all other federates that have not + // also issued a stop request. + unsigned char stop_request_buffer[MSG_TYPE_STOP_REQUEST_LENGTH]; + ENCODE_STOP_REQUEST(stop_request_buffer, + rti_remote->base.max_stop_tag.time, rti_remote->base.max_stop_tag.microstep); + + // Iterate over federates and send each the MSG_TYPE_STOP_REQUEST message + // if we do not have a stop_time already for them. Do not do this more than once. + if (rti_remote->stop_in_progress) { + LF_MUTEX_UNLOCK(rti_mutex); + return; + } + rti_remote->stop_in_progress = true; + // Need a timeout here in case a federate never replies. + lf_thread_t timeout_thread; + lf_thread_create(&timeout_thread, wait_for_stop_request_reply, NULL); + + for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { + federate_info_t *f = GET_FED_INFO(i); + if (f->enclave.id != fed->enclave.id && f->requested_stop == false) { + if (f->enclave.state == NOT_CONNECTED) { + mark_federate_requesting_stop(f); + continue; + } + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_STOP_REQ, f->enclave.id, &rti_remote->base.max_stop_tag); + } + write_to_socket_fail_on_error(&f->socket, MSG_TYPE_STOP_REQUEST_LENGTH, stop_request_buffer, &rti_mutex, + "RTI failed to forward MSG_TYPE_STOP_REQUEST message to federate %d.", f->enclave.id); + } + } + LF_PRINT_LOG("RTI forwarded to federates MSG_TYPE_STOP_REQUEST with tag (" PRINTF_TIME ", %u).", + rti_remote->base.max_stop_tag.time - start_time, + rti_remote->base.max_stop_tag.microstep); + LF_MUTEX_UNLOCK(rti_mutex); +} + +void handle_stop_request_reply(federate_info_t *fed) { + size_t bytes_to_read = MSG_TYPE_STOP_REQUEST_REPLY_LENGTH - 1; + unsigned char buffer_stop_time[bytes_to_read]; + read_from_socket_fail_on_error(&fed->socket, bytes_to_read, buffer_stop_time, NULL, + "RTI failed to read the reply to MSG_TYPE_STOP_REQUEST message from federate %d.", 
+ fed->enclave.id); + + tag_t federate_stop_tag = extract_tag(buffer_stop_time); + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_STOP_REQ_REP, fed->enclave.id, &federate_stop_tag); + } + + LF_PRINT_LOG("RTI received from federate %d STOP reply tag " PRINTF_TAG ".", fed->enclave.id, + federate_stop_tag.time - start_time, + federate_stop_tag.microstep); + + // Acquire the mutex lock so that we can change the state of the RTI + LF_MUTEX_LOCK(rti_mutex); + // If the federate has not requested stop before, count the reply + if (lf_tag_compare(federate_stop_tag, rti_remote->base.max_stop_tag) > 0) { + rti_remote->base.max_stop_tag = federate_stop_tag; + } + mark_federate_requesting_stop(fed); + LF_MUTEX_UNLOCK(rti_mutex); +} + +////////////////////////////////////////////////// + +void handle_address_query(uint16_t fed_id) { + federate_info_t *fed = GET_FED_INFO(fed_id); + // Use buffer both for reading and constructing the reply. + // The length is what is needed for the reply. + unsigned char buffer[1 + sizeof(int32_t)]; + read_from_socket_fail_on_error(&fed->socket, sizeof(uint16_t), (unsigned char *)buffer, NULL, + "Failed to read address query."); + uint16_t remote_fed_id = extract_uint16(buffer); + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_ADR_QR, fed_id, NULL); + } + + LF_PRINT_DEBUG("RTI received address query from %d for %d.", fed_id, remote_fed_id); + + // NOTE: server_port initializes to -1, which means the RTI does not know + // the port number because it has not yet received an MSG_TYPE_ADDRESS_ADVERTISEMENT message + // from this federate. In that case, it will respond by sending -1. + + // Response message is also of type MSG_TYPE_ADDRESS_QUERY. + buffer[0] = MSG_TYPE_ADDRESS_QUERY; + + // Encode the port number. + federate_info_t *remote_fed = GET_FED_INFO(remote_fed_id); + + // Send the port number (which could be -1). 
+ LF_MUTEX_LOCK(rti_mutex); + encode_int32(remote_fed->server_port, (unsigned char *)&buffer[1]); + write_to_socket_fail_on_error( + &fed->socket, sizeof(int32_t) + 1, (unsigned char *)buffer, &rti_mutex, + "Failed to write port number to socket of federate %d.", fed_id); + + // Send the server IP address to federate. + write_to_socket_fail_on_error( + &fed->socket, sizeof(remote_fed->server_ip_addr), + (unsigned char *)&remote_fed->server_ip_addr, &rti_mutex, + "Failed to write ip address to socket of federate %d.", fed_id); + LF_MUTEX_UNLOCK(rti_mutex); + + LF_PRINT_DEBUG("Replied to address query from federate %d with address %s:%d.", + fed_id, remote_fed->server_hostname, remote_fed->server_port); +} + +void handle_address_ad(uint16_t federate_id) { + federate_info_t *fed = GET_FED_INFO(federate_id); + // Read the port number of the federate that can be used for physical + // connections to other federates + int32_t server_port = -1; + unsigned char buffer[sizeof(int32_t)]; + read_from_socket_fail_on_error(&fed->socket, sizeof(int32_t), (unsigned char *)buffer, NULL, + "Error reading port data from federate %d.", federate_id); + + server_port = extract_int32(buffer); + + assert(server_port < 65536); + + LF_MUTEX_LOCK(rti_mutex); + fed->server_port = server_port; + LF_MUTEX_UNLOCK(rti_mutex); + + LF_PRINT_LOG("Received address advertisement with port %d from federate %d.", server_port, federate_id); + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_ADR_AD, federate_id, NULL); + } +} + +void handle_timestamp(federate_info_t *my_fed) { + unsigned char buffer[sizeof(int64_t)]; + // Read bytes from the socket. We need 8 bytes. 
+ read_from_socket_fail_on_error(&my_fed->socket, sizeof(int64_t), (unsigned char *)&buffer, NULL, + "ERROR reading timestamp from federate %d.\n", my_fed->enclave.id); + + int64_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(&buffer))); + if (rti_remote->base.tracing_enabled) { + tag_t tag = {.time = timestamp, .microstep = 0}; + tracepoint_rti_from_federate(rti_remote->base.trace, receive_TIMESTAMP, my_fed->enclave.id, &tag); + } + LF_PRINT_DEBUG("RTI received timestamp message with time: " PRINTF_TIME ".", timestamp); + + LF_MUTEX_LOCK(rti_mutex); + rti_remote->num_feds_proposed_start++; + if (timestamp > rti_remote->max_start_time) { + rti_remote->max_start_time = timestamp; + } + if (rti_remote->num_feds_proposed_start == rti_remote->base.number_of_scheduling_nodes) { + // All federates have proposed a start time. + lf_cond_broadcast(&received_start_times); + } else { + // Some federates have not yet proposed a start time. + // wait for a notification. + while (rti_remote->num_feds_proposed_start < rti_remote->base.number_of_scheduling_nodes) { + // FIXME: Should have a timeout here? + lf_cond_wait(&received_start_times); + } + } + + LF_MUTEX_UNLOCK(rti_mutex); + + // Send back to the federate the maximum time plus an offset on a TIMESTAMP + // message. + unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_LENGTH]; + start_time_buffer[0] = MSG_TYPE_TIMESTAMP; + // Add an offset to this start time to get everyone starting together. 
+ start_time = rti_remote->max_start_time + DELAY_START; + encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); + + if (rti_remote->base.tracing_enabled) { + tag_t tag = {.time = start_time, .microstep = 0}; + tracepoint_rti_to_federate(rti_remote->base.trace, send_TIMESTAMP, my_fed->enclave.id, &tag); + } + if (write_to_socket(my_fed->socket, MSG_TYPE_TIMESTAMP_LENGTH, start_time_buffer)) { + lf_print_error("Failed to send the starting time to federate %d.", my_fed->enclave.id); + } + + LF_MUTEX_LOCK(rti_mutex); + // Update state for the federate to indicate that the MSG_TYPE_TIMESTAMP + // message has been sent. That MSG_TYPE_TIMESTAMP message grants time advance to + // the federate to the start time. + my_fed->enclave.state = GRANTED; + lf_cond_broadcast(&sent_start_time); + LF_PRINT_LOG("RTI sent start time " PRINTF_TIME " to federate %d.", start_time, my_fed->enclave.id); + LF_MUTEX_UNLOCK(rti_mutex); +} + +void send_physical_clock(unsigned char message_type, federate_info_t *fed, socket_type_t socket_type) { + if (fed->enclave.state == NOT_CONNECTED) { + lf_print_warning("Clock sync: RTI failed to send physical time to federate %d. Socket not connected.\n", + fed->enclave.id); + return; + } + unsigned char buffer[sizeof(int64_t) + 1]; + buffer[0] = message_type; + int64_t current_physical_time = lf_time_physical(); + encode_int64(current_physical_time, &(buffer[1])); + + // Send the message + if (socket_type == UDP) { + // FIXME: UDP_addr is never initialized. 
+ LF_PRINT_DEBUG("Clock sync: RTI sending UDP message type %u.", buffer[0]); + ssize_t bytes_written = sendto(rti_remote->socket_descriptor_UDP, buffer, 1 + sizeof(int64_t), 0, + (struct sockaddr *)&fed->UDP_addr, sizeof(fed->UDP_addr)); + if (bytes_written < (ssize_t)sizeof(int64_t) + 1) { + lf_print_warning("Clock sync: RTI failed to send physical time to federate %d: %s\n", + fed->enclave.id, + strerror(errno)); + return; + } + } + else if (socket_type == TCP) { + LF_PRINT_DEBUG("Clock sync: RTI sending TCP message type %u.", buffer[0]); + LF_MUTEX_LOCK(rti_mutex); + write_to_socket_fail_on_error(&fed->socket, 1 + sizeof(int64_t), buffer, &rti_mutex, + "Clock sync: RTI failed to send physical time to federate %d.", + fed->enclave.id); + LF_MUTEX_UNLOCK(rti_mutex); + } + LF_PRINT_DEBUG("Clock sync: RTI sent PHYSICAL_TIME_SYNC_MESSAGE with timestamp " PRINTF_TIME + " to federate %d.", + current_physical_time, + fed->enclave.id); +} + +void handle_physical_clock_sync_message(federate_info_t *my_fed, socket_type_t socket_type) { + // Lock the mutex to prevent interference between sending the two + // coded probe messages. + LF_MUTEX_LOCK(rti_mutex); + // Reply with a T4 type message + send_physical_clock(MSG_TYPE_CLOCK_SYNC_T4, my_fed, socket_type); + // Send the corresponding coded probe immediately after, + // but only if this is a UDP channel. + if (socket_type == UDP) { + send_physical_clock(MSG_TYPE_CLOCK_SYNC_CODED_PROBE, my_fed, socket_type); + } + LF_MUTEX_UNLOCK(rti_mutex); +} + +void *clock_synchronization_thread(void *noargs) { + + // Wait until all federates have been notified of the start time. + // FIXME: Use lf_ version of this when merged with master. + LF_MUTEX_LOCK(rti_mutex); + while (rti_remote->num_feds_proposed_start < rti_remote->base.number_of_scheduling_nodes) { + lf_cond_wait(&received_start_times); + } + LF_MUTEX_UNLOCK(rti_mutex); + + // Wait until the start time before starting clock synchronization. 
+ // The above wait ensures that start_time has been set. + interval_t ns_to_wait = start_time - lf_time_physical(); + + if (ns_to_wait > 0LL) { + lf_sleep(ns_to_wait); + } + + // Initiate a clock synchronization every rti->clock_sync_period_ns + // Initiate a clock synchronization every rti->clock_sync_period_ns + struct timespec sleep_time = {(time_t)rti_remote->clock_sync_period_ns / BILLION, + rti_remote->clock_sync_period_ns % BILLION}; + struct timespec remaining_time; + + bool any_federates_connected = true; + while (any_federates_connected) { + // Sleep + lf_sleep(rti_remote->clock_sync_period_ns); // Can be interrupted + any_federates_connected = false; + for (int fed_id = 0; fed_id < rti_remote->base.number_of_scheduling_nodes; fed_id++) { + federate_info_t *fed = GET_FED_INFO(fed_id); + if (fed->enclave.state == NOT_CONNECTED) { + // FIXME: We need better error handling here, but clock sync failure + // should not stop execution. + lf_print_error("Clock sync failed with federate %d. Not connected.", fed_id); + continue; + } else if (!fed->clock_synchronization_enabled) { + continue; + } + // Send the RTI's current physical time to the federate + // Send on UDP. + LF_PRINT_DEBUG("RTI sending T1 message to initiate clock sync round."); + send_physical_clock(MSG_TYPE_CLOCK_SYNC_T1, fed, UDP); + + // Listen for reply message, which should be T3. + size_t message_size = 1 + sizeof(int32_t); + unsigned char buffer[message_size]; + // Maximum number of messages that we discard before giving up on this cycle. + // If the T3 message from this federate does not arrive and we keep receiving + // other messages, then give up on this federate and move to the next federate. + int remaining_attempts = 5; + while (remaining_attempts > 0) { + remaining_attempts--; + int read_failed = read_from_socket(rti_remote->socket_descriptor_UDP, message_size, buffer); + // If any errors occur, either discard the message or the clock sync round. 
+ if (!read_failed) { + if (buffer[0] == MSG_TYPE_CLOCK_SYNC_T3) { + int32_t fed_id_2 = extract_int32(&(buffer[1])); + // Check that this message came from the correct federate. + if (fed_id_2 != fed->enclave.id) { + // Message is from the wrong federate. Discard the message. + lf_print_warning("Clock sync: Received T3 message from federate %d, " + "but expected one from %d. Discarding message.", + fed_id_2, fed->enclave.id); + continue; + } + LF_PRINT_DEBUG("Clock sync: RTI received T3 message from federate %d.", fed_id_2); + handle_physical_clock_sync_message(GET_FED_INFO(fed_id_2), UDP); + break; + } else { + // The message is not a T3 message. Discard the message and + // continue waiting for the T3 message. This is possibly a message + // from a previous cycle that was discarded. + lf_print_warning( + "Clock sync: Unexpected UDP message %u. Expected %u from federate %d. " + "Discarding message.", + buffer[0], + MSG_TYPE_CLOCK_SYNC_T3, + fed->enclave.id); + continue; + } + } else { + lf_print_warning("Clock sync: Read from UDP socket failed: %s. " + "Skipping clock sync round for federate %d.", + strerror(errno), + fed->enclave.id); + remaining_attempts = -1; + } + } + if (remaining_attempts > 0) { + any_federates_connected = true; + } + } + } + return NULL; +} + +/** + * Handle MSG_TYPE_FAILED sent by a federate. This message is sent by a federate + * that is exiting in failure. In this case, the RTI will + * also terminate abnormally, returning a non-zero exit code when it exits. + * + * This function assumes the caller does not hold the mutex. + * + * @param my_fed The federate sending a MSG_TYPE_FAILED message. + */ +static void handle_federate_failed(federate_info_t *my_fed) { + // Nothing more to do. Close the socket and exit. 
+ LF_MUTEX_LOCK(rti_mutex); + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_FAILED, my_fed->enclave.id, NULL); + } + + // Set the flag telling the RTI to exit with an error code when it exits. + _lf_federate_reports_error = true; + lf_print_error("RTI: Federate %d reports an error and has exited.", my_fed->enclave.id); + + my_fed->enclave.state = NOT_CONNECTED; + + // Indicate that there will be no further events from this federate. + my_fed->enclave.next_event = FOREVER_TAG; + + // According to this: https://stackoverflow.com/questions/4160347/close-vs-shutdown-socket, + // the close should happen when receiving a 0 length message from the other end. + // Here, we just signal the other side that no further writes to the socket are + // forthcoming, which should result in the other end getting a zero-length reception. + shutdown(my_fed->socket, SHUT_RDWR); + + // We can now safely close the socket. + close(my_fed->socket); // from unistd.h + + // Check downstream federates to see whether they should now be granted a TAG. + // To handle cycles, need to create a boolean array to keep + // track of which upstream federates have been visited. + bool *visited = (bool *)calloc(rti_remote->base.number_of_scheduling_nodes, sizeof(bool)); // Initializes to 0. + notify_downstream_advance_grant_if_safe(&(my_fed->enclave), visited); + free(visited); + + LF_MUTEX_UNLOCK(rti_mutex); +} + +/** + * Handle MSG_TYPE_RESIGN sent by a federate. This message is sent at the time of termination + * after all shutdown events are processed on the federate. + * + * This function assumes the caller does not hold the mutex. + * + * @note At this point, the RTI might have outgoing messages to the federate. This + * function thus first performs a shutdown on the socket, which sends an EOF. It then + * waits for the remote socket to be closed before closing the socket itself.
+ * + * @param my_fed The federate sending a MSG_TYPE_RESIGN message. + */ +static void handle_federate_resign(federate_info_t *my_fed) { + // Nothing more to do. Close the socket and exit. + LF_MUTEX_LOCK(rti_mutex); + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_RESIGN, my_fed->enclave.id, NULL); + } + + lf_print("RTI: Federate %d has resigned.", my_fed->enclave.id); + + my_fed->enclave.state = NOT_CONNECTED; + + // Indicate that there will no further events from this federate. + my_fed->enclave.next_event = FOREVER_TAG; + + // According to this: https://stackoverflow.com/questions/4160347/close-vs-shutdown-socket, + // the close should happen when receiving a 0 length message from the other end. + // Here, we just signal the other side that no further writes to the socket are + // forthcoming, which should result in the other end getting a zero-length reception. + shutdown(my_fed->socket, SHUT_WR); + + // Wait for the federate to send an EOF or a socket error to occur. + // Discard any incoming bytes. Normally, this read should return 0 because + // the federate is resigning and should itself invoke shutdown. + unsigned char buffer[10]; + while (read(my_fed->socket, buffer, 10) > 0); + + // We can now safely close the socket. + close(my_fed->socket); // from unistd.h + + // Check downstream federates to see whether they should now be granted a TAG. + // To handle cycles, need to create a boolean array to keep + // track of which upstream federates have been visited. + bool *visited = (bool *)calloc(rti_remote->base.number_of_scheduling_nodes, sizeof(bool)); // Initializes to 0. + notify_downstream_advance_grant_if_safe(&(my_fed->enclave), visited); + free(visited); + + LF_MUTEX_UNLOCK(rti_mutex); +} + +void *federate_info_thread_TCP(void *fed) { + federate_info_t *my_fed = (federate_info_t *)fed; + + // Buffer for incoming messages. 
+ // This does not constrain the message size because messages + // are forwarded piece by piece. + unsigned char buffer[FED_COM_BUFFER_SIZE]; + + // Listen for messages from the federate. + while (my_fed->enclave.state != NOT_CONNECTED) { + // Read no more than one byte to get the message type. + int read_failed = read_from_socket(my_fed->socket, 1, buffer); + if (read_failed) { + // Socket is closed + lf_print_warning("RTI: Socket to federate %d is closed. Exiting the thread.", my_fed->enclave.id); + my_fed->enclave.state = NOT_CONNECTED; + my_fed->socket = -1; + // FIXME: We need better error handling here, but do not stop execution here. + break; + } + LF_PRINT_DEBUG("RTI: Received message type %u from federate %d.", buffer[0], my_fed->enclave.id); + switch (buffer[0]) { + case MSG_TYPE_TIMESTAMP: + handle_timestamp(my_fed); + break; + case MSG_TYPE_ADDRESS_QUERY: + handle_address_query(my_fed->enclave.id); + break; + case MSG_TYPE_ADDRESS_ADVERTISEMENT: + handle_address_ad(my_fed->enclave.id); + break; + case MSG_TYPE_TAGGED_MESSAGE: + handle_timed_message(my_fed, buffer); + break; + case MSG_TYPE_RESIGN: + handle_federate_resign(my_fed); + return NULL; + case MSG_TYPE_NEXT_EVENT_TAG: + handle_next_event_tag(my_fed); + break; + case MSG_TYPE_LATEST_TAG_COMPLETE: + handle_latest_tag_complete(my_fed); + break; + case MSG_TYPE_STOP_REQUEST: + handle_stop_request_message(my_fed); // FIXME: Reviewed until here. 
+ // Need to also look at + // notify_advance_grant_if_safe() + // and notify_downstream_advance_grant_if_safe() + break; + case MSG_TYPE_STOP_REQUEST_REPLY: + handle_stop_request_reply(my_fed); + break; + case MSG_TYPE_PORT_ABSENT: + handle_port_absent_message(my_fed, buffer); + break; + case MSG_TYPE_FAILED: + handle_federate_failed(my_fed); + return NULL; + default: + lf_print_error("RTI received from federate %d an unrecognized TCP message type: %u.", my_fed->enclave.id, buffer[0]); + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_UNIDENTIFIED, my_fed->enclave.id, NULL); + } + } + } + + // Nothing more to do. Close the socket and exit. + // Prevent multiple threads from closing the same socket at the same time. + LF_MUTEX_LOCK(rti_mutex); + close(my_fed->socket); // from unistd.h + LF_MUTEX_UNLOCK(rti_mutex); + return NULL; +} + +void send_reject(int *socket_id, unsigned char error_code) { + LF_PRINT_DEBUG("RTI sending MSG_TYPE_REJECT."); + unsigned char response[2]; + response[0] = MSG_TYPE_REJECT; + response[1] = error_code; + LF_MUTEX_LOCK(rti_mutex); + // NOTE: Ignore errors on this response. + if (write_to_socket(*socket_id, 2, response)) { + lf_print_warning("RTI failed to write MSG_TYPE_REJECT message on the socket."); + } + // Close the socket. + shutdown(*socket_id, SHUT_RDWR); + close(*socket_id); + *socket_id = -1; + LF_MUTEX_UNLOCK(rti_mutex); +} + +/** + * Listen for a MSG_TYPE_FED_IDS message, which includes as a payload + * a federate ID and a federation ID. If the federation ID + * matches this federation, send an MSG_TYPE_ACK and otherwise send + * a MSG_TYPE_REJECT message. + * @param socket_id Pointer to the socket on which to listen. + * @param client_fd The socket address. + * @return The federate ID for success or -1 for failure. 
+ */ +static int32_t receive_and_check_fed_id_message(int *socket_id, struct sockaddr_in *client_fd) { + // Buffer for message ID, federate ID, and federation ID length. + size_t length = 1 + sizeof(uint16_t) + 1; // Message ID, federate ID, length of fedration ID. + unsigned char buffer[length]; + + // Read bytes from the socket. We need 4 bytes. + if (read_from_socket_close_on_error(socket_id, length, buffer)) { + lf_print_error("RTI failed to read from accepted socket."); + return -1; + } + + uint16_t fed_id = rti_remote->base.number_of_scheduling_nodes; // Initialize to an invalid value. + + // First byte received is the message type. + if (buffer[0] != MSG_TYPE_FED_IDS) { + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_REJECT, fed_id, NULL); + } + if (buffer[0] == MSG_TYPE_P2P_SENDING_FED_ID || buffer[0] == MSG_TYPE_P2P_TAGGED_MESSAGE) { + // The federate is trying to connect to a peer, not to the RTI. + // It has connected to the RTI instead. + // FIXME: This should not happen, but apparently has been observed. + // It should not happen because the peers get the port and IP address + // of the peer they want to connect to from the RTI. + // If the connection is a peer-to-peer connection between two + // federates, reject the connection with the WRONG_SERVER error. + send_reject(socket_id, WRONG_SERVER); + } else { + send_reject(socket_id, UNEXPECTED_MESSAGE); + } + lf_print_error("RTI expected a MSG_TYPE_FED_IDS message. Got %u (see net_common.h).", buffer[0]); + return -1; + } else { + // Received federate ID. + fed_id = extract_uint16(buffer + 1); + LF_PRINT_DEBUG("RTI received federate ID: %d.", fed_id); + + // Read the federation ID. First read the length, which is one byte. + size_t federation_id_length = (size_t)buffer[sizeof(uint16_t) + 1]; + char federation_id_received[federation_id_length + 1]; // One extra for null terminator. + // Next read the actual federation ID. 
+ if (read_from_socket_close_on_error(socket_id, federation_id_length, + (unsigned char *)federation_id_received)) { + lf_print_error("RTI failed to read federation id from federate %d.", fed_id); + return -1; + } + + // Terminate the string with a null. + federation_id_received[federation_id_length] = 0; + + LF_PRINT_DEBUG("RTI received federation ID: %s.", federation_id_received); + + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_from_federate(rti_remote->base.trace, receive_FED_ID, fed_id, NULL); + } + // Compare the received federation ID to mine. + if (strncmp(rti_remote->federation_id, federation_id_received, federation_id_length) != 0) { + // Federation IDs do not match. Send back a MSG_TYPE_REJECT message. + lf_print_warning("Federate from another federation %s attempted to connect to RTI in federation %s.", + federation_id_received, + rti_remote->federation_id); + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_REJECT, fed_id, NULL); + } + send_reject(socket_id, FEDERATION_ID_DOES_NOT_MATCH); + return -1; + } else { + if (fed_id >= rti_remote->base.number_of_scheduling_nodes) { + // Federate ID is out of range. + lf_print_error("RTI received federate ID %d, which is out of range.", fed_id); + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_REJECT, fed_id, NULL); + } + send_reject(socket_id, FEDERATE_ID_OUT_OF_RANGE); + return -1; + } else { + if ((rti_remote->base.scheduling_nodes[fed_id])->state != NOT_CONNECTED) { + lf_print_error("RTI received duplicate federate ID: %d.", fed_id); + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_REJECT, fed_id, NULL); + } + send_reject(socket_id, FEDERATE_ID_IN_USE); + return -1; + } + } + } + } + federate_info_t *fed = GET_FED_INFO(fed_id); + // The MSG_TYPE_FED_IDS message has the right federation ID. + // Assign the address information for federate. 
+ // The IP address is stored here as an in_addr struct (in .server_ip_addr) that can be useful + // to create sockets and can be efficiently sent over the network. + // First, convert the sockaddr structure into a sockaddr_in that contains an internet address. + struct sockaddr_in *pV4_addr = client_fd; + // Then extract the internet address (which is in IPv4 format) and assign it as the federate's socket server + fed->server_ip_addr = pV4_addr->sin_addr; + +#if LOG_LEVEL >= LOG_LEVEL_DEBUG + // Create the human readable format and copy that into + // the .server_hostname field of the federate. + char str[INET_ADDRSTRLEN + 1]; + inet_ntop(AF_INET, &fed->server_ip_addr, str, INET_ADDRSTRLEN); + strncpy(fed->server_hostname, str, INET_ADDRSTRLEN); + + LF_PRINT_DEBUG("RTI got address %s from federate %d.", fed->server_hostname, fed_id); +#endif + fed->socket = *socket_id; + + // Set the federate's state as pending + // because it is waiting for the start time to be + // sent by the RTI before beginning its execution. + fed->enclave.state = PENDING; + + LF_PRINT_DEBUG("RTI responding with MSG_TYPE_ACK to federate %d.", fed_id); + // Send an MSG_TYPE_ACK message. + unsigned char ack_message = MSG_TYPE_ACK; + if (rti_remote->base.tracing_enabled) { + tracepoint_rti_to_federate(rti_remote->base.trace, send_ACK, fed_id, NULL); + } + LF_MUTEX_LOCK(rti_mutex); + if (write_to_socket_close_on_error(&fed->socket, 1, &ack_message)) { + LF_MUTEX_UNLOCK(rti_mutex); + lf_print_error("RTI failed to write MSG_TYPE_ACK message to federate %d.", fed_id); + return -1; + } + LF_MUTEX_UNLOCK(rti_mutex); + + LF_PRINT_DEBUG("RTI sent MSG_TYPE_ACK to federate %d.", fed_id); + + return (int32_t)fed_id; +} + +/** + * Listen for a MSG_TYPE_NEIGHBOR_STRUCTURE message, and upon receiving it, fill + * out the relevant information in the federate's struct. + * @return 1 on success and 0 on failure. 
+ */ +static int receive_connection_information(int *socket_id, uint16_t fed_id) { + LF_PRINT_DEBUG("RTI waiting for MSG_TYPE_NEIGHBOR_STRUCTURE from federate %d.", fed_id); + unsigned char connection_info_header[MSG_TYPE_NEIGHBOR_STRUCTURE_HEADER_SIZE]; + read_from_socket_fail_on_error( + socket_id, + MSG_TYPE_NEIGHBOR_STRUCTURE_HEADER_SIZE, + connection_info_header, + NULL, + "RTI failed to read MSG_TYPE_NEIGHBOR_STRUCTURE message header from federate %d.", + fed_id); + + if (connection_info_header[0] != MSG_TYPE_NEIGHBOR_STRUCTURE) { + lf_print_error( + "RTI was expecting a MSG_TYPE_UDP_PORT message from federate %d. Got %u instead. " + "Rejecting federate.", + fed_id, connection_info_header[0]); + send_reject(socket_id, UNEXPECTED_MESSAGE); + return 0; + } else { + federate_info_t *fed = GET_FED_INFO(fed_id); + // Read the number of upstream and downstream connections + fed->enclave.num_upstream = extract_int32(&(connection_info_header[1])); + fed->enclave.num_downstream = extract_int32(&(connection_info_header[1 + sizeof(int32_t)])); + LF_PRINT_DEBUG( + "RTI got %d upstreams and %d downstreams from federate %d.", + fed->enclave.num_upstream, + fed->enclave.num_downstream, + fed_id); + + // Allocate memory for the upstream and downstream pointers + if (fed->enclave.num_upstream > 0) { + fed->enclave.upstream = (int *)malloc(sizeof(uint16_t) * fed->enclave.num_upstream); + // Allocate memory for the upstream delay pointers + fed->enclave.upstream_delay = (interval_t *)malloc( + sizeof(interval_t) * fed->enclave.num_upstream); + } else { + fed->enclave.upstream = (int *)NULL; + fed->enclave.upstream_delay = (interval_t *)NULL; + } + if (fed->enclave.num_downstream > 0) { + fed->enclave.downstream = (int *)malloc(sizeof(uint16_t) * fed->enclave.num_downstream); + } else { + fed->enclave.downstream = (int *)NULL; + } + + size_t connections_info_body_size = ( + (sizeof(uint16_t) + sizeof(int64_t)) * fed->enclave.num_upstream) + + (sizeof(uint16_t) * 
fed->enclave.num_downstream); + unsigned char *connections_info_body = NULL; + if (connections_info_body_size > 0) { + connections_info_body = (unsigned char *)malloc(connections_info_body_size); + read_from_socket_fail_on_error( + socket_id, + connections_info_body_size, + connections_info_body, + NULL, + "RTI failed to read MSG_TYPE_NEIGHBOR_STRUCTURE message body from federate %d.", + fed_id); + // Keep track of where we are in the buffer + size_t message_head = 0; + // First, read the info about upstream federates + for (int i = 0; i < fed->enclave.num_upstream; i++) { + fed->enclave.upstream[i] = extract_uint16(&(connections_info_body[message_head])); + message_head += sizeof(uint16_t); + fed->enclave.upstream_delay[i] = extract_int64(&(connections_info_body[message_head])); + message_head += sizeof(int64_t); + } + + // Next, read the info about downstream federates + for (int i = 0; i < fed->enclave.num_downstream; i++) { + fed->enclave.downstream[i] = extract_uint16(&(connections_info_body[message_head])); + message_head += sizeof(uint16_t); + } + + free(connections_info_body); + } + } + LF_PRINT_DEBUG("RTI received neighbor structure from federate %d.", fed_id); + return 1; +} + +/** + * Listen for a MSG_TYPE_UDP_PORT message, and upon receiving it, set up + * clock synchronization and perform the initial clock synchronization. + * Initial clock synchronization is performed only if the MSG_TYPE_UDP_PORT message + * payload is not UINT16_MAX. If it is also not 0, then this function sets + * up to perform runtime clock synchronization using the UDP port number + * specified in the payload to communicate with the federate's clock + * synchronization logic. + * @param socket_id The socket on which to listen. + * @param fed_id The federate ID. + * @return 1 for success, 0 for failure. 
+ */ +static int receive_udp_message_and_set_up_clock_sync(int *socket_id, uint16_t fed_id) { + // Read the MSG_TYPE_UDP_PORT message from the federate regardless of the status of + // clock synchronization. This message will tell the RTI whether the federate + // is doing clock synchronization, and if it is, what port to use for UDP. + LF_PRINT_DEBUG("RTI waiting for MSG_TYPE_UDP_PORT from federate %d.", fed_id); + unsigned char response[1 + sizeof(uint16_t)]; + read_from_socket_fail_on_error(socket_id, 1 + sizeof(uint16_t), response, NULL, + "RTI failed to read MSG_TYPE_UDP_PORT message from federate %d.", fed_id); + if (response[0] != MSG_TYPE_UDP_PORT) { + lf_print_error( + "RTI was expecting a MSG_TYPE_UDP_PORT message from federate %d. Got %u instead. " + "Rejecting federate.", + fed_id, response[0]); + send_reject(socket_id, UNEXPECTED_MESSAGE); + return 0; + } else { + federate_info_t *fed = GET_FED_INFO(fed_id); + if (rti_remote->clock_sync_global_status >= clock_sync_init) { + // If no initial clock sync, no need perform initial clock sync. + uint16_t federate_UDP_port_number = extract_uint16(&(response[1])); + + LF_PRINT_DEBUG("RTI got MSG_TYPE_UDP_PORT %u from federate %d.", federate_UDP_port_number, fed_id); + + // A port number of UINT16_MAX means initial clock sync should not be performed. + if (federate_UDP_port_number != UINT16_MAX) { + // Perform the initialization clock synchronization with the federate. + // Send the required number of messages for clock synchronization + for (int i = 0; i < rti_remote->clock_sync_exchanges_per_interval; i++) { + // Send the RTI's current physical time T1 to the federate. + send_physical_clock(MSG_TYPE_CLOCK_SYNC_T1, fed, TCP); + + // Listen for reply message, which should be T3. 
+ size_t message_size = 1 + sizeof(int32_t); + unsigned char buffer[message_size]; + read_from_socket_fail_on_error(socket_id, message_size, buffer, NULL, + "Socket to federate %d unexpectedly closed.", fed_id); + if (buffer[0] == MSG_TYPE_CLOCK_SYNC_T3) { + int32_t fed_id = extract_int32(&(buffer[1])); + assert(fed_id > -1); + assert(fed_id < 65536); + LF_PRINT_DEBUG("RTI received T3 clock sync message from federate %d.", fed_id); + handle_physical_clock_sync_message(fed, TCP); + } else { + lf_print_error("Unexpected message %u from federate %d.", buffer[0], fed_id); + send_reject(socket_id, UNEXPECTED_MESSAGE); + return 0; + } + } + LF_PRINT_DEBUG("RTI finished initial clock synchronization with federate %d.", fed_id); + } + if (rti_remote->clock_sync_global_status >= clock_sync_on) { + // If no runtime clock sync, no need to set up the UDP port. + if (federate_UDP_port_number > 0) { + // Initialize the UDP_addr field of the federate struct + fed->UDP_addr.sin_family = AF_INET; + fed->UDP_addr.sin_port = htons(federate_UDP_port_number); + fed->UDP_addr.sin_addr = fed->server_ip_addr; + } + } else { + // Disable clock sync after initial round. + fed->clock_synchronization_enabled = false; + } + } else { + // No clock synchronization at all. + LF_PRINT_DEBUG("RTI: No clock synchronization for federate %d.", fed_id); + // Clock synchronization is universally disabled via the clock-sync command-line parameter + // (-c off was passed to the RTI). + // Note that the federates are still going to send a + // MSG_TYPE_UDP_PORT message but with a payload (port) of -1. + fed->clock_synchronization_enabled = false; + } + } + return 1; +} + +#ifdef __RTI_AUTH__ +/** + * Authenticate incoming federate by performing HMAC-based authentication. + * + * @param socket Socket for the incoming federate tryting to authenticate. + * @return True if authentication is successful and false otherwise. 
+ */ +static bool authenticate_federate(int *socket) { + // Wait for MSG_TYPE_FED_NONCE from federate. + size_t fed_id_length = sizeof(uint16_t); + unsigned char buffer[1 + fed_id_length + NONCE_LENGTH]; + read_from_socket_fail_on_error(socket, 1 + fed_id_length + NONCE_LENGTH, buffer, NULL, + "Failed to read MSG_TYPE_FED_NONCE"); + if (buffer[0] != MSG_TYPE_FED_NONCE) { + lf_print_error_and_exit( + "Received unexpected response %u from the FED (see net_common.h).", + buffer[0]); + } + unsigned int hmac_length = SHA256_HMAC_LENGTH; + size_t federation_id_length = strnlen(rti_remote->federation_id, 255); + // HMAC tag is created with MSG_TYPE, federate ID, received federate nonce. + unsigned char mac_buf[1 + fed_id_length + NONCE_LENGTH]; + mac_buf[0] = MSG_TYPE_RTI_RESPONSE; + memcpy(&mac_buf[1], &buffer[1], fed_id_length); + memcpy(&mac_buf[1 + fed_id_length], &buffer[1 + fed_id_length], NONCE_LENGTH); + unsigned char hmac_tag[hmac_length]; + unsigned char *ret = HMAC(EVP_sha256(), rti_remote->federation_id, + federation_id_length, mac_buf, 1 + fed_id_length + NONCE_LENGTH, + hmac_tag, &hmac_length); + if (ret == NULL) { + lf_print_error_and_exit("HMAC construction failed for MSG_TYPE_RTI_RESPONSE."); + } + // Make buffer for message type, RTI's nonce, and HMAC tag. 
+ unsigned char sender[1 + NONCE_LENGTH + hmac_length]; + sender[0] = MSG_TYPE_RTI_RESPONSE; + unsigned char rti_nonce[NONCE_LENGTH]; + RAND_bytes(rti_nonce, NONCE_LENGTH); + memcpy(&sender[1], rti_nonce, NONCE_LENGTH); + memcpy(&sender[1 + NONCE_LENGTH], hmac_tag, hmac_length); + if (write_to_socket(*socket, 1 + NONCE_LENGTH + hmac_length, sender)) { + lf_print_error("Failed to send nonce to federate."); + } + + // Wait for MSG_TYPE_FED_RESPONSE + unsigned char received[1 + hmac_length]; + read_from_socket_fail_on_error(socket, 1 + hmac_length, received, NULL, + "Failed to read federate response."); + if (received[0] != MSG_TYPE_FED_RESPONSE) { + lf_print_error_and_exit( + "Received unexpected response %u from the federate (see net_common.h).", + received[0]); + return false; + } + // HMAC tag is created with MSG_TYPE_FED_RESPONSE and RTI's nonce. + unsigned char mac_buf2[1 + NONCE_LENGTH]; + mac_buf2[0] = MSG_TYPE_FED_RESPONSE; + memcpy(&mac_buf2[1], rti_nonce, NONCE_LENGTH); + unsigned char rti_tag[hmac_length]; + ret = HMAC(EVP_sha256(), rti_remote->federation_id, federation_id_length, + mac_buf2, 1 + NONCE_LENGTH, rti_tag, &hmac_length); + if (ret == NULL) { + lf_print_error_and_exit("HMAC construction failed for MSG_TYPE_FED_RESPONSE."); + } + // Compare received tag and created tag. + if (memcmp(&received[1], rti_tag, hmac_length) != 0) { + // Federation IDs do not match. Send back a HMAC_DOES_NOT_MATCH message. + lf_print_warning("HMAC authentication failed. Rejecting the federate."); + send_reject(socket, HMAC_DOES_NOT_MATCH); + return false; + } else { + LF_PRINT_LOG("Federate's HMAC verified."); + return true; + } +} +#endif + +void lf_connect_to_federates(int socket_descriptor) { + for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { + // Wait for an incoming connection request. + struct sockaddr client_fd; + uint32_t client_length = sizeof(client_fd); + // The following blocks until a federate connects. 
+ int socket_id = -1; + while (1) { + socket_id = accept(rti_remote->socket_descriptor_TCP, &client_fd, &client_length); + if (socket_id >= 0) { + // Got a socket + break; + } else if (socket_id < 0 && (errno != EAGAIN || errno != EWOULDBLOCK)) { + lf_print_error_system_failure("RTI failed to accept the socket."); + } else { + // Try again + lf_print_warning("RTI failed to accept the socket. %s. Trying again.", strerror(errno)); + continue; + } + } + +// Wait for the first message from the federate when RTI -a option is on. +#ifdef __RTI_AUTH__ + if (rti_remote->authentication_enabled) { + if (!authenticate_federate(&socket_id)) { + lf_print_warning("RTI failed to authenticate the incoming federate."); + // Close the socket. + shutdown(socket_id, SHUT_RDWR); + close(socket_id); + socket_id = -1; + // Ignore the federate that failed authentication. + i--; + continue; + } + } +#endif + + // The first message from the federate should contain its ID and the federation ID. + int32_t fed_id = receive_and_check_fed_id_message(&socket_id, (struct sockaddr_in *)&client_fd); + if (fed_id >= 0 && socket_id >= 0 + && receive_connection_information(&socket_id, (uint16_t)fed_id) + && receive_udp_message_and_set_up_clock_sync(&socket_id, (uint16_t)fed_id)) { + + // Create a thread to communicate with the federate. + // This has to be done after clock synchronization is finished + // or that thread may end up attempting to handle incoming clock + // synchronization messages. + federate_info_t *fed = GET_FED_INFO(fed_id); + lf_thread_create(&(fed->thread_id), federate_info_thread_TCP, fed); + } else { + // Received message was rejected. Try again. + i--; + } + } + // All federates have connected. 
+ LF_PRINT_DEBUG("All federates have connected to RTI."); + + if (rti_remote->clock_sync_global_status >= clock_sync_on) { + // Create the thread that performs periodic PTP clock synchronization sessions + // over the UDP channel, but only if the UDP channel is open and at least one + // federate is performing runtime clock synchronization. + bool clock_sync_enabled = false; + for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { + federate_info_t *fed_info = GET_FED_INFO(i); + if (fed_info->clock_synchronization_enabled) { + clock_sync_enabled = true; + break; + } + } + if (rti_remote->final_port_UDP != UINT16_MAX && clock_sync_enabled) { + lf_thread_create(&rti_remote->clock_thread, clock_synchronization_thread, NULL); + } + } +} + +void *respond_to_erroneous_connections(void *nothing) { + while (true) { + // Wait for an incoming connection request. + struct sockaddr client_fd; + uint32_t client_length = sizeof(client_fd); + // The following will block until either a federate attempts to connect + // or close(rti->socket_descriptor_TCP) is called. + int socket_id = accept(rti_remote->socket_descriptor_TCP, &client_fd, &client_length); + if (socket_id < 0) return NULL; + + if (rti_remote->all_federates_exited) { + return NULL; + } + + lf_print_error("RTI received an unexpected connection request. Federation is running."); + unsigned char response[2]; + response[0] = MSG_TYPE_REJECT; + response[1] = FEDERATION_ID_DOES_NOT_MATCH; + // Ignore errors on this response. + if (write_to_socket(socket_id, 2, response)) { + lf_print_warning("RTI failed to write FEDERATION_ID_DOES_NOT_MATCH to erroneous incoming connection."); + } + // Close the socket. + shutdown(socket_id, SHUT_RDWR); + close(socket_id); + } + return NULL; +} + +void initialize_federate(federate_info_t *fed, uint16_t id) { + initialize_scheduling_node(&(fed->enclave), id); + fed->requested_stop = false; + fed->socket = -1; // No socket. 
+ fed->clock_synchronization_enabled = true; + fed->in_transit_message_tags = pqueue_tag_init(10); + strncpy(fed->server_hostname, "localhost", INET_ADDRSTRLEN); + fed->server_ip_addr.s_addr = 0; + fed->server_port = -1; +} + +int32_t start_rti_server(uint16_t port) { + _lf_initialize_clock(); + // Create the TCP socket server + rti_remote->socket_descriptor_TCP = create_rti_server(port, TCP); + lf_print("RTI: Listening for federates."); + // Create the UDP socket server + // Try to get the rti_remote->final_port_TCP + 1 port + if (rti_remote->clock_sync_global_status >= clock_sync_on) { + rti_remote->socket_descriptor_UDP = create_rti_server(rti_remote->final_port_TCP + 1, UDP); + } + return rti_remote->socket_descriptor_TCP; +} + +void wait_for_federates(int socket_descriptor) { + // Wait for connections from federates and create a thread for each. + lf_connect_to_federates(socket_descriptor); + + // All federates have connected. + lf_print("RTI: All expected federates have connected. Starting execution."); + + // The socket server will not continue to accept connections after all the federates + // have joined. + // In case some other federation's federates are trying to join the wrong + // federation, need to respond. Start a separate thread to do that. + lf_thread_t responder_thread; + lf_thread_create(&responder_thread, respond_to_erroneous_connections, NULL); + + // Wait for federate threads to exit. 
+ void *thread_exit_status; + for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { + federate_info_t *fed = GET_FED_INFO(i); + lf_print("RTI: Waiting for thread handling federate %d.", fed->enclave.id); + lf_thread_join(fed->thread_id, &thread_exit_status); + pqueue_tag_free(fed->in_transit_message_tags); + lf_print("RTI: Federate %d thread exited.", fed->enclave.id); + } + + rti_remote->all_federates_exited = true; + + // Shutdown and close the socket that is listening for incoming connections + // so that the accept() call in respond_to_erroneous_connections returns. + // That thread should then check rti->all_federates_exited and it should exit. + if (shutdown(socket_descriptor, SHUT_RDWR)) { + LF_PRINT_LOG("On shut down TCP socket, received reply: %s", strerror(errno)); + } + // NOTE: In all common TCP/IP stacks, there is a time period, + // typically between 30 and 120 seconds, called the TIME_WAIT period, + // before the port is released after this close. This is because + // the OS is preventing another program from accidentally receiving + // duplicated packets intended for this program. 
+ close(socket_descriptor); + + if (rti_remote->socket_descriptor_UDP > 0) { + if (shutdown(rti_remote->socket_descriptor_UDP, SHUT_RDWR)) { + LF_PRINT_LOG("On shut down UDP socket, received reply: %s", strerror(errno)); + } + close(rti_remote->socket_descriptor_UDP); + } +} + +void initialize_RTI(rti_remote_t *rti) { + rti_remote = rti; + + // Initialize thread synchronization primitives + LF_MUTEX_INIT(rti_mutex); + LF_COND_INIT(received_start_times, rti_mutex); + LF_COND_INIT(sent_start_time, rti_mutex); + + initialize_rti_common(&rti_remote->base); + rti_remote->base.mutex = &rti_mutex; + + // federation_rti related initializations + rti_remote->max_start_time = 0LL; + rti_remote->num_feds_proposed_start = 0; + rti_remote->all_federates_exited = false; + rti_remote->federation_id = "Unidentified Federation"; + rti_remote->user_specified_port = 0; + rti_remote->final_port_TCP = 0; + rti_remote->socket_descriptor_TCP = -1; + rti_remote->final_port_UDP = UINT16_MAX; + rti_remote->socket_descriptor_UDP = -1; + rti_remote->clock_sync_global_status = clock_sync_init; + rti_remote->clock_sync_period_ns = MSEC(10); + rti_remote->clock_sync_exchanges_per_interval = 10; + rti_remote->authentication_enabled = false; + rti_remote->base.tracing_enabled = false; + rti_remote->stop_in_progress = false; +} + +void free_scheduling_nodes(scheduling_node_t **scheduling_nodes, uint16_t number_of_scheduling_nodes) { + for (uint16_t i = 0; i < number_of_scheduling_nodes; i++) { + // FIXME: Gives error freeing memory not allocated!!!! 
+ scheduling_node_t *node = scheduling_nodes[i]; + if (node->upstream != NULL) + free(node->upstream); + if (node->downstream != NULL) + free(node->downstream); + } + free(scheduling_nodes); +} + +#endif // STANDALONE_RTI diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_remote.h similarity index 69% rename from core/federated/RTI/rti_lib.h rename to core/federated/RTI/rti_remote.h index 9d7c639480..9303da42db 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_remote.h @@ -1,3 +1,4 @@ +#if defined STANDALONE_RTI /** * @file * @author Edward A. Lee (eal@berkeley.edu) @@ -11,8 +12,8 @@ * used by scheduling enclaves. */ -#ifndef RTI_LIB_H -#define RTI_LIB_H +#ifndef RTI_REMOTE_H +#define RTI_REMOTE_H #include #include // Provides select() function to read from multiple sockets. @@ -21,7 +22,7 @@ #include // Defines read(), write(), and close() #include // Defines bzero(). -#include "enclave.h" +#include "rti_common.h" #ifdef __RTI_AUTH__ #include // For secure random number generation. @@ -29,7 +30,10 @@ #endif #include "lf_types.h" -#include "message_record/message_record.h" +#include "pqueue_tag.h" + +/** Time allowed for federates to reply to stop request. */ +#define MAX_TIME_FOR_REPLY_TO_STOP_REQUEST SEC(30) ///////////////////////////////////////////// //// Data structures @@ -47,8 +51,8 @@ typedef enum socket_type_t { * denoted with ~>) because those connections do not impose * any scheduling constraints. */ -typedef struct federate_t { - enclave_t enclave; +typedef struct federate_info_t { + scheduling_node_t enclave; bool requested_stop; // Indicates that the federate has requested stop or has replied // to a request for stop from the RTI. Used to prevent double-counting // a federate when handling lf_request_stop(). @@ -57,9 +61,9 @@ typedef struct federate_t { struct sockaddr_in UDP_addr; // The UDP address for the federate. 
bool clock_synchronization_enabled; // Indicates the status of clock synchronization // for this federate. Enabled by default. - in_transit_message_record_q_t* in_transit_message_tags; // Record of in-transit messages to this federate that are not - // yet processed. This record is ordered based on the time - // value of each message for a more efficient access. + pqueue_tag_t* in_transit_message_tags; // Record of in-transit messages to this federate that are not + // yet processed. This record is ordered based on the time + // value of each message for a more efficient access. char server_hostname[INET_ADDRSTRLEN]; // Human-readable IP address and int32_t server_port; // port number of the socket server of the federate // if it has any incoming direct connections from other federates. @@ -67,7 +71,9 @@ typedef struct federate_t { // RTI has not been informed of the port number. struct in_addr server_ip_addr; // Information about the IP address of the socket // server of the federate. -} federate_t; +} federate_info_t; + + /** * The status of clock synchronization. @@ -81,38 +87,18 @@ typedef enum clock_sync_stat { /** * Structure that an RTI instance uses to keep track of its own and its * corresponding federates' state. - * It is a special case of `enclave_rti_t` (declared in enclave.h). Inheritence - * is mimicked by having the first attributes to be the same as of enclave_rti_t, - * except that enclaves attribute here is of type `federate_t**`, while it - * is of type `enclave_t**` in `enclave_rti_t`. + * It is a special case of `rti_common_t` (declared in enclave.h). Inheritence + * is mimicked by having the first attributes to be the same as of rti_common_t, + * except that scheduling_nodes attribute here is of type `federate_info_t**`, while it + * is of type `scheduling_node_t**` in `rti_common_t`. * // **************** IMPORTANT!!! 
******************** * // ** If you make any change to this struct, ** - * // ** you MUST also change enclave_rti_t in ** + * // ** you MUST also change rti_common_t in ** * // ** (enclave.h)! The change must exactly match. ** * // ************************************************** */ -typedef struct federation_rti_t { - ////////////////// Enclave specific attributes ////////////////// - - // The federates. - federate_t **enclaves; - - // Number of enclaves - int32_t number_of_enclaves; - - // RTI's decided stop tag for enclaves - tag_t max_stop_tag; - - // Number of enclaves handling stop - int num_enclaves_handling_stop; - - // Boolean indicating that tracing is enabled. - bool tracing_enabled; - - // Pointer to a tracing object - trace_t* trace; - ////////////// Federation only specific attributes ////////////// - +typedef struct rti_remote_t { + rti_common_t base; // Maximum start time seen so far from the federates. int64_t max_start_time; @@ -177,12 +163,11 @@ typedef struct federation_rti_t { * Boolean indicating that authentication is enabled. */ bool authentication_enabled; - /** * Boolean indicating that a stop request is already in progress. */ bool stop_in_progress; -} federation_rti_t; +} rti_remote_t; /** * Enter a critical section where logical time and the event queue are guaranteed @@ -203,17 +188,9 @@ extern int lf_critical_section_enter(environment_t* env); extern int lf_critical_section_exit(environment_t* env); /** - * Create a server and enable listening for socket connections. - * - * @note This function is similar to create_server(...) in - * federate.c. However, it contains logs that are specific - * to the RTI. - * - * @param port The port number to use. - * @param socket_type The type of the socket for the server (TCP or UDP). - * @return The socket descriptor on which to accept connections. + * Indicator that one or more federates have reported an error on resigning. 
*/ -int create_server(int32_t specified_port, uint16_t port, socket_type_t socket_type); +extern bool _lf_federate_reports_error; /** * @brief Update the next event tag of federate `federate_id`. @@ -224,7 +201,7 @@ int create_server(int32_t specified_port, uint16_t port, socket_type_t socket_ty * Will try to see if the RTI can grant new TAG or PTAG messages to any * downstream federates based on this new next event tag. * - * This function assumes that the caller is holding the _RTI.rti_mutex. + * This function assumes that the caller is holding the _RTI.mutex. * * @param federate_id The id of the federate that needs to be updated. * @param next_event_tag The next event tag for `federate_id`. @@ -236,7 +213,7 @@ void update_federate_next_event_tag_locked(uint16_t federate_id, tag_t next_even * * This function assumes the caller does not hold the mutex. */ -void handle_port_absent_message(federate_t* sending_federate, unsigned char* buffer); +void handle_port_absent_message(federate_info_t* sending_federate, unsigned char* buffer); /** * Handle a timed message being received from a federate by the RTI to relay to another federate. @@ -246,17 +223,17 @@ void handle_port_absent_message(federate_t* sending_federate, unsigned char* buf * @param sending_federate The sending federate. * @param buffer The buffer to read into (the first byte is already there). */ -void handle_timed_message(federate_t* sending_federate, unsigned char* buffer); +void handle_timed_message(federate_info_t* sending_federate, unsigned char* buffer); /** - * Handle a logical tag complete (LTC) message. @see - * MSG_TYPE_LOGICAL_TAG_COMPLETE in rti.h. + * Handle a latest tag complete (LTC) message. @see + * MSG_TYPE_LATEST_TAG_COMPLETE in rti.h. * * This function assumes the caller does not hold the mutex. * * @param fed The federate that has completed a logical tag. 
*/ -void handle_logical_tag_complete(federate_t* fed); +void handle_latest_tag_complete(federate_info_t* fed); /** * Handle a next event tag (NET) message. @see MSG_TYPE_NEXT_EVENT_TAG in rti.h. @@ -265,23 +242,10 @@ void handle_logical_tag_complete(federate_t* fed); * * @param fed The federate sending a NET message. */ -void handle_next_event_tag(federate_t* fed); +void handle_next_event_tag(federate_info_t* fed); /////////////////// STOP functions //////////////////// -/** - * Mark a federate requesting stop. - * - * If the number of federates handling stop reaches the - * NUM_OF_FEDERATES, broadcast MSG_TYPE_STOP_GRANTED to every federate. - * - * This function assumes the _RTI.rti_mutex is already locked. - * - * @param fed The federate that has requested a stop or has suddenly - * stopped (disconnected). - */ -void mark_federate_requesting_stop(federate_t* fed); - /** * Handle a MSG_TYPE_STOP_REQUEST message. * @@ -289,7 +253,7 @@ void mark_federate_requesting_stop(federate_t* fed); * * @param fed The federate sending a MSG_TYPE_STOP_REQUEST message. */ -void handle_stop_request_message(federate_t* fed); +void handle_stop_request_message(federate_info_t* fed); /** * Handle a MSG_TYPE_STOP_REQUEST_REPLY message. @@ -298,7 +262,7 @@ void handle_stop_request_message(federate_t* fed); * * @param fed The federate replying the MSG_TYPE_STOP_REQUEST */ -void handle_stop_request_reply(federate_t* fed); +void handle_stop_request_reply(federate_info_t* fed); ////////////////////////////////////////////////// @@ -310,7 +274,7 @@ void handle_stop_request_reply(federate_t* fed); * are initialized to -1. If no MSG_TYPE_ADDRESS_ADVERTISEMENT message has been received from * the destination federate, the RTI will simply reply with -1 for the port. * The sending federate is responsible for checking back with the RTI after a - * period of time. @see connect_to_federate() in federate.c. * + * period of time. * @param fed_id The federate sending a MSG_TYPE_ADDRESS_QUERY message. 
*/ void handle_address_query(uint16_t fed_id); @@ -322,7 +286,7 @@ void handle_address_query(uint16_t fed_id); * field of the _RTI.federates[federate_id] array of structs. * * The server_hostname and server_ip_addr fields are assigned - * in connect_to_federates() upon accepting the socket + * in lf_connect_to_federates() upon accepting the socket * from the remote federate. * * This function assumes the caller does not hold the mutex. @@ -336,7 +300,7 @@ void handle_address_ad(uint16_t federate_id); * A function to handle timestamp messages. * This function assumes the caller does not hold the mutex. */ -void handle_timestamp(federate_t *my_fed); +void handle_timestamp(federate_info_t *my_fed); /** * Take a snapshot of the physical clock time and send @@ -348,7 +312,7 @@ void handle_timestamp(federate_t *my_fed); * @param fed The federate to send the physical time to. * @param socket_type The socket type (TCP or UDP). */ -void send_physical_clock(unsigned char message_type, federate_t* fed, socket_type_t socket_type); +void send_physical_clock(unsigned char message_type, federate_info_t* fed, socket_type_t socket_type); /** * Handle clock synchronization T3 messages from federates. @@ -363,7 +327,7 @@ void send_physical_clock(unsigned char message_type, federate_t* fed, socket_typ * @param my_fed The sending federate. * @param socket_type The RTI's socket type used for the communication (TCP or UDP) */ -void handle_physical_clock_sync_message(federate_t* my_fed, socket_type_t socket_type); +void handle_physical_clock_sync_message(federate_info_t* my_fed, socket_type_t socket_type); /** * A (quasi-)periodic thread that performs clock synchronization with each @@ -378,89 +342,19 @@ void handle_physical_clock_sync_message(federate_t* my_fed, socket_type_t socket */ void* clock_synchronization_thread(void* noargs); -/** - * A function to handle messages labeled - * as MSG_TYPE_RESIGN sent by a federate. 
This - * message is sent at the time of termination - * after all shutdown events are processed - * on the federate. - * - * This function assumes the caller does not hold the mutex. - * - * @note At this point, the RTI might have - * outgoing messages to the federate. This - * function thus first performs a shutdown - * on the socket which sends an EOF. It then - * waits for the remote socket to be closed - * before closing the socket itself. - * - * Assumptions: - * - We assume that the other side (the federates) - * are in charge of closing the socket (by calling - * close() on the socket), and then wait for the RTI - * to shutdown the socket. - * - We assume that calling shutdown() follows the same - * shutdown procedure as stated in the TCP/IP specification. - * - * @param my_fed The federate sending a MSG_TYPE_RESIGN message. - **/ -void handle_federate_resign(federate_t *my_fed); - /** * Thread handling TCP communication with a federate. * @param fed A pointer to the federate's struct that has the * socket descriptor for the federate. */ -void* federate_thread_TCP(void* fed); +void* federate_info_thread_TCP(void* fed); /** * Send a MSG_TYPE_REJECT message to the specified socket and close the socket. - * @param socket_id The socket. + * @param socket_id Pointer to the socket ID. * @param error_code An error code. */ -void send_reject(int socket_id, unsigned char error_code); - -/** - * Listen for a MSG_TYPE_FED_IDS message, which includes as a payload - * a federate ID and a federation ID. If the federation ID - * matches this federation, send an MSG_TYPE_ACK and otherwise send - * a MSG_TYPE_REJECT message. Return 1 if the federate is accepted to - * the federation and 0 otherwise. - * @param socket_id The socket on which to listen. - * @param client_fd The socket address. - * @return The federate ID for success or -1 for failure. 
- */ -int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* client_fd); - -/** - * Listen for a MSG_TYPE_NEIGHBOR_STRUCTURE message, and upon receiving it, fill - * out the relevant information in the federate's struct. - */ -int receive_connection_information(int socket_id, uint16_t fed_id); - -/** - * Listen for a MSG_TYPE_UDP_PORT message, and upon receiving it, set up - * clock synchronization and perform the initial clock synchronization. - * Initial clock synchronization is performed only if the MSG_TYPE_UDP_PORT message - * payload is not UINT16_MAX. If it is also not 0, then this function sets - * up to perform runtime clock synchronization using the UDP port number - * specified in the payload to communicate with the federate's clock - * synchronization logic. - * @param socket_id The socket on which to listen. - * @param fed_id The federate ID. - * @return 1 for success, 0 for failure. - */ -int receive_udp_message_and_set_up_clock_sync(int socket_id, uint16_t fed_id); - -#ifdef __RTI_AUTH__ -/** - * Authenticate incoming federate by performing HMAC-based authentication. - * - * @param socket Socket for the incoming federate tryting to authenticate. - * @return True if authentication is successful and false otherwise. - */ -bool authenticate_federate(int socket); -#endif +void send_reject(int* socket_id, unsigned char error_code); /** * Wait for one incoming connection request from each federate, @@ -468,7 +362,7 @@ bool authenticate_federate(int socket); * that federate. Return when all federates have connected. * @param socket_descriptor The socket on which to accept connections. */ -void connect_to_federates(int socket_descriptor); +void lf_connect_to_federates(int socket_descriptor); /** * Thread to respond to new connections, which could be federates of other @@ -481,7 +375,7 @@ void* respond_to_erroneous_connections(void* nothing); * Initialize the federate with the specified ID. * @param id The federate ID. 
*/ -void initialize_federate(federate_t* fed, uint16_t id); +void initialize_federate(federate_info_t* fed, uint16_t id); /** * Start the socket server for the runtime infrastructure (RTI) and @@ -525,6 +419,7 @@ int process_args(int argc, const char* argv[]); /** * Initialize the _RTI instance. */ -void initialize_RTI(); +void initialize_RTI(rti_remote_t *rti); -#endif // RTI_LIB_H +#endif // RTI_REMOTE_H +#endif // STANDALONE_RTI \ No newline at end of file diff --git a/core/federated/clock-sync.c b/core/federated/clock-sync.c index e438d83aca..eea5e753cd 100644 --- a/core/federated/clock-sync.c +++ b/core/federated/clock-sync.c @@ -71,7 +71,7 @@ instant_t _lf_last_clock_sync_instant = 0LL; /** * The UDP socket descriptor for this federate to communicate with the RTI. - * This is set by setup_clock_synchronization_with_rti() in connect_to_rti() + * This is set by setup_clock_synchronization_with_rti() in lf_connect_to_rti() * in federate.c, which must be called before other * functions that communicate with the rti are called. */ @@ -169,16 +169,14 @@ uint16_t setup_clock_synchronization_with_rti() { _lf_rti_socket_UDP, (struct sockaddr *) &federate_UDP_addr, sizeof(federate_UDP_addr)) < 0) { - lf_print_error_and_exit("Failed to bind its UDP socket: %s.", - strerror(errno)); + lf_print_error_system_failure("Failed to bind its UDP socket."); } // Retrieve the port number that was assigned by the operating system socklen_t addr_length = sizeof(federate_UDP_addr); if (getsockname(_lf_rti_socket_UDP, (struct sockaddr *)&federate_UDP_addr, &addr_length) == -1) { // FIXME: Send 0 UDP_PORT message instead of exiting. // That will disable clock synchronization. 
- lf_print_error_and_exit("Failed to retrieve UDP port: %s.", - strerror(errno)); + lf_print_error_system_failure("Failed to retrieve UDP port."); } LF_PRINT_DEBUG("Assigned UDP port number %u to its socket.", ntohs(federate_UDP_addr.sin_port)); @@ -205,22 +203,7 @@ uint16_t setup_clock_synchronization_with_rti() { return port_to_return; } -/** - * Synchronize the initial physical clock with the RTI. - * A call to this function is inserted into the startup - * sequence by the code generator if initial clock synchronization - * is required. - * - * This is a blocking function that expects - * to read a MSG_TYPE_CLOCK_SYNC_T1 from the RTI TCP socket. - * It will then follow the PTP protocol to synchronize the local - * physical clock with the RTI. - * Failing to complete this protocol is treated as a catastrophic - * error that causes the federate to exit. - * - * @param rti_socket_TCP The rti's socket - */ -void synchronize_initial_physical_clock_with_rti(int rti_socket_TCP) { +void synchronize_initial_physical_clock_with_rti(int* rti_socket_TCP) { LF_PRINT_DEBUG("Waiting for initial clock synchronization messages from the RTI."); size_t message_size = 1 + sizeof(instant_t); @@ -228,7 +211,7 @@ void synchronize_initial_physical_clock_with_rti(int rti_socket_TCP) { for (int i=0; i < _LF_CLOCK_SYNC_EXCHANGES_PER_INTERVAL; i++) { // The first message expected from the RTI is MSG_TYPE_CLOCK_SYNC_T1 - read_from_socket_errexit(rti_socket_TCP, message_size, buffer, + read_from_socket_fail_on_error(rti_socket_TCP, message_size, buffer, NULL, "Federate %d did not get the initial clock synchronization message T1 from the RTI.", _lf_my_fed_id); @@ -242,12 +225,12 @@ void synchronize_initial_physical_clock_with_rti(int rti_socket_TCP) { // Handle the message and send a reply T3 message. // NOTE: No need to acquire the mutex lock during initialization because only // one thread is running. 
- if (handle_T1_clock_sync_message(buffer, rti_socket_TCP, receive_time) != 0) { + if (handle_T1_clock_sync_message(buffer, *rti_socket_TCP, receive_time) != 0) { lf_print_error_and_exit("Initial clock sync: Failed to send T3 reply to RTI."); } // Next message from the RTI is required to be MSG_TYPE_CLOCK_SYNC_T4 - read_from_socket_errexit(rti_socket_TCP, message_size, buffer, + read_from_socket_fail_on_error(rti_socket_TCP, message_size, buffer, NULL, "Federate %d did not get the clock synchronization message T4 from the RTI.", _lf_my_fed_id); @@ -257,7 +240,7 @@ void synchronize_initial_physical_clock_with_rti(int rti_socket_TCP) { } // Handle the message. - handle_T4_clock_sync_message(buffer, rti_socket_TCP, receive_time); + handle_T4_clock_sync_message(buffer, *rti_socket_TCP, receive_time); } LF_PRINT_LOG("Finished initial clock synchronization with the RTI."); @@ -294,7 +277,7 @@ int handle_T1_clock_sync_message(unsigned char* buffer, int socket, instant_t t2 // Write the reply to the socket. LF_PRINT_DEBUG("Sending T3 message to RTI."); - if (write_to_socket(socket, 1 + sizeof(int), reply_buffer) != 1 + sizeof(int)) { + if (write_to_socket(socket, 1 + sizeof(int), reply_buffer)) { lf_print_error("Clock sync: Failed to send T3 message to RTI."); return -1; } @@ -361,12 +344,11 @@ void handle_T4_clock_sync_message(unsigned char* buffer, int socket, instant_t r if (socket == _lf_rti_socket_UDP) { // Read the coded probe message. // We can reuse the same buffer. - ssize_t bytes_read = read_from_socket(socket, 1 + sizeof(instant_t), buffer); + int read_failed = read_from_socket(socket, 1 + sizeof(instant_t), buffer); instant_t r5 = lf_time_physical(); - if ((bytes_read < 1 + (ssize_t)sizeof(instant_t)) - || buffer[0] != MSG_TYPE_CLOCK_SYNC_CODED_PROBE) { + if (read_failed || buffer[0] != MSG_TYPE_CLOCK_SYNC_CODED_PROBE) { lf_print_warning("Clock sync: Did not get the expected coded probe message from the RTI. 
" "Skipping clock synchronization round."); return; diff --git a/core/federated/federate.c b/core/federated/federate.c index d49cd20a46..cd9149e9ea 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1,68 +1,41 @@ /** * @file - * @author Edward A. Lee (eal@berkeley.edu) - * - * @section LICENSE -Copyright (c) 2020, The University of California at Berkeley. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - * @section DESCRIPTION - * Utility functions for a federate in a federated execution. - * The main entry point is synchronize_with_other_federates(). + * @author Soroush Bateni + * @author Peter Donovan + * @author Edward A. Lee + * @author Anirudh Rengarajsm + * @copyright (c) 2020-2023, The University of California at Berkeley. 
+ * License: BSD 2-clause + * @brief Utility functions for a federate in a federated execution. */ #ifdef FEDERATED -#ifdef PLATFORM_ARDUINO -#error To be implemented. No support for federation on Arduino yet. -#else +#if !defined(PLATFORM_Linux) && !defined(PLATFORM_Darwin) +#error No support for federated execution on this platform. +#endif + #include // inet_ntop & inet_pton #include // Defines getaddrinfo(), freeaddrinfo() and struct addrinfo. #include // Defines struct sockaddr_in - -#include -#include // Defines bzero(). #include -#endif +#include // Defines read(), write(), and close() +#include // Defines memset(), strnlen(), strncmp(), strncpy() +#include // Defines strerror() #include #include // Defined perror(), errno -#include -#include // Defines sigaction. -#include -#include -#include -#include // Defines read(), write(), and close() +#include // Defines bzero(). #include "clock-sync.h" #include "federate.h" -#include "lf_types.h" #include "net_common.h" #include "net_util.h" -#include "platform.h" #include "reactor.h" #include "reactor_common.h" #include "reactor_threaded.h" #include "scheduler.h" #include "trace.h" + #ifdef FEDERATED_AUTHENTICATED #include // For secure random number generation. #include // For HMAC-based authentication of federates. @@ -72,28 +45,42 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. extern instant_t _lf_last_reported_unadjusted_physical_time_ns; extern instant_t start_time; -// Error messages. -char* ERROR_SENDING_HEADER = "ERROR sending header information to federate via RTI"; -char* ERROR_SENDING_MESSAGE = "ERROR sending message to federate via RTI"; +// Global variable defined in reactor_common.c: +extern bool _lf_termination_executed; -// Mutex lock held while performing socket write and close operations. 
-lf_mutex_t outbound_socket_mutex; -lf_cond_t port_status_changed; -lf_cond_t logical_time_changed; +// Global variables references in federate.h +lf_mutex_t lf_outbound_socket_mutex; +lf_cond_t lf_port_status_changed; +lf_cond_t lf_current_tag_changed; -// Variable to track how far in the reaction queue we can go until we need to wait for more network port statuses to be known. +/** + * The max level allowed to advance (MLAA) is a variable that tracks how far in the reaction + * queue we can go until we need to wait for more network port statuses to be known. + * Specifically, when an input port status is unknown at a tag (we don't know whether the upstream + * federate has sent or will send a message at that tag), then the downstream federate must + * pause before executing any reaction that depends on that port. A "level" is assigned to that + * port by the code generator based on the overall topology of the federation. Reactions that + * depend on the port have higher levels, whereas those with no dependence on that port have + * lower levels. The MLAA is a level at which the federate must block until the MLAA is + * incremented. It will be incremented as port statuses become known, and when all are known, + * it will become INT_MAX and all reactions will be unblocked. In decentralized execution, the + * MLAA is incremented by a background thread that monitors the local physical clock and + * increments the MLAA when it is safe to assume that the port is absent, if it has not already + * been incremented by the arrival of a message. In centralized execution, the MLAA is used + * only for ports that are involved in a zero-delay cycle (ZDC), and it is incremented when + * either a message or an absent message arrives. + */ int max_level_allowed_to_advance; /** - * The state of this federate instance. + * The state of this federate instance. Each executable has exactly one federate instance, + * and the _fed global variable refers to that instance. 
*/ federate_instance_t _fed = { .socket_TCP_RTI = -1, .number_of_inbound_p2p_connections = 0, .inbound_socket_listeners = NULL, .number_of_outbound_p2p_connections = 0, - .sockets_for_inbound_p2p_connections = { -1 }, - .sockets_for_outbound_p2p_connections = { -1 }, .inbound_p2p_handling_thread_id = 0, .server_socket = -1, .server_port = -1, @@ -107,7 +94,6 @@ federate_instance_t _fed = { .min_delay_from_physical_action_to_federate_output = NEVER }; - federation_metadata_t federation_metadata = { .federation_id = "Unidentified Federation", .rti_host = NULL, @@ -115,1048 +101,874 @@ federation_metadata_t federation_metadata = { .rti_user = NULL }; +////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////// +// Static functions (used only internally) /** - * Create a server to listen to incoming physical - * connections from remote federates. This function - * only handles the creation of the server socket. - * The reserved port for the server socket is then - * sent to the RTI by sending an MSG_TYPE_ADDRESS_ADVERTISEMENT message - * (@see net_common.h). This function expects no response - * from the RTI. - * - * If a port is specified by the user, that will be used - * as the only possibility for the server. This function - * will fail if that port is not available. If a port is not - * specified, the STARTING_PORT (@see net_common.h) will be used. - * The function will keep incrementing the port in this case - * until the number of tries reaches PORT_RANGE_LIMIT. - * - * @note This function is similar to create_server(...) in rti.c. - * However, it contains specific log messages for the peer to - * peer connections between federates. It also additionally - * sends an address advertisement (MSG_TYPE_ADDRESS_ADVERTISEMENT) message to the - * RTI informing it of the port. - * - * @param specified_port The specified port by the user. 
- */ -void create_server(int specified_port) { - if (specified_port > UINT16_MAX || - specified_port < 0) { - lf_print_error( - "create_server(): The specified port (%d) is out of range." - " Starting with %d instead.", - specified_port, - STARTING_PORT - ); - specified_port = 0; - } - uint16_t port = (uint16_t)specified_port; - if (specified_port == 0) { - // Use the default starting port. - port = STARTING_PORT; - } - LF_PRINT_DEBUG("Creating a socket server on port %d.", port); - // Create an IPv4 socket for TCP (not UDP) communication over IP (0). - int socket_descriptor = create_real_time_tcp_socket_errexit(); - - // Server file descriptor. - struct sockaddr_in server_fd; - // Zero out the server address structure. - bzero((char*)&server_fd, sizeof(server_fd)); - - server_fd.sin_family = AF_INET; // IPv4 - server_fd.sin_addr.s_addr = INADDR_ANY; // All interfaces, 0.0.0.0. - // Convert the port number from host byte order to network byte order. - server_fd.sin_port = htons(port); - - int result = bind( - socket_descriptor, - (struct sockaddr *) &server_fd, - sizeof(server_fd)); - // If the binding fails with this port and no particular port was specified - // in the LF program, then try the next few ports in sequence. - while (result != 0 - && specified_port == 0 - && port >= STARTING_PORT - && port <= STARTING_PORT + PORT_RANGE_LIMIT) { - LF_PRINT_DEBUG("Failed to get port %d. Trying %d.", port, port + 1); - port++; - server_fd.sin_port = htons(port); - result = bind( - socket_descriptor, - (struct sockaddr *) &server_fd, - sizeof(server_fd)); - } - if (result != 0) { - if (specified_port == 0) { - lf_print_error_and_exit("Failed to bind socket. Cannot find a usable port. \ - Consider increasing PORT_RANGE_LIMIT in federate.c"); - } else { - lf_print_error_and_exit("Failed to bind socket. Specified port is not available. 
\ - Consider leaving the port unspecified"); - } - } - LF_PRINT_LOG("Server for communicating with other federates started using port %d.", port); - - // Enable listening for socket connections. - // The second argument is the maximum number of queued socket requests, - // which according to the Mac man page is limited to 128. - listen(socket_descriptor, 128); - - // Set the global server port - _fed.server_port = port; - - // Send the server port number to the RTI - // on an MSG_TYPE_ADDRESS_ADVERTISEMENT message (@see net_common.h). - unsigned char buffer[sizeof(int32_t) + 1]; - buffer[0] = MSG_TYPE_ADDRESS_ADVERTISEMENT; - encode_int32(_fed.server_port, &(buffer[1])); - // Trace the event when tracing is enabled - tracepoint_federate_to_rti(_fed.trace, send_ADR_AD, _lf_my_fed_id, NULL); - write_to_socket_errexit(_fed.socket_TCP_RTI, sizeof(int32_t) + 1, (unsigned char*)buffer, - "Failed to send address advertisement."); - LF_PRINT_DEBUG("Sent port %d to the RTI.", _fed.server_port); - - // Set the global server socket - _fed.server_socket = socket_descriptor; -} - -/** - * Send a message to another federate directly or via the RTI. - * This method assumes that the caller does not hold the outbound_socket_mutex lock, - * which it acquires to perform the send. - * - * If the socket connection to the remote federate or the RTI has been broken, - * then this returns 0 without sending. Otherwise, it returns 1. - * - * @note This function is similar to send_timed_message() except that it - * does not deal with time and timed_messages. - * - * @param message_type The type of the message being sent. - * Currently can be MSG_TYPE_MESSAGE for messages sent via - * RTI or MSG_TYPE_P2P_MESSAGE for messages sent between - * federates. - * @param port The ID of the destination port. - * @param federate The ID of the destination federate. - * @param next_destination_str The name of the next destination in string format - * @param length The message length. 
- * @param message The message. - * @return 1 if the message has been sent, 0 otherwise. - * FIXME: Currently, federates can send untimed messages to RTI, but there is no - * handling mechanism of MSG_TYPE_MESSAGE at the RTI side. - * Is it really needed? Or should the RTI be updated? - */ -int send_message(int message_type, - unsigned short port, - unsigned short federate, - const char* next_destination_str, - size_t length, - unsigned char* message) { - unsigned char header_buffer[1 + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t)]; - // First byte identifies this as a timed message. - if (message_type != MSG_TYPE_MESSAGE && - message_type != MSG_TYPE_P2P_MESSAGE - ) { - lf_print_error( - "send_message() was called with an invalid message type (%d).", - message_type - ); - return 0; - } - header_buffer[0] = (unsigned char)message_type; - // Next two bytes identify the destination port. - // NOTE: Send messages little endian, not big endian. - encode_uint16(port, &(header_buffer[1])); - - // Next two bytes identify the destination federate. - encode_uint16(federate, &(header_buffer[1 + sizeof(uint16_t)])); - - // The next four bytes are the message length. - encode_int32((int32_t)length, &(header_buffer[1 + sizeof(uint16_t) + sizeof(uint16_t)])); - - LF_PRINT_LOG("Sending untimed message to %s.", next_destination_str); - - // Header: message_type + port_id + federate_id + length of message + timestamp + microstep - const int header_length = 1 + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t); - // Use a mutex lock to prevent multiple threads from simultaneously sending. - lf_mutex_lock(&outbound_socket_mutex); - // First, check that the socket is still connected. This must done - // while holding the mutex lock. 
- int socket = -1; - if (message_type == MSG_TYPE_P2P_MESSAGE) { - socket = _fed.sockets_for_outbound_p2p_connections[federate]; - } else { - socket = _fed.socket_TCP_RTI; - } - if (socket < 0) { - lf_print_warning("Socket is no longer connected. Dropping message."); - lf_mutex_unlock(&outbound_socket_mutex); - return 0; - } - // Trace the event when tracing is enabled - if (message_type == MSG_TYPE_P2P_MESSAGE) { - tracepoint_federate_to_federate(_fed.trace, send_P2P_MSG, _lf_my_fed_id, federate, NULL); - } else { // message_type == MSG_TYPE_MESSAGE) - tracepoint_federate_to_rti(_fed.trace, send_MSG, _lf_my_fed_id, NULL); - } - write_to_socket_with_mutex(socket, header_length, header_buffer, &outbound_socket_mutex, - "Failed to send message header to to %s.", next_destination_str); - write_to_socket_with_mutex(socket, length, message, &outbound_socket_mutex, - "Failed to send message body to to %s.", next_destination_str); - lf_mutex_unlock(&outbound_socket_mutex); - return 1; -} - -/** - * Send the specified timestamped message to the specified port in the - * specified federate via the RTI or directly to a federate depending on - * the given socket. The timestamp is calculated as current_logical_time + - * additional delay which is greater than or equal to zero. - * The port should be an input port of a reactor in - * the destination federate. This version does include the timestamp - * in the message. The caller can reuse or free the memory after this returns. - * - * If the socket connection to the remote federate or the RTI has been broken, - * then this returns 0 without sending. Otherwise, it returns 1. - * - * This method assumes that the caller does not hold the outbound_socket_mutex lock, - * which it acquires to perform the send. - * - * @note This function is similar to send_message() except that it - * sends timed messages and also contains logics related to time. 
- * - * @param env The environment of the federate - * @param additional_delay The offset applied to the timestamp - * using after. The additional delay will be greater or equal to zero - * if an after is used on the connection. If no after is given in the - * program, -1 is passed. - * @param message_type The type of the message being sent. - * Currently can be MSG_TYPE_TAGGED_MESSAGE for messages sent via - * RTI or MSG_TYPE_P2P_TAGGED_MESSAGE for messages sent between - * federates. - * @param port The ID of the destination port. - * @param federate The ID of the destination federate. - * @param next_destination_str The next destination in string format (RTI or federate) - * (used for reporting errors). - * @param length The message length. - * @param message The message. - * @return 1 if the message has been sent, 0 otherwise. - */ -int send_timed_message(environment_t* env, - interval_t additional_delay, - int message_type, - unsigned short port, - unsigned short federate, - const char* next_destination_str, - size_t length, - unsigned char* message) { - assert(env != GLOBAL_ENVIRONMENT); - - unsigned char header_buffer[1 + sizeof(uint16_t) + sizeof(uint16_t) - + sizeof(int32_t) + sizeof(instant_t) + sizeof(microstep_t)]; - // First byte identifies this as a timed message. - if (message_type != MSG_TYPE_TAGGED_MESSAGE && - message_type != MSG_TYPE_P2P_TAGGED_MESSAGE - ) { - lf_print_error( - "send_message() was called with an invalid message type (%d).", - message_type - ); - return 0; - } - size_t buffer_head = 0; - header_buffer[buffer_head] = (unsigned char)message_type; - buffer_head += sizeof(unsigned char); - // Next two bytes identify the destination port. - // NOTE: Send messages little endian, not big endian. - encode_uint16(port, &(header_buffer[buffer_head])); - buffer_head += sizeof(uint16_t); - - // Next two bytes identify the destination federate. 
- encode_uint16(federate, &(header_buffer[buffer_head])); - buffer_head += sizeof(uint16_t); - - // The next four bytes are the message length. - encode_int32((int32_t)length, &(header_buffer[buffer_head])); - buffer_head += sizeof(int32_t); - - // Apply the additional delay to the current tag and use that as the intended - // tag of the outgoing message - tag_t current_message_intended_tag = lf_delay_tag(env->current_tag, - additional_delay); - - // Next 8 + 4 will be the tag (timestamp, microstep) - encode_tag( - &(header_buffer[buffer_head]), - current_message_intended_tag - ); - buffer_head += sizeof(int64_t) + sizeof(uint32_t); - - LF_PRINT_LOG("Sending message with tag " PRINTF_TAG " to %s.", - current_message_intended_tag.time - start_time, current_message_intended_tag.microstep, next_destination_str); - - // Header: message_type + port_id + federate_id + length of message + timestamp + microstep - size_t header_length = buffer_head; - - if (_lf_is_tag_after_stop_tag(env, current_message_intended_tag)) { - // Message tag is past the timeout time (the stop time) so it should - // not be sent. - return 0; - } - - // Use a mutex lock to prevent multiple threads from simultaneously sending. - lf_mutex_lock(&outbound_socket_mutex); - // First, check that the socket is still connected. This must done - // while holding the mutex lock. - int socket = -1; - if (message_type == MSG_TYPE_P2P_TAGGED_MESSAGE) { - socket = _fed.sockets_for_outbound_p2p_connections[federate]; - } else { - socket = _fed.socket_TCP_RTI; - } - if (socket < 0) { - lf_print_warning("Socket is no longer connected. 
Dropping message."); - lf_mutex_unlock(&outbound_socket_mutex); - return 0; - } - // Trace the event when tracing is enabled - if (message_type == MSG_TYPE_TAGGED_MESSAGE) { - tracepoint_federate_to_rti(_fed.trace, send_TAGGED_MSG, _lf_my_fed_id, &current_message_intended_tag); - } else { // message_type == MSG_TYPE_P2P_TAGGED_MESSAGE - tracepoint_federate_to_federate(_fed.trace, send_P2P_TAGGED_MSG, _lf_my_fed_id, federate, &current_message_intended_tag); - } - write_to_socket_with_mutex(socket, header_length, header_buffer, &outbound_socket_mutex, - "Failed to send timed message header to %s.", next_destination_str); - write_to_socket_with_mutex(socket, length, message, &outbound_socket_mutex, - "Failed to send timed message body to %s.", next_destination_str); - lf_mutex_unlock(&outbound_socket_mutex); - return 1; -} - -/** - * Send a time to the RTI. - * This is not synchronized. - * It assumes the caller is. + * Send a time to the RTI. This acquires the lf_outbound_socket_mutex. * @param type The message type (MSG_TYPE_TIMESTAMP). * @param time The time. - * @param exit_on_error If set to true, exit the program if sending 'time' fails. - * Print a soft error message otherwise */ -void _lf_send_time(unsigned char type, instant_t time, bool exit_on_error) { +static void send_time(unsigned char type, instant_t time) { LF_PRINT_DEBUG("Sending time " PRINTF_TIME " to the RTI.", time); size_t bytes_to_write = 1 + sizeof(instant_t); unsigned char buffer[bytes_to_write]; buffer[0] = type; encode_int64(time, &(buffer[1])); - lf_mutex_lock(&outbound_socket_mutex); - if (_fed.socket_TCP_RTI < 0) { - lf_print_warning("Socket is no longer connected. 
Dropping message."); - lf_mutex_unlock(&outbound_socket_mutex); - return; - } - tag_t tag = {.time = time, .microstep = 0}; // Trace the event when tracing is enabled + tag_t tag = {.time = time, .microstep = 0}; tracepoint_federate_to_rti(_fed.trace, send_TIMESTAMP, _lf_my_fed_id, &tag); - ssize_t bytes_written = write_to_socket(_fed.socket_TCP_RTI, bytes_to_write, buffer); - if (bytes_written < (ssize_t)bytes_to_write) { - if (!exit_on_error) { - lf_print_error("Failed to send time " PRINTF_TIME " to the RTI." - " Error code %d: %s", - time - start_time, - errno, - strerror(errno) - ); - - } else if (errno == ENOTCONN) { - // FIXME: Shutdown is probably not working properly because the socket gets disconnected. - lf_print_error("Socket to the RTI is no longer connected. Considering this a soft error."); - } else { - lf_print_error_and_exit("Failed to send time " PRINTF_TIME " to the RTI." - " Error code %d: %s", - time - start_time, - errno, - strerror(errno) - ); - } - } - lf_mutex_unlock(&outbound_socket_mutex); + LF_MUTEX_LOCK(lf_outbound_socket_mutex); + write_to_socket_fail_on_error(&_fed.socket_TCP_RTI, bytes_to_write, buffer, &lf_outbound_socket_mutex, + "Failed to send time " PRINTF_TIME " to the RTI.", time - start_time); + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); } /** * Send a tag to the RTI. - * This is not synchronized. - * It assumes the caller is. - * @param type The message type (MSG_TYPE_NEXT_EVENT_TAG or MSG_TYPE_LOGICAL_TAG_COMPLETE). + * This function acquires the lf_outbound_socket_mutex. + * @param type The message type (MSG_TYPE_NEXT_EVENT_TAG or MSG_TYPE_LATEST_TAG_COMPLETE). * @param tag The tag. - * @param exit_on_error If set to true, exit the program if sending 'tag' fails. 
- * Print a soft error message otherwise */ -void _lf_send_tag(unsigned char type, tag_t tag, bool exit_on_error) { +static void send_tag(unsigned char type, tag_t tag) { LF_PRINT_DEBUG("Sending tag " PRINTF_TAG " to the RTI.", tag.time - start_time, tag.microstep); size_t bytes_to_write = 1 + sizeof(instant_t) + sizeof(microstep_t); unsigned char buffer[bytes_to_write]; buffer[0] = type; encode_tag(&(buffer[1]), tag); - lf_mutex_lock(&outbound_socket_mutex); + LF_MUTEX_LOCK(lf_outbound_socket_mutex); if (_fed.socket_TCP_RTI < 0) { lf_print_warning("Socket is no longer connected. Dropping message."); - lf_mutex_unlock(&outbound_socket_mutex); + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); return; } trace_event_t event_type = (type == MSG_TYPE_NEXT_EVENT_TAG) ? send_NET : send_LTC; // Trace the event when tracing is enabled tracepoint_federate_to_rti(_fed.trace, event_type, _lf_my_fed_id, &tag); - ssize_t bytes_written = write_to_socket(_fed.socket_TCP_RTI, bytes_to_write, buffer); - if (bytes_written < (ssize_t)bytes_to_write) { - if (!exit_on_error) { - lf_print_error("Failed to send tag " PRINTF_TAG " to the RTI." - " Error code %d: %s", - tag.time - start_time, - tag.microstep, - errno, - strerror(errno) - ); - return; - } else if (errno == ENOTCONN) { - lf_print_error("Socket to the RTI is no longer connected. Considering this a soft error."); - return; - } else { - lf_mutex_unlock(&outbound_socket_mutex); - lf_print_error_and_exit("Failed to send tag " PRINTF_TAG " to the RTI." 
- " Error code %d: %s", - tag.time - start_time, - tag.microstep, - errno, - strerror(errno) - ); - } - } - lf_mutex_unlock(&outbound_socket_mutex); + write_to_socket_fail_on_error( + &_fed.socket_TCP_RTI, bytes_to_write, buffer, &lf_outbound_socket_mutex, + "Failed to send tag " PRINTF_TAG " to the RTI.", tag.time - start_time, tag.microstep); + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); } /** - * Thread to accept connections from other federates that send this federate - * messages directly (not through the RTI). This thread starts a thread for - * each accepted socket connection and, once it has opened all expected - * sockets, exits. - * @param env_arg pointer to the environment of this federate. + * Return true if either the socket to the RTI is broken or the socket is + * alive and the first unread byte on the socket's queue is MSG_TYPE_FAILED. */ +static bool rti_failed() { + unsigned char first_byte; + ssize_t bytes = peek_from_socket(_fed.socket_TCP_RTI, &first_byte); + if (bytes < 0 || (bytes == 1 && first_byte == MSG_TYPE_FAILED)) return true; + else return false; +} -void* handle_p2p_connections_from_federates(void* env_arg) { - assert(env_arg); - environment_t* env = (environment_t *) env_arg; - int received_federates = 0; - // Allocate memory to store thread IDs. - _fed.inbound_socket_listeners = (lf_thread_t*)calloc(_fed.number_of_inbound_p2p_connections, sizeof(lf_thread_t)); - while (received_federates < _fed.number_of_inbound_p2p_connections) { - // Wait for an incoming connection request. - struct sockaddr client_fd; - uint32_t client_length = sizeof(client_fd); - int socket_id = accept(_fed.server_socket, &client_fd, &client_length); - // FIXME: Error handling here is too harsh maybe? - if (socket_id < 0 && errno != EAGAIN && errno != EWOULDBLOCK) { - lf_print_error("A fatal error occurred while accepting a new socket. 
" - "Federate will not accept connections anymore."); - return NULL; - } - LF_PRINT_LOG("Accepted new connection from remote federate."); +//////////////////////////////// Port Status Handling /////////////////////////////////////// - size_t header_length = 1 + sizeof(uint16_t) + 1; - unsigned char buffer[header_length]; - ssize_t bytes_read = read_from_socket(socket_id, header_length, (unsigned char*)&buffer); - if (bytes_read != (ssize_t)header_length || buffer[0] != MSG_TYPE_P2P_SENDING_FED_ID) { - lf_print_warning("Federate received invalid first message on P2P socket. Closing socket."); - if (bytes_read >= 0) { - unsigned char response[2]; - response[0] = MSG_TYPE_REJECT; - response[1] = WRONG_SERVER; - // Trace the event when tracing is enabled - tracepoint_federate_to_federate(_fed.trace, send_REJECT, _lf_my_fed_id, -3, NULL); - // Ignore errors on this response. - write_to_socket(socket_id, 2, response); - } - close(socket_id); - continue; - } +extern lf_action_base_t* _lf_action_table[]; +extern interval_t _lf_action_delay_table[]; +extern size_t _lf_action_table_size; +extern lf_action_base_t* _lf_zero_delay_cycle_action_table[]; +extern size_t _lf_zero_delay_cycle_action_table_size; +extern reaction_t* network_input_reactions[]; +extern size_t num_network_input_reactions; +extern reaction_t* port_absent_reaction[]; +extern size_t num_port_absent_reactions; +#ifdef FEDERATED_DECENTRALIZED +extern staa_t* staa_lst[]; +extern size_t staa_lst_size; +#endif - // Get the federation ID and check it. - unsigned char federation_id_length = buffer[header_length - 1]; - char remote_federation_id[federation_id_length]; - bytes_read = read_from_socket(socket_id, federation_id_length, (unsigned char*)remote_federation_id); - if (bytes_read != federation_id_length - || (strncmp(federation_metadata.federation_id, remote_federation_id, strnlen(federation_metadata.federation_id, 255)) != 0)) { - lf_print_warning("Received invalid federation ID. 
Closing socket."); - if (bytes_read >= 0) { - unsigned char response[2]; - response[0] = MSG_TYPE_REJECT; - response[1] = FEDERATION_ID_DOES_NOT_MATCH; - // Trace the event when tracing is enabled - tracepoint_federate_to_federate(_fed.trace, send_REJECT, _lf_my_fed_id, -3, NULL); - // Ignore errors on this response. - write_to_socket(socket_id, 2, response); - } - close(socket_id); - continue; - } +/** + * Return a pointer to the action struct for the action + * corresponding to the specified port ID. + * @param port_id The port ID. + * @return A pointer to an action struct or null if the ID is out of range. + */ +static lf_action_base_t* action_for_port(int port_id) { + if (port_id >= 0 && port_id < _lf_action_table_size) { + return _lf_action_table[port_id]; + } + lf_print_error_and_exit("Invalid port ID: %d", port_id); + return NULL; +} - // Extract the ID of the sending federate. - uint16_t remote_fed_id = extract_uint16((unsigned char*)&(buffer[1])); - LF_PRINT_DEBUG("Received sending federate ID %d.", remote_fed_id); - - // Trace the event when tracing is enabled - tracepoint_federate_to_federate(_fed.trace, receive_FED_ID, _lf_my_fed_id, remote_fed_id, NULL); - - // Once we record the socket_id here, all future calls to close() on - // the socket should be done while holding a mutex, and this array - // element should be reset to -1 during that critical section. - // Otherwise, there can be race condition where, during termination, - // two threads attempt to simultaneously access the socket. - _fed.sockets_for_inbound_p2p_connections[remote_fed_id] = socket_id; - - // Send an MSG_TYPE_ACK message. 
- unsigned char response = MSG_TYPE_ACK; - // Trace the event when tracing is enabled - tracepoint_federate_to_federate(_fed.trace, send_ACK, _lf_my_fed_id, remote_fed_id, NULL); - write_to_socket_errexit(socket_id, 1, (unsigned char*)&response, - "Failed to write MSG_TYPE_ACK in response to federate %d.", - remote_fed_id); - - // Start a thread to listen for incoming messages from other federates. - // The fed_id is a uint16_t, which we assume can be safely cast to and from void*. - void* fed_id_arg = (void*)(uintptr_t)remote_fed_id; - int result = lf_thread_create( - &_fed.inbound_socket_listeners[received_federates], - listen_to_federates, - fed_id_arg); - if (result != 0) { - // Failed to create a listening thread. - close(socket_id); - _fed.sockets_for_inbound_p2p_connections[remote_fed_id] = -1; - lf_print_error_and_exit( - "Failed to create a thread to listen for incoming physical connection. Error code: %d.", - result +/** + * Update the last known status tag of all network input ports + * to the value of `tag`, unless that the provided `tag` is less + * than the last_known_status_tag of the port. This is called when + * a TAG signal is received from the RTI in centralized coordination. + * If any update occurs, then this broadcasts on `lf_port_status_changed`. + * + * This assumes the caller holds the mutex. + * + * @param tag The tag on which the latest status of all network input + * ports is known. + */ +static void update_last_known_status_on_input_ports(tag_t tag) { + LF_PRINT_DEBUG("In update_last_known_status_on_input ports."); + bool notify = false; + for (int i = 0; i < _lf_action_table_size; i++) { + lf_action_base_t* input_port_action = _lf_action_table[i]; + // This is called when a TAG is received. 
+ // But it is possible for an input port to have received already + // a message with a larger tag (if there is an after delay on the + // connection), in which case, the last known status tag of the port + // is in the future and should not be rolled back. So in that case, + // we do not update the last known status tag. + if (lf_tag_compare(tag, + input_port_action->trigger->last_known_status_tag) >= 0) { + LF_PRINT_DEBUG( + "Updating the last known status tag of port %d from " PRINTF_TAG " to " PRINTF_TAG ".", + i, + input_port_action->trigger->last_known_status_tag.time - lf_time_start(), + input_port_action->trigger->last_known_status_tag.microstep, + tag.time - lf_time_start(), + tag.microstep ); + input_port_action->trigger->last_known_status_tag = tag; + notify = true; } - - received_federates++; } - - LF_PRINT_LOG("All remote federates are connected."); - return NULL; + // FIXME: We could put a condition variable into the trigger_t + // struct for each network input port, in which case this won't + // be a broadcast but rather a targetted signal. + if (notify && lf_update_max_level(tag, false)) { + // Notify network input reactions + lf_cond_broadcast(&lf_port_status_changed); + } } /** - * Close the socket that sends outgoing messages to the - * specified federate ID. This function assumes the caller holds - * the outbound_socket_mutex mutex lock. - * @param fed_id The ID of the peer federate receiving messages from this - * federate, or -1 if the RTI (centralized coordination). + * @brief Update the last known status tag of a network input port. + * + * First, if the specified tag is less than the current_tag of the top-level + * environment, then ignore the specified tag and use the current_tag. This + * situation can arise if a message has arrived late (an STP violation has occurred). + * + * If the specified tag is greater than the previous last_known_status_tag + * of the port, then update the last_known_status_tag to the new tag. 
+ * + * If the tag is equal to the previous last_known_status_tag, then + * increment the microstep of the last_known_status_tag. This situation can + * occur if a sequence of late messages (STP violations) are occurring all at + * once during an execution of a logical tag. + * + * This function is called when a message or absent message arrives. For decentralized + * coordination, it is also called by the background thread update_ports_from_staa_offsets + * which uses physical time to determine when an input port can be assumed to be absent + * if a message has not been received. + * + * This function assumes the caller holds the mutex on the top-level environment, + * and, if the tag actually increases, it broadcasts on `lf_port_status_changed`. + * + * @param env The top-level environment, whose mutex is assumed to be held. + * @param tag The tag on which the latest status of the specified network input port is known. + * @param portID The port ID. */ -void _lf_close_outbound_socket(int fed_id) { - assert (fed_id >= 0 && fed_id < NUMBER_OF_FEDERATES); - if (_fed.sockets_for_outbound_p2p_connections[fed_id] >= 0) { - shutdown(_fed.sockets_for_outbound_p2p_connections[fed_id], SHUT_RDWR); - close(_fed.sockets_for_outbound_p2p_connections[fed_id]); - _fed.sockets_for_outbound_p2p_connections[fed_id] = -1; +static void update_last_known_status_on_input_port(environment_t* env, tag_t tag, int port_id) { + if (lf_tag_compare(tag, env->current_tag) < 0) tag = env->current_tag; + trigger_t* input_port_action = action_for_port(port_id)->trigger; + int comparison = lf_tag_compare(tag, input_port_action->last_known_status_tag); + if (comparison == 0) tag.microstep++; + if (comparison >= 0) { + LF_PRINT_LOG( + "Updating the last known status tag of port %d from " PRINTF_TAG " to " PRINTF_TAG ".", + port_id, + input_port_action->last_known_status_tag.time - lf_time_start(), + input_port_action->last_known_status_tag.microstep, + tag.time - lf_time_start(), + tag.microstep + 
); + input_port_action->last_known_status_tag = tag; + + // Check whether this port update implies a change to MLAA, which may unblock reactions. + // For decentralized coordination, the first argument is NEVER, so it has no effect. + // For centralized, the arguments probably also have no effect, but the port update may. + // Note that it would not be correct to pass `tag` as the first argument because + // there is no guarantee that there is either a TAG or a PTAG for this time. + // The message that triggered this to be called could be from an upstream + // federate that is far ahead of other upstream federates in logical time. + lf_update_max_level(_fed.last_TAG, _fed.is_last_TAG_provisional); + lf_cond_broadcast(&lf_port_status_changed); + } else { + // Message arrivals should be monotonic, so this should not occur. + lf_print_warning("Attempt to update the last known status tag " + "of network input port %d to an earlier tag was ignored.", port_id); } } /** - * For each incoming message socket, we create this thread that listens - * for upstream messages. Currently, the only possible upstream message - * is MSG_TYPE_CLOSE_REQUEST. If this thread receives that message, then closes - * the socket. The idea here is that a peer-to-peer socket connection - * is always closed from the sending end, never from the receiving end. - * This way, any sends in progress complete before the socket is actually - * closed. + * Set the status of network port with id portID. + * + * @param portID The network port ID + * @param status The network port status (port_status_t) */ -void* listen_for_upstream_messages_from_downstream_federates(void* fed_id_ptr) { - uint16_t fed_id = *((uint16_t*)fed_id_ptr); - unsigned char message; - - lf_mutex_lock(&outbound_socket_mutex); - while(_fed.sockets_for_outbound_p2p_connections[fed_id] >= 0) { - // Unlock the mutex before performing a blocking read. 
- // Note that there is a race condition here, but the read will return - // a failure if the socket gets closed. - lf_mutex_unlock(&outbound_socket_mutex); - - LF_PRINT_DEBUG("Thread listening for MSG_TYPE_CLOSE_REQUEST from federate %d", fed_id); - ssize_t bytes_read = read_from_socket( - _fed.sockets_for_outbound_p2p_connections[fed_id], 1, &message); - // Reacquire the mutex lock before closing or reading the socket again. - lf_mutex_lock(&outbound_socket_mutex); - - if (bytes_read == 1 && message == MSG_TYPE_CLOSE_REQUEST) { - // Received a request to close the socket. - LF_PRINT_DEBUG("Received MSG_TYPE_CLOSE_REQUEST from federate %d.", fed_id); - // Trace the event when tracing is enabled - tracepoint_federate_from_federate(_fed.trace, receive_CLOSE_RQ, _lf_my_fed_id, fed_id, NULL); - _lf_close_outbound_socket(fed_id); - break; - } - if (bytes_read == 0) { - // EOF. - LF_PRINT_DEBUG("Received EOF from federate %d.", fed_id); - _lf_close_outbound_socket(fed_id); - break; - } - if (bytes_read < 0) { - // EOF. - LF_PRINT_DEBUG("Error on socket from federate %d.", fed_id); - _lf_close_outbound_socket(fed_id); - break; - } - } - lf_mutex_unlock(&outbound_socket_mutex); - return NULL; +static void set_network_port_status(int portID, port_status_t status) { + lf_action_base_t* network_input_port_action = action_for_port(portID); + network_input_port_action->trigger->status = status; } /** - * Connect to the federate with the specified id. This established - * connection will then be used in functions such as send_timed_message() - * to send messages directly to the specified federate. - * This function first sends an MSG_TYPE_ADDRESS_QUERY message to the RTI to obtain - * the IP address and port number of the specified federate. It then attempts - * to establish a socket connection to the specified federate. - * If this fails, the program exits. 
If it succeeds, it sets element [id] of - * the _fed.sockets_for_outbound_p2p_connections global array to - * refer to the socket for communicating directly with the federate. - * @param remote_federate_id The ID of the remote federate. + * Version of schedule_value() similar to that in reactor_common.c + * except that it does not acquire the mutex lock and has a special + * behavior during startup where it can inject reactions to the reaction + * queue if execution has not started yet. + * It is also responsible for setting the intended tag of the + * network message based on the calculated delay. + * This function assumes that the caller holds the mutex lock. + * + * This is used for handling incoming timed messages to a federate. + * + * @param env The environment of the federate + * @param action The action or timer to be triggered. + * @param tag The tag of the message received over the network. + * @param value Dynamically allocated memory containing the value to send. + * @param length The length of the array, if it is an array, or 1 for a + * scalar and 0 for no payload. + * @return A handle to the event, or 0 if no event was scheduled, or -1 for error. */ -void connect_to_federate(uint16_t remote_federate_id) { - int result = -1; - int count_retries = 0; - - // Ask the RTI for port number of the remote federate. - // The buffer is used for both sending and receiving replies. - // The size is what is needed for receiving replies. - unsigned char buffer[sizeof(int32_t) + INET_ADDRSTRLEN]; - int port = -1; - struct in_addr host_ip_addr; - int count_tries = 0; - while (port == -1) { - buffer[0] = MSG_TYPE_ADDRESS_QUERY; - // NOTE: Sending messages in little endian. 
- encode_uint16(remote_federate_id, &(buffer[1])); - - LF_PRINT_DEBUG("Sending address query for federate %d.", remote_federate_id); - // Trace the event when tracing is enabled - tracepoint_federate_to_rti(_fed.trace, send_ADR_QR, _lf_my_fed_id, NULL); - write_to_socket_errexit(_fed.socket_TCP_RTI, sizeof(uint16_t) + 1, buffer, - "Failed to send address query for federate %d to RTI.", - remote_federate_id); +static trigger_handle_t schedule_message_received_from_network_locked( + environment_t* env, + trigger_t* trigger, + tag_t tag, + lf_token_t* token) { + assert(env != GLOBAL_ENVIRONMENT); - // Read RTI's response. - read_from_socket_errexit(_fed.socket_TCP_RTI, sizeof(int32_t), buffer, - "Failed to read the requested port number for federate %d from RTI.", - remote_federate_id); + // Return value of the function + trigger_handle_t return_value = 0; - port = extract_int32(buffer); + // Indicates whether or not the intended tag + // of the message (timestamp, microstep) is + // in the future relative to the tag of this + // federate. By default, assume it is not. + bool message_tag_is_in_the_future = lf_tag_compare(tag, env->current_tag) > 0; + // Assign the intended tag temporarily to restore later. + tag_t previous_intended_tag = trigger->intended_tag; + trigger->intended_tag = tag; - read_from_socket_errexit(_fed.socket_TCP_RTI, sizeof(host_ip_addr), (unsigned char*)&host_ip_addr, - "Failed to read the IP address for federate %d from RTI.", - remote_federate_id); + // Calculate the extra_delay required to be passed + // to the schedule function. + interval_t extra_delay = tag.time - env->current_tag.time; + if (!message_tag_is_in_the_future && env->execution_started) { +#ifdef FEDERATED_CENTRALIZED + // If the coordination is centralized, receiving a message + // that does not carry a timestamp that is in the future + // would indicate a critical condition, showing that the + // time advance mechanism is not working correctly. 
+ LF_MUTEX_UNLOCK(env->mutex); + lf_print_error_and_exit( + "Received a message at tag " PRINTF_TAG " that has a tag " PRINTF_TAG + " that has violated the STP offset. " + "Centralized coordination should not have these types of messages.", + env->current_tag.time - start_time, env->current_tag.microstep, + tag.time - start_time, tag.microstep); +#else + // Set the delay back to 0 + extra_delay = 0LL; + LF_PRINT_LOG("Calling schedule with 0 delay and intended tag " PRINTF_TAG ".", + trigger->intended_tag.time - start_time, + trigger->intended_tag.microstep); + return_value = _lf_schedule(env, trigger, extra_delay, token); +#endif + } else { + // In case the message is in the future, call + // _lf_schedule_at_tag() so that the microstep is respected. + LF_PRINT_LOG("Received a message that is (" PRINTF_TIME " nanoseconds, " PRINTF_MICROSTEP " microsteps) " + "in the future.", extra_delay, tag.microstep - env->current_tag.microstep); + return_value = _lf_schedule_at_tag(env, trigger, tag, token); + } + trigger->intended_tag = previous_intended_tag; + // Notify the main thread in case it is waiting for physical time to elapse. + LF_PRINT_DEBUG("Broadcasting notification that event queue changed."); + lf_cond_broadcast(&env->event_q_changed); + return return_value; +} - // A reply of -1 for the port means that the RTI does not know - // the port number of the remote federate, presumably because the - // remote federate has not yet sent an MSG_TYPE_ADDRESS_ADVERTISEMENT message to the RTI. - // Sleep for some time before retrying. - if (port == -1) { - if (count_tries++ >= CONNECT_NUM_RETRIES) { - lf_print_error_and_exit("TIMEOUT obtaining IP/port for federate %d from the RTI.", - remote_federate_id); - } - // Wait ADDRESS_QUERY_RETRY_INTERVAL nanoseconds. - if (lf_sleep(ADDRESS_QUERY_RETRY_INTERVAL) != 0) { - // Sleep was interrupted. - continue; +/** + * Close the socket that receives incoming messages from the + * specified federate ID. 
This function should be called when a read + * of incoming socket fails or when an EOF is received. + * It can also be called when the receiving end wants to stop communication, + * in which case, flag should be 1. + * + * @param fed_id The ID of the peer federate sending messages to this + * federate. + * @param flag 0 if an EOF was received, -1 if a socket error occurred, 1 otherwise. + */ +static void close_inbound_socket(int fed_id, int flag) { + LF_MUTEX_LOCK(socket_mutex); + if (_fed.sockets_for_inbound_p2p_connections[fed_id] >= 0) { + if (flag >= 0) { + if (flag > 0) { + shutdown(_fed.sockets_for_inbound_p2p_connections[fed_id], SHUT_RDWR); + } else { + // Have received EOF from the other end. Send EOF to the other end. + shutdown(_fed.sockets_for_inbound_p2p_connections[fed_id], SHUT_WR); } } + close(_fed.sockets_for_inbound_p2p_connections[fed_id]); + _fed.sockets_for_inbound_p2p_connections[fed_id] = -1; } - assert(port < 65536); - assert(port > 0); - uint16_t uport = (uint16_t)port; + LF_MUTEX_UNLOCK(socket_mutex); +} -#if LOG_LEVEL > 3 - // Print the received IP address in a human readable format - // Create the human readable format of the received address. - // This is avoided unless LOG_LEVEL is high enough to - // subdue the overhead caused by inet_ntop(). - char hostname[INET_ADDRSTRLEN]; - inet_ntop(AF_INET, &host_ip_addr, hostname, INET_ADDRSTRLEN); - LF_PRINT_LOG("Received address %s port %d for federate %d from RTI.", - hostname, uport, remote_federate_id); -#endif +/** + * Return true if reactions need to be inserted directly into the reaction queue and + * false if a call to schedule is needed (the normal case). This function handles zero-delay + * cycles, where processing at a tag must be able to begin before all messages have arrived + * at that tag. This returns true if the following conditions are all true: + * + * 1. the first reaction triggered has a level >= MLAA (a port is or will be blocked on this trigger); + * 2. 
the intended_tag is equal to the current tag of the environment; + * 3. the intended_tag is greater than the last_tag of the trigger; + * 4. the intended_tag is greater than the last_known_status_tag of the trigger; + * 5. the execution has started (the event queue has been examined); + * 6. the trigger is not physical; + * + * The comparison against the MLAA (condition 1), if true, means that there is a blocking port + * waiting for this trigger (or possibly an earlier blocking port). For condition (2), tardy + * messages are not scheduled now (they are already late), so if a reaction is blocked on + * unknown status of this port, it will be unblocked with an absent. The comparison against the + * last_tag of the trigger (condition 3) ensures that if the message is tardy but there is + * already an earlier tardy message that has been handled (or is being handled), then we + * don't try to handle two messages in the same tag, which is not allowed. For example, there + * could be a case where current tag is 10 with a port absent reaction waiting, and a message + * has arrived with intended_tag 8. This message will eventually cause the port absent reaction + * to exit, but before that, a message with intended_tag of 9 could arrive before the port absent + * reaction has had a chance to exit. The port status is on the other hand changed in this thread, + * and thus, can be checked in this scenario without this race condition. The message with + * intended_tag of 9 in this case needs to wait one microstep to be processed. The check with + * last_known_status_tag (condition 4) deals with messages arriving with identical intended + * tags (which should not happen). This one will be handled late (one microstep later than + * the current tag if 1 and 2 are true). + * + * This function assumes the mutex is held on the environment. + * + * @param env The environment. + * @param trigger The trigger. + * @param intended_tag The intended tag. 
+ */ +static bool handle_message_now(environment_t* env, trigger_t* trigger, tag_t intended_tag) { + return trigger->reactions[0]->index >= max_level_allowed_to_advance + && lf_tag_compare(intended_tag, lf_tag(env)) == 0 + && lf_tag_compare(intended_tag, trigger->last_tag) > 0 + && lf_tag_compare(intended_tag, trigger->last_known_status_tag) > 0 + && env->execution_started + && !trigger->is_physical; +} - // Iterate until we either successfully connect or exceed the number of - // attempts given by CONNECT_NUM_RETRIES. - int socket_id = -1; - while (result < 0) { - // Create an IPv4 socket for TCP (not UDP) communication over IP (0). - socket_id = create_real_time_tcp_socket_errexit(); +/** + * Handle a message being received from a remote federate. + * + * This function assumes the caller does not hold the mutex lock. + * @param socket Pointer to the socket to read the message from. + * @param fed_id The sending federate ID or -1 if the centralized coordination. + * @return 0 for success, -1 for failure. + */ +static int handle_message(int* socket, int fed_id) { + // Read the header. + size_t bytes_to_read = sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t); + unsigned char buffer[bytes_to_read]; + if (read_from_socket_close_on_error(socket, bytes_to_read, buffer)) { + // Read failed, which means the socket has been closed between reading the + // message ID byte and here. + return -1; + } - // Server file descriptor. - struct sockaddr_in server_fd; - // Zero out the server_fd struct. - bzero((char*)&server_fd, sizeof(server_fd)); + // Extract the header information. + unsigned short port_id; + unsigned short federate_id; + size_t length; + extract_header(buffer, &port_id, &federate_id, &length); + // Check if the message is intended for this federate + assert(_lf_my_fed_id == federate_id); + LF_PRINT_DEBUG("Receiving message to port %d of length %zu.", port_id, length); - // Set up the server_fd fields. 
- server_fd.sin_family = AF_INET; // IPv4 - server_fd.sin_addr = host_ip_addr; // Received from the RTI + // Get the triggering action for the corresponding port + lf_action_base_t* action = action_for_port(port_id); - // Convert the port number from host byte order to network byte order. - server_fd.sin_port = htons(uport); - result = connect( - socket_id, - (struct sockaddr *)&server_fd, - sizeof(server_fd)); - - if (result != 0) { - lf_print_error("Failed to connect to federate %d on port %d.", remote_federate_id, uport); - - // Try again after some time if the connection failed. - // Note that this should not really happen since the remote federate should be - // accepting socket connections. But possibly it will be busy (in process of accepting - // another socket connection?). Hence, we retry. - count_retries++; - if (count_retries > CONNECT_NUM_RETRIES) { - // If the remote federate is not accepting the connection after CONNECT_NUM_RETRIES - // treat it as a soft error condition and return. - lf_print_error("Failed to connect to federate %d after %d retries. Giving up.", - remote_federate_id, CONNECT_NUM_RETRIES); - return; - } - lf_print_warning("Could not connect to federate %d. Will try again every %lld nanoseconds.\n", - remote_federate_id, ADDRESS_QUERY_RETRY_INTERVAL); - // Wait ADDRESS_QUERY_RETRY_INTERVAL nanoseconds. - if (lf_sleep(ADDRESS_QUERY_RETRY_INTERVAL) != 0) { - // Sleep was interrupted. - continue; - } - } else { - // Connect was successful. - size_t buffer_length = 1 + sizeof(uint16_t) + 1; - unsigned char buffer[buffer_length]; - buffer[0] = MSG_TYPE_P2P_SENDING_FED_ID; - if (_lf_my_fed_id > UINT16_MAX) { - // This error is very unlikely to occur. - lf_print_error_and_exit("Too many federates! 
More than %d.", UINT16_MAX); - } - encode_uint16((uint16_t)_lf_my_fed_id, (unsigned char*)&(buffer[1])); - unsigned char federation_id_length = (unsigned char)strnlen(federation_metadata.federation_id, 255); - buffer[sizeof(uint16_t) + 1] = federation_id_length; - // Trace the event when tracing is enabled - tracepoint_federate_to_federate(_fed.trace, send_FED_ID, _lf_my_fed_id, remote_federate_id, NULL); - write_to_socket_errexit(socket_id, - buffer_length, buffer, - "Failed to send fed_id to federate %d.", remote_federate_id); - write_to_socket_errexit(socket_id, - federation_id_length, (unsigned char*)federation_metadata.federation_id, - "Failed to send federation id to federate %d.", - remote_federate_id); - - read_from_socket_errexit(socket_id, 1, (unsigned char*)buffer, - "Failed to read MSG_TYPE_ACK from federate %d in response to sending fed_id.", - remote_federate_id); - if (buffer[0] != MSG_TYPE_ACK) { - // Get the error code. - read_from_socket_errexit(socket_id, 1, (unsigned char*)buffer, - "Failed to read error code from federate %d in response to sending fed_id.", remote_federate_id); - lf_print_error("Received MSG_TYPE_REJECT message from remote federate (%d).", buffer[0]); - result = -1; - continue; - } else { - lf_print("Connected to federate %d, port %d.", remote_federate_id, port); - // Trace the event when tracing is enabled - tracepoint_federate_to_federate(_fed.trace, receive_ACK, _lf_my_fed_id, remote_federate_id, NULL); - } - } + // Read the payload. + // Allocate memory for the message contents. + unsigned char* message_contents = (unsigned char*)malloc(length); + if (read_from_socket_close_on_error(socket, length, message_contents)) { + return -1; } - // Once we set this variable, then all future calls to close() on this - // socket ID should reset it to -1 within a critical section. 
- _fed.sockets_for_outbound_p2p_connections[remote_federate_id] = socket_id; + // Trace the event when tracing is enabled + tracepoint_federate_from_federate(_fed.trace, receive_P2P_MSG, _lf_my_fed_id, federate_id, NULL); + LF_PRINT_LOG("Message received by federate: %s. Length: %zu.", message_contents, length); - // Start a thread to listen for upstream messages (MSG_TYPE_CLOSE_REQUEST) from - // this downstream federate. - uint16_t* remote_fed_id_copy = (uint16_t*)malloc(sizeof(uint16_t)); - if (remote_fed_id_copy == NULL) { - lf_print_error_and_exit("malloc failed."); - } - *remote_fed_id_copy = remote_federate_id; - lf_thread_t thread_id; - result = lf_thread_create( - &thread_id, - listen_for_upstream_messages_from_downstream_federates, - remote_fed_id_copy); - if (result != 0) { - // Failed to create a listening thread. - lf_print_error_and_exit( - "Failed to create a thread to listen for upstream message. Error code: %d.", - result - ); - } + LF_PRINT_DEBUG("Calling schedule for message received on a physical connection."); + _lf_schedule_value(action, 0, message_contents, length); + return 0; } -#ifdef FEDERATED_AUTHENTICATED /** - * Perform HMAC-based authentication with the RTI, using the federation ID - * as an HMAC key. - * - * @param rti_socket TCP socket for connection with the RTI. + * Handle a tagged message being received from a remote federate via the RTI + * or directly from other federates. + * This will read the tag encoded in the header + * and calculate an offset to pass to the schedule function. + * This function assumes the caller does not hold the mutex lock. + * Instead of holding the mutex lock, this function calls + * _lf_increment_tag_barrier with the tag carried in + * the message header as an argument. This ensures that the current tag + * will not advance to the tag of the message if it is in the future, or + * the tag will not advance at all if the tag of the message is + * now or in the past. 
+ * @param socket Pointer to the socket to read the message from. + * @param fed_id The sending federate ID or -1 if the centralized coordination. + * @return 0 on successfully reading the message, -1 on failure (e.g. due to socket closed). */ -void perform_hmac_authentication(int rti_socket) { - - // Send buffer including message type, federate ID, federate's nonce. - size_t fed_id_length = sizeof(uint16_t); - size_t message_length = 1 + fed_id_length + NONCE_LENGTH; - unsigned char fed_hello_buf[message_length]; - fed_hello_buf[0] = MSG_TYPE_FED_NONCE; - encode_uint16((uint16_t)_lf_my_fed_id, &fed_hello_buf[1]); - unsigned char fed_nonce[NONCE_LENGTH]; - RAND_bytes(fed_nonce, NONCE_LENGTH); - memcpy(&fed_hello_buf[1 + fed_id_length], fed_nonce, NONCE_LENGTH); - write_to_socket(rti_socket, message_length, fed_hello_buf); - - // Check HMAC of received FED_RESPONSE message. - unsigned int hmac_length = SHA256_HMAC_LENGTH; - size_t federation_id_length = strnlen(federation_metadata.federation_id, 255); +static int handle_tagged_message(int* socket, int fed_id) { + // Environment is always the one corresponding to the top-level scheduling enclave. + environment_t *env; + _lf_get_environments(&env); - unsigned char received[1 + NONCE_LENGTH + hmac_length]; - read_from_socket_errexit(rti_socket, 1 + NONCE_LENGTH + hmac_length, received, "Failed to read RTI response."); - if (received[0] != MSG_TYPE_RTI_RESPONSE) { - lf_print_error("Received unexpected response %u from the RTI (see net_common.h).", - received[0]); + // Read the header which contains the timestamp. + size_t bytes_to_read = sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t) + + sizeof(instant_t) + sizeof(microstep_t); + unsigned char buffer[bytes_to_read]; + if (read_from_socket_close_on_error(socket, bytes_to_read, buffer)) { + return -1; // Read failed. } - // Create tag to compare to received tag. 
- unsigned char buf_to_check[1 + fed_id_length + NONCE_LENGTH]; - buf_to_check[0] = MSG_TYPE_RTI_RESPONSE; - encode_uint16((uint16_t)_lf_my_fed_id, &buf_to_check[1]); - memcpy(&buf_to_check[1 + fed_id_length], fed_nonce, NONCE_LENGTH); - unsigned char fed_tag[hmac_length]; - HMAC(EVP_sha256(), federation_metadata.federation_id, federation_id_length, buf_to_check, 1 + fed_id_length + NONCE_LENGTH, - fed_tag, &hmac_length); - // Compare received tag and created tag. - if (memcmp(&received[1 + NONCE_LENGTH], fed_tag, hmac_length) != 0) { - // HMAC does not match. Send back a MSG_TYPE_REJECT message. - lf_print_error("HMAC authentication failed."); - unsigned char response[2]; - response[0] = MSG_TYPE_REJECT; - response[1] = HMAC_DOES_NOT_MATCH; - write_to_socket_errexit( - rti_socket, 2, response, - "Federate failed to write MSG_TYPE_REJECT message on the socket."); - close(rti_socket); - } - else { - LF_PRINT_LOG("HMAC verified."); - // HMAC tag is created with MSG_TYPE_FED_RESPONSE and received federate nonce. - unsigned char mac_buf[1 + NONCE_LENGTH]; - mac_buf[0] = MSG_TYPE_FED_RESPONSE; - memcpy(&mac_buf[1], &received[1], NONCE_LENGTH); - // Buffer for message type and HMAC tag. - unsigned char sender[1 + hmac_length]; - sender[0] = MSG_TYPE_FED_RESPONSE; - HMAC(EVP_sha256(), federation_metadata.federation_id, federation_id_length, mac_buf, 1 + NONCE_LENGTH, - &sender[1], &hmac_length); - write_to_socket(rti_socket, 1 + hmac_length, sender); + // Extract the header information. 
+ unsigned short port_id; + unsigned short federate_id; + size_t length; + tag_t intended_tag; + extract_timed_header(buffer, &port_id, &federate_id, &length, &intended_tag); + // Trace the event when tracing is enabled + if (fed_id == -1) { + tracepoint_federate_from_rti(_fed.trace, receive_TAGGED_MSG, _lf_my_fed_id, &intended_tag); + } else { + tracepoint_federate_from_federate(_fed.trace, receive_P2P_TAGGED_MSG, _lf_my_fed_id, fed_id, &intended_tag); } -} -#endif + // Check if the message is intended for this federate + assert(_lf_my_fed_id == federate_id); + LF_PRINT_DEBUG("Receiving message to port %d of length %zu.", port_id, length); -/** - * Connect to the RTI at the specified host and port and return - * the socket descriptor for the connection. If this fails, the - * program exits. If it succeeds, it sets the _fed.socket_TCP_RTI global - * variable to refer to the socket for communicating with the RTI. - * @param hostname A hostname, such as "localhost". - * @param port_number A port number. - */ -void connect_to_rti(const char* hostname, int port) { - LF_PRINT_LOG("Connecting to the RTI."); + // Get the triggering action for the corresponding port + lf_action_base_t* action = action_for_port(port_id); - // override passed hostname and port if passed as runtime arguments - hostname = federation_metadata.rti_host ? federation_metadata.rti_host : hostname; - port = federation_metadata.rti_port >= 0 ? federation_metadata.rti_port : port; + // Record the physical time of arrival of the message + instant_t time_of_arrival = lf_time_physical(); - uint16_t uport = 0; - if (port < 0 || - port > INT16_MAX) { - lf_print_error( - "connect_to_rti(): Specified port (%d) is out of range," - " using zero instead.", - port - ); - } else { - uport = (uint16_t)port; + if (action->trigger->is_physical) { + // Messages sent on physical connections should be handled via handle_message(). 
+ lf_print_error_and_exit("Received a tagged message on a physical connection."); } - // Repeatedly try to connect, one attempt every 2 seconds, until - // either the program is killed, the sleep is interrupted, - // or the connection succeeds. - // If the specified port is 0, set it instead to the start of the - // port range. - bool specific_port_given = true; - if (uport == 0) { - uport = STARTING_PORT; - specific_port_given = false; +#ifdef FEDERATED_DECENTRALIZED + // Only applicable for federated programs with decentralized coordination: + // For logical connections in decentralized coordination, + // increment the barrier to prevent advancement of tag beyond + // the received tag if possible. The following function call + // suggests that the tag barrier be raised to the tag provided + // by the message. If this tag is in the past, the function will cause + // the tag to freeze at the current level. + // If something happens, make sure to release the barrier. + _lf_increment_tag_barrier(env, intended_tag); +#endif + LF_PRINT_LOG("Received message on port %d with intended tag: " PRINTF_TAG ", Current tag: " PRINTF_TAG ".", + port_id, intended_tag.time - start_time, intended_tag.microstep, + lf_time_logical_elapsed(env), env->current_tag.microstep); + + // Read the payload. + // Allocate memory for the message contents. + unsigned char* message_contents = (unsigned char*)malloc(length); + if (read_from_socket_close_on_error(socket, length, message_contents)) { +#ifdef FEDERATED_DECENTRALIZED + _lf_decrement_tag_barrier_locked(env); +#endif + return -1; // Read failed. } - int result = -1; - int count_retries = 0; - struct addrinfo hints; - struct addrinfo *res; + // The following is only valid for string messages. 
+ // LF_PRINT_DEBUG("Message received: %s.", message_contents); - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_INET; /* Allow IPv4 */ - hints.ai_socktype = SOCK_STREAM; /* Stream socket */ - hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */ - hints.ai_addr = NULL; - hints.ai_next = NULL; - hints.ai_flags = AI_NUMERICSERV; /* Allow only numeric port numbers */ + LF_MUTEX_LOCK(env->mutex); - while (result < 0) { - // Convert port number to string - char str[6]; - sprintf(str,"%u",uport); - - // Get address structure matching hostname and hints criteria, and - // set port to the port number provided in str. There should only - // ever be one matching address structure, and we connect to that. - int server = getaddrinfo(hostname, (const char*)&str, &hints, &res); - if (server != 0) { - lf_print_error_and_exit("No host for RTI matching given hostname: %s", hostname); - } + action->trigger->physical_time_of_arrival = time_of_arrival; - // Create a socket - _fed.socket_TCP_RTI = create_real_time_tcp_socket_errexit(); + // Create a token for the message + lf_token_t* message_token = _lf_new_token((token_type_t*)action, message_contents, length); - result = connect(_fed.socket_TCP_RTI, res->ai_addr, res->ai_addrlen); - if (result == 0) { - lf_print("Successfully connected to RTI."); - } + if (handle_message_now(env, action->trigger, intended_tag)) { + // Since the message is intended for the current tag and a port absent reaction + // was waiting for the message, trigger the corresponding reactions for this message. - freeaddrinfo(res); /* No longer needed */ + update_last_known_status_on_input_port(env, intended_tag, port_id); - // If this failed, try more ports, unless a specific port was given. - if (result != 0 - && !specific_port_given - && uport >= STARTING_PORT - && uport <= STARTING_PORT + PORT_RANGE_LIMIT - ) { - lf_print("Failed to connect to RTI on port %d. Trying %d.", uport, uport + 1); - uport++; - // Wait PORT_KNOCKING_RETRY_INTERVAL seconds. 
- if (lf_sleep(PORT_KNOCKING_RETRY_INTERVAL) != 0) { - // Sleep was interrupted. - continue; + LF_PRINT_LOG( + "Inserting reactions directly at tag " PRINTF_TAG ". " + "Intended tag: " PRINTF_TAG ".", + env->current_tag.time - lf_time_start(), + env->current_tag.microstep, + intended_tag.time - lf_time_start(), + intended_tag.microstep + ); + // Only set the intended tag of the trigger if it is being executed now + // because otherwise this may preempt the intended_tag of a previous activation + // of the trigger. + action->trigger->intended_tag = intended_tag; + + // This will mark the STP violation in the reaction if the message is tardy. + _lf_insert_reactions_for_trigger(env, action->trigger, message_token); + + // Set the status of the port as present here to inform the network input + // port absent reactions know that they no longer need to block. The reason for + // that is because the network receiver reaction is now in the reaction queue + // keeping the precedence order intact. + set_network_port_status(port_id, present); + } else { + // If no port absent reaction is waiting for this message, or if the intended + // tag is in the future, or the message is tardy, use schedule functions to process the message. + + tag_t actual_tag = intended_tag; +#ifdef FEDERATED_DECENTRALIZED + // For tardy messages in decentralized coordination, we need to figure out what the actual tag will be. + // (Centralized coordination errors out with tardy messages). + if (lf_tag_compare(intended_tag, env->current_tag) <= 0) { + // Message is tardy. + actual_tag = env->current_tag; + actual_tag.microstep++; + // Check that this is greater than any previously scheduled event for this port. 
+ trigger_t* input_port_action = action_for_port(port_id)->trigger; + if (lf_tag_compare(actual_tag, input_port_action->last_known_status_tag) <= 0) { + actual_tag = input_port_action->last_known_status_tag; + actual_tag.microstep++; } } - // If this still failed, try again with the original port after some time. - if (result < 0) { - if (!specific_port_given && uport == STARTING_PORT + PORT_RANGE_LIMIT + 1) { - uport = STARTING_PORT; - } - count_retries++; - if (count_retries > CONNECT_NUM_RETRIES) { - lf_print_error_and_exit("Failed to connect to the RTI after %d retries. Giving up.", - CONNECT_NUM_RETRIES); - } - lf_print("Could not connect to RTI at %s. Will try again every %lld seconds.", - hostname, CONNECT_RETRY_INTERVAL / BILLION); - // Wait CONNECT_RETRY_INTERVAL nanoseconds. - if (lf_sleep(CONNECT_RETRY_INTERVAL) != 0) { - // Sleep was interrupted. - continue; - } +#endif // FEDERATED_DECENTRALIZED + // The following will update the input_port_action->last_known_status_tag. + // For decentralized coordination, this is needed for the thread implementing STAA. + update_last_known_status_on_input_port(env, actual_tag, port_id); + + // If the current time >= stop time, discard the message. + // But only if the stop time is not equal to the start time! + if (lf_tag_compare(env->current_tag, env->stop_tag) >= 0 && env->execution_started) { + lf_print_error("Received message too late. Already at stop tag.\n" + " Current tag is " PRINTF_TAG " and intended tag is " PRINTF_TAG ".\n" + " Discarding message and closing the socket.", + env->current_tag.time - start_time, env->current_tag.microstep, + intended_tag.time - start_time, intended_tag.microstep); + // Close socket, reading any incoming data and discarding it. + close_inbound_socket(fed_id, 1); } else { - // Have connected to an RTI, but not sure it's the right RTI. - // Send a MSG_TYPE_FED_IDS message and wait for a reply. - // Notify the RTI of the ID of this federate and its federation. 
- unsigned char buffer[4]; + // Need to use intended_tag here, not actual_tag, so that STP violations are detected. + // It will become actual_tag (that is when the reactions will be invoked). + schedule_message_received_from_network_locked(env, action->trigger, intended_tag, message_token); + } + } -#ifdef FEDERATED_AUTHENTICATED - LF_PRINT_LOG("Connected to an RTI. Performing HMAC-based authentication using federation ID."); - perform_hmac_authentication(_fed.socket_TCP_RTI); -#else - LF_PRINT_LOG("Connected to an RTI. Sending federation ID for authentication."); +#ifdef FEDERATED_DECENTRALIZED + // Only applicable for federated programs with decentralized coordination + // Finally, decrement the barrier to allow the execution to continue + // past the raised barrier + _lf_decrement_tag_barrier_locked(env); #endif - // Send the message type first. - buffer[0] = MSG_TYPE_FED_IDS; - // Next send the federate ID. - if (_lf_my_fed_id > UINT16_MAX) { - lf_print_error_and_exit("Too many federates! More than %d.", UINT16_MAX); - } - encode_uint16((uint16_t)_lf_my_fed_id, &buffer[1]); - // Next send the federation ID length. - // The federation ID is limited to 255 bytes. - size_t federation_id_length = strnlen(federation_metadata.federation_id, 255); - buffer[1 + sizeof(uint16_t)] = (unsigned char)(federation_id_length & 0xff); - - // Trace the event when tracing is enabled - tracepoint_federate_to_rti(_fed.trace, send_FED_ID, _lf_my_fed_id, NULL); - - write_to_socket_errexit(_fed.socket_TCP_RTI, 2 + sizeof(uint16_t), buffer, - "Failed to send federate ID to RTI."); + // The mutex is unlocked here after the barrier on + // logical time has been removed to avoid + // the need for unecessary lock and unlock + // operations. + LF_MUTEX_UNLOCK(env->mutex); - // Next send the federation ID itself. 
- write_to_socket_errexit(_fed.socket_TCP_RTI, federation_id_length, (unsigned char*)federation_metadata.federation_id, - "Failed to send federation ID to RTI."); + return 0; +} - // Wait for a response. - // The response will be MSG_TYPE_REJECT if the federation ID doesn't match. - // Otherwise, it will be either MSG_TYPE_ACK or MSG_TYPE_UDP_PORT, where the latter - // is used if clock synchronization will be performed. - unsigned char response; +/** + * Handle a port absent message received from a remote federate. + * This just sets the last known status tag of the port specified + * in the message. + * + * @param socket Pointer to the socket to read the message from + * @param fed_id The sending federate ID or -1 if the centralized coordination. + * @return 0 for success, -1 for failure to complete the read. + */ +static int handle_port_absent_message(int* socket, int fed_id) { + size_t bytes_to_read = sizeof(uint16_t) + sizeof(uint16_t) + sizeof(instant_t) + sizeof(microstep_t); + unsigned char buffer[bytes_to_read]; + if (read_from_socket_close_on_error(socket, bytes_to_read, buffer)) { + return -1; + } - LF_PRINT_DEBUG("Waiting for response to federation ID from the RTI."); + // Extract the header information. + unsigned short port_id = extract_uint16(buffer); + // The next part of the message is the federate_id, but we don't need it. + // unsigned short federate_id = extract_uint16(&(buffer[sizeof(uint16_t)])); + tag_t intended_tag = extract_tag(&(buffer[sizeof(uint16_t)+sizeof(uint16_t)])); - read_from_socket_errexit(_fed.socket_TCP_RTI, 1, &response, "Failed to read response from RTI."); - if (response == MSG_TYPE_REJECT) { - // Trace the event when tracing is enabled - tracepoint_federate_from_rti(_fed.trace, receive_REJECT, _lf_my_fed_id, NULL); - // Read one more byte to determine the cause of rejection. 
- unsigned char cause; - read_from_socket_errexit(_fed.socket_TCP_RTI, 1, &cause, "Failed to read the cause of rejection by the RTI."); - if (cause == FEDERATION_ID_DOES_NOT_MATCH || cause == WRONG_SERVER) { - lf_print("Connected to the wrong RTI on port %d. Trying %d.", uport, uport + 1); - uport++; - result = -1; - continue; + // Trace the event when tracing is enabled + if (fed_id == -1) { + tracepoint_federate_from_rti(_fed.trace, receive_PORT_ABS, _lf_my_fed_id, &intended_tag); + } else { + tracepoint_federate_from_federate(_fed.trace, receive_PORT_ABS, _lf_my_fed_id, fed_id, &intended_tag); + } + LF_PRINT_LOG("Handling port absent for tag " PRINTF_TAG " for port %hu of fed %d.", + intended_tag.time - lf_time_start(), + intended_tag.microstep, + port_id, + fed_id + ); + + // Environment is always the one corresponding to the top-level scheduling enclave. + environment_t *env; + _lf_get_environments(&env); + + LF_MUTEX_LOCK(env->mutex); + update_last_known_status_on_input_port(env, intended_tag, port_id); + LF_MUTEX_UNLOCK(env->mutex); + + return 0; +} + +/** + * Thread that listens for inputs from other federates. + * This thread listens for messages of type MSG_TYPE_P2P_MESSAGE, + * MSG_TYPE_P2P_TAGGED_MESSAGE, or MSG_TYPE_PORT_ABSENT (@see net_common.h) from the specified + * peer federate and calls the appropriate handling function for + * each message type. If an error occurs or an EOF is received + * from the peer, then this procedure sets the corresponding + * socket in _fed.sockets_for_inbound_p2p_connections + * to -1 and returns, terminating the thread. + * @param _args The remote federate ID (cast to void*). + * @param fed_id_ptr A pointer to a uint16_t containing federate ID being listened to. + * This procedure frees the memory pointed to before returning. 
+ */ +static void* listen_to_federates(void* _args) { + uint16_t fed_id = (uint16_t)(uintptr_t)_args; + + LF_PRINT_LOG("Listening to federate %d.", fed_id); + + int* socket_id = &_fed.sockets_for_inbound_p2p_connections[fed_id]; + + // Buffer for incoming messages. + // This does not constrain the message size + // because the message will be put into malloc'd memory. + unsigned char buffer[FED_COM_BUFFER_SIZE]; + + // Listen for messages from the federate. + while (1) { + bool socket_closed = false; + // Read one byte to get the message type. + LF_PRINT_DEBUG("Waiting for a P2P message on socket %d.", *socket_id); + if (read_from_socket_close_on_error(socket_id, 1, buffer)) { + // Socket has been closed. + lf_print("Socket from federate %d is closed.", fed_id); + // Stop listening to this federate. + socket_closed = true; + break; + } + LF_PRINT_DEBUG("Received a P2P message on socket %d of type %d.", + *socket_id, buffer[0]); + bool bad_message = false; + switch (buffer[0]) { + case MSG_TYPE_P2P_MESSAGE: + LF_PRINT_LOG("Received untimed message from federate %d.", fed_id); + if (handle_message(socket_id, fed_id)) { + // Failed to complete the reading of a message on a physical connection. + lf_print_warning("Failed to complete reading of message on physical connection."); + socket_closed = true; } - lf_print_error_and_exit("RTI Rejected MSG_TYPE_FED_IDS message with response (see net_common.h): " - "%d. Error code: %d. Federate quits.\n", response, cause); - } else if (response == MSG_TYPE_ACK) { - // Trace the event when tracing is enabled - tracepoint_federate_from_rti(_fed.trace, receive_ACK, _lf_my_fed_id, NULL); - LF_PRINT_LOG("Received acknowledgment from the RTI."); - - // Call a generated (external) function that sends information - // about connections between this federate and other federates - // where messages are routed through the RTI. 
- // @see MSG_TYPE_NEIGHBOR_STRUCTURE in net_common.h - send_neighbor_structure_to_RTI(_fed.socket_TCP_RTI); - - uint16_t udp_port = setup_clock_synchronization_with_rti(); - - // Write the returned port number to the RTI - unsigned char UDP_port_number[1 + sizeof(uint16_t)]; - UDP_port_number[0] = MSG_TYPE_UDP_PORT; - encode_uint16(udp_port, &(UDP_port_number[1])); - write_to_socket_errexit(_fed.socket_TCP_RTI, 1 + sizeof(uint16_t), UDP_port_number, - "Failed to send the UDP port number to the RTI."); - } else { - lf_print_error_and_exit("Received unexpected response %u from the RTI (see net_common.h).", - response); + break; + case MSG_TYPE_P2P_TAGGED_MESSAGE: + LF_PRINT_LOG("Received tagged message from federate %d.", fed_id); + if (handle_tagged_message(socket_id, fed_id)) { + // P2P tagged messages are only used in decentralized coordination, and + // it is not a fatal error if the socket is closed before the whole message is read. + // But this thread should exit. + lf_print_warning("Failed to complete reading of tagged message."); + socket_closed = true; + } + break; + case MSG_TYPE_PORT_ABSENT: + LF_PRINT_LOG("Received port absent message from federate %d.", fed_id); + if (handle_port_absent_message(socket_id, fed_id)) { + // P2P tagged messages are only used in decentralized coordination, and + // it is not a fatal error if the socket is closed before the whole message is read. + // But this thread should exit. + lf_print_warning("Failed to complete reading of tagged message."); + socket_closed = true; + } + break; + default: + bad_message = true; + } + if (bad_message) { + lf_print_error("Received erroneous message type: %d. 
Closing the socket.", buffer[0]); + // Trace the event when tracing is enabled + tracepoint_federate_from_federate(_fed.trace, receive_UNIDENTIFIED, _lf_my_fed_id, fed_id, NULL); + break; // while loop + } + if (socket_closed) { + // NOTE: For decentralized execution, once this socket is closed, we could + // update last known tags of all ports connected to the specified federate to FOREVER_TAG, + // which would eliminate the need to wait for STAA to assume an input is absent. + // However, at this time, we don't know which ports correspond to which upstream federates. + // The code generator would have to encode this information. Once that is done, + // we could call update_last_known_status_on_input_port with FOREVER_TAG. + + break; // while loop + } + } + return NULL; +} + +/** + * Close the socket that sends outgoing messages to the + * specified federate ID. This function acquires the lf_outbound_socket_mutex mutex lock + * if _lf_normal_termination is true and otherwise proceeds without the lock. + * @param fed_id The ID of the peer federate receiving messages from this + * federate, or -1 if the RTI (centralized coordination). + * @param flag 0 if the socket has received EOF, 1 if not, -1 if abnormal termination. + */ +static void close_outbound_socket(int fed_id, int flag) { + assert (fed_id >= 0 && fed_id < NUMBER_OF_FEDERATES); + if (_lf_normal_termination) { + LF_MUTEX_LOCK(lf_outbound_socket_mutex); + } + if (_fed.sockets_for_outbound_p2p_connections[fed_id] >= 0) { + // Close the socket by sending a FIN packet indicating that no further writes + // are expected. Then read until we get an EOF indication. + if (flag >= 0) { + // SHUT_WR indicates no further outgoing messages. + shutdown(_fed.sockets_for_outbound_p2p_connections[fed_id], SHUT_WR); + if (flag > 0) { + // Have not received EOF yet. read until we get an EOF or error indication. 
+ // This compensates for delayed ACKs and disabling of Nagles algorithm + // by delaying exiting until the shutdown is complete. + unsigned char message[32]; + while (read(_fed.sockets_for_outbound_p2p_connections[fed_id], &message, 32) > 0); } - lf_print("Connected to RTI at %s:%d.", hostname, uport); } + close(_fed.sockets_for_outbound_p2p_connections[fed_id]); + _fed.sockets_for_outbound_p2p_connections[fed_id] = -1; + } + if (_lf_normal_termination) { + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); + } +} + +#ifdef FEDERATED_AUTHENTICATED +/** + * Perform HMAC-based authentication with the RTI, using the federation ID + * as an HMAC key. + * @return 0 for success, -1 for failure. + */ +static int perform_hmac_authentication() { + + // Send buffer including message type, federate ID, federate's nonce. + size_t fed_id_length = sizeof(uint16_t); + size_t message_length = 1 + fed_id_length + NONCE_LENGTH; + unsigned char fed_hello_buf[message_length]; + fed_hello_buf[0] = MSG_TYPE_FED_NONCE; + encode_uint16((uint16_t)_lf_my_fed_id, &fed_hello_buf[1]); + unsigned char fed_nonce[NONCE_LENGTH]; + RAND_bytes(fed_nonce, NONCE_LENGTH); + memcpy(&fed_hello_buf[1 + fed_id_length], fed_nonce, NONCE_LENGTH); + + write_to_socket_fail_on_error( + &_fed.socket_TCP_RTI, message_length, fed_hello_buf, NULL, + "Failed to write nonce."); + + // Check HMAC of received FED_RESPONSE message. 
+ unsigned int hmac_length = SHA256_HMAC_LENGTH; + size_t federation_id_length = strnlen(federation_metadata.federation_id, 255); + + unsigned char received[1 + NONCE_LENGTH + hmac_length]; + if (read_from_socket_close_on_error(&_fed.socket_TCP_RTI, 1 + NONCE_LENGTH + hmac_length, received)) { + lf_print_warning("Failed to read RTI response."); + return -1; + } + if (received[0] != MSG_TYPE_RTI_RESPONSE) { + if (received[0] == MSG_TYPE_FAILED) { + lf_print_error("RTI has failed."); + return -1; + } else { + lf_print_error( + "Received unexpected response %u from the RTI (see net_common.h).", + received[0]); + return -1; + } + } + // Create tag to compare to received tag. + unsigned char buf_to_check[1 + fed_id_length + NONCE_LENGTH]; + buf_to_check[0] = MSG_TYPE_RTI_RESPONSE; + encode_uint16((uint16_t)_lf_my_fed_id, &buf_to_check[1]); + memcpy(&buf_to_check[1 + fed_id_length], fed_nonce, NONCE_LENGTH); + unsigned char fed_tag[hmac_length]; + HMAC(EVP_sha256(), federation_metadata.federation_id, federation_id_length, buf_to_check, 1 + fed_id_length + NONCE_LENGTH, + fed_tag, &hmac_length); + + // Compare received tag and created tag. + if (memcmp(&received[1 + NONCE_LENGTH], fed_tag, hmac_length) != 0) { + // HMAC does not match. Send back a MSG_TYPE_REJECT message. + lf_print_error("HMAC authentication failed."); + unsigned char response[2]; + response[0] = MSG_TYPE_REJECT; + response[1] = HMAC_DOES_NOT_MATCH; + + // Ignore errors on writing back. + write_to_socket(_fed.socket_TCP_RTI, 2, response); + return -1; + } else { + LF_PRINT_LOG("HMAC verified."); + // HMAC tag is created with MSG_TYPE_FED_RESPONSE and received federate nonce. + unsigned char mac_buf[1 + NONCE_LENGTH]; + mac_buf[0] = MSG_TYPE_FED_RESPONSE; + memcpy(&mac_buf[1], &received[1], NONCE_LENGTH); + // Buffer for message type and HMAC tag. 
+ unsigned char sender[1 + hmac_length]; + sender[0] = MSG_TYPE_FED_RESPONSE; + HMAC(EVP_sha256(), federation_metadata.federation_id, federation_id_length, mac_buf, 1 + NONCE_LENGTH, + &sender[1], &hmac_length); + + write_to_socket_fail_on_error( + &_fed.socket_TCP_RTI, 1 + hmac_length, sender, NULL, + "Failed to write fed response."); + } + return 0; +} +#endif + +static void close_rti_socket() { + shutdown(_fed.socket_TCP_RTI, SHUT_RDWR); + close(_fed.socket_TCP_RTI); + _fed.socket_TCP_RTI = -1; +} + +/** + * Return in the result a struct with the address info for the specified hostname and port. + * The memory for the result is dynamically allocated and must be freed using freeaddrinfo. + * @param hostname The host name. + * @param port The port number. + * @param result The struct into which to write. + */ +static void rti_address(const char* hostname, uint16_t port, struct addrinfo** result) { + struct addrinfo hints; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; /* Allow IPv4 */ + hints.ai_socktype = SOCK_STREAM; /* Stream socket */ + hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */ + hints.ai_addr = NULL; + hints.ai_next = NULL; + hints.ai_flags = AI_NUMERICSERV; /* Allow only numeric port numbers */ + + // Convert port number to string. + char str[6]; + sprintf(str, "%u", port); + + // Get address structure matching hostname and hints criteria, and + // set port to the port number provided in str. There should only + // ever be one matching address structure, and we connect to that. + if (getaddrinfo(hostname, (const char*)&str, &hints, result)) { + lf_print_error_and_exit("No host for RTI matching given hostname: %s", hostname); } } @@ -1169,23 +981,27 @@ void connect_to_rti(const char* hostname, int port) { * @param my_physical_time The physical time at this federate. * @return The designated start time for the federate. 
*/ -instant_t get_start_time_from_rti(instant_t my_physical_time) { +static instant_t get_start_time_from_rti(instant_t my_physical_time) { // Send the timestamp marker first. - _lf_send_time(MSG_TYPE_TIMESTAMP, my_physical_time, true); + send_time(MSG_TYPE_TIMESTAMP, my_physical_time); // Read bytes from the socket. We need 9 bytes. // Buffer for message ID plus timestamp. size_t buffer_length = 1 + sizeof(instant_t); unsigned char buffer[buffer_length]; - read_from_socket_errexit(_fed.socket_TCP_RTI, buffer_length, buffer, + read_from_socket_fail_on_error(&_fed.socket_TCP_RTI, buffer_length, buffer, NULL, "Failed to read MSG_TYPE_TIMESTAMP message from RTI."); LF_PRINT_DEBUG("Read 9 bytes."); // First byte received is the message ID. if (buffer[0] != MSG_TYPE_TIMESTAMP) { - lf_print_error_and_exit("Expected a MSG_TYPE_TIMESTAMP message from the RTI. Got %u (see net_common.h).", - buffer[0]); + if (buffer[0] == MSG_TYPE_FAILED) { + lf_print_error_and_exit("RTI has failed."); + } + lf_print_error_and_exit( + "Expected a MSG_TYPE_TIMESTAMP message from the RTI. Got %u (see net_common.h).", + buffer[0]); } instant_t timestamp = extract_int64(&(buffer[1])); @@ -1199,1500 +1015,1381 @@ instant_t get_start_time_from_rti(instant_t my_physical_time) { return timestamp; } -//////////////////////////////// Port Status Handling /////////////////////////////////////// - -extern lf_action_base_t* _lf_action_table[]; -extern interval_t _lf_action_delay_table[]; -extern size_t _lf_action_table_size; -extern lf_action_base_t* _lf_zero_delay_action_table[]; -extern size_t _lf_zero_delay_action_table_size; -extern reaction_t* network_input_reactions[]; -extern size_t num_network_input_reactions; -extern reaction_t* port_absent_reaction[]; -extern size_t num_sender_reactions; -#ifdef FEDERATED_DECENTRALIZED -extern staa_t* staa_lst[]; -extern size_t staa_lst_size; -#endif - /** - * Return a pointer to the action struct for the action - * corresponding to the specified port ID. 
- * @param port_id The port ID. - * @return A pointer to an action struct or null if the ID is out of range. + * Handle a time advance grant (TAG) message from the RTI. + * This updates the last known status tag for each network input + * port, and broadcasts a signal, which may cause a blocking + * port absent reaction to unblock. + * + * In addition, this updates the last known TAG/PTAG and broadcasts + * a notification of this update, which may unblock whichever worker + * thread is trying to advance time. + * + * @note This function is very similar to handle_provisinal_tag_advance_grant() except that + * it sets last_TAG_was_provisional to false. */ -lf_action_base_t* _lf_action_for_port(int port_id) { - if (port_id < _lf_action_table_size) { - return _lf_action_table[port_id]; +static void handle_tag_advance_grant(void) { + // Environment is always the one corresponding to the top-level scheduling enclave. + environment_t *env; + _lf_get_environments(&env); + + size_t bytes_to_read = sizeof(instant_t) + sizeof(microstep_t); + unsigned char buffer[bytes_to_read]; + read_from_socket_fail_on_error(&_fed.socket_TCP_RTI, bytes_to_read, buffer, NULL, + "Failed to read tag advance grant from RTI."); + tag_t TAG = extract_tag(buffer); + + // Trace the event when tracing is enabled + tracepoint_federate_from_rti(_fed.trace, receive_TAG, _lf_my_fed_id, &TAG); + + LF_MUTEX_LOCK(env->mutex); + + // Update the last known status tag of all network input ports + // to the TAG received from the RTI. Here we assume that the RTI + // knows the status of network ports up to and including the granted tag, + // so by extension, we assume that the federate can safely rely + // on the RTI to handle port statuses up until the granted tag. + update_last_known_status_on_input_ports(TAG); + + // It is possible for this federate to have received a PTAG + // earlier with the same tag as this TAG. 
+ if (lf_tag_compare(TAG, _fed.last_TAG) >= 0) { + _fed.last_TAG = TAG; + _fed.is_last_TAG_provisional = false; + LF_PRINT_LOG("Received Time Advance Grant (TAG): " PRINTF_TAG ".", + _fed.last_TAG.time - start_time, _fed.last_TAG.microstep); + } else { + LF_MUTEX_UNLOCK(env->mutex); + lf_print_error("Received a TAG " PRINTF_TAG " that wasn't larger " + "than the previous TAG or PTAG " PRINTF_TAG ". Ignoring the TAG.", + TAG.time - start_time, TAG.microstep, + _fed.last_TAG.time - start_time, _fed.last_TAG.microstep); + return; } - lf_print_error("Invalid port ID: %d", port_id); - return NULL; + // Notify everything that is blocked. + lf_cond_broadcast(&env->event_q_changed); + + LF_MUTEX_UNLOCK(env->mutex); } +#ifdef FEDERATED_DECENTRALIZED /** - * Set the status of network port with id portID. + * @brief Return whether there exists an input port whose status is unknown. * - * @param portID The network port ID - * @param status The network port status (port_status_t) + * @param staa_elem A record of all input port actions. */ -void set_network_port_status(int portID, port_status_t status) { - lf_action_base_t* network_input_port_action = _lf_action_for_port(portID); - network_input_port_action->trigger->status = status; +static bool a_port_is_unknown(staa_t* staa_elem) { + bool do_wait = false; + for (int j = 0; j < staa_elem->num_actions; ++j) { + if (staa_elem->actions[j]->trigger->status == unknown) { + do_wait = true; + break; + } + } + return do_wait; } +#endif /** - * Update the last known status tag of all network input ports - * to the value of `tag`, unless that the provided `tag` is less - * than the last_known_status_tag of the port. This is called when - * all inputs to network ports with tags up to and including `tag` - * have been received by those ports. If any update occurs, - * then this broadcasts on `port_status_changed`. - * - * This assumes the caller holds the mutex. 
- * - * @param tag The tag on which the latest status of all network input - * ports is known. + * @brief Return the port ID of the port associated with the given action. + * @return The port ID or -1 if there is no match. */ -void update_last_known_status_on_input_ports(tag_t tag) { - LF_PRINT_DEBUG("In update_last_known_status_on_input ports."); - bool notify = false; +static int id_of_action(lf_action_base_t* input_port_action) { for (int i = 0; i < _lf_action_table_size; i++) { - lf_action_base_t* input_port_action = _lf_action_for_port(i); - // This is called when a TAG is received. - // But it is possible for an input port to have received already - // a message with a larger tag (if there is an after delay on the - // connection), in which case, the last known status tag of the port - // is in the future and should not be rolled back. So in that case, - // we do not update the last known status tag. - if (lf_tag_compare(tag, - input_port_action->trigger->last_known_status_tag) >= 0) { - LF_PRINT_DEBUG( - "Updating the last known status tag of port %d from " PRINTF_TAG " to " PRINTF_TAG ".", - i, - input_port_action->trigger->last_known_status_tag.time - lf_time_start(), - input_port_action->trigger->last_known_status_tag.microstep, - tag.time - lf_time_start(), - tag.microstep - ); - input_port_action->trigger->last_known_status_tag = tag; - notify = true; - } - } - // FIXME: We could put a condition variable into the trigger_t - // struct for each network input port, in which case this won't - // be a broadcast but rather a targetted signal. - if (notify && update_max_level(tag, false)) { - // Notify network input reactions - lf_cond_broadcast(&port_status_changed); + if (_lf_action_table[i] == input_port_action) return i; } + return -1; } /** - * Update the last known status tag of a network input port - * to the value of "tag". This is the largest tag at which the status - * (present or absent) of the port is known. 
- * - * This function assumes the caller holds the mutex, and, if the tag - * actually increases, it broadcasts on `port_status_changed`. - * - * @param tag The tag on which the latest status of network input - * ports is known. - * @param portID The port ID + * @brief Thread handling setting the known absent status of input ports. + * For the code-generated array of staa offsets `staa_lst`, which is sorted by STAA offset, + * wait for physical time to advance to the current time plus the STAA offset, + * then set the absent status of the input ports associated with the STAA. + * Then wait for current time to advance and start over. */ -void update_last_known_status_on_input_port(tag_t tag, int port_id) { - trigger_t* input_port_action = _lf_action_for_port(port_id)->trigger; - if (lf_tag_compare(tag, - input_port_action->last_known_status_tag) >= 0) { - if (lf_tag_compare(tag, - input_port_action->last_known_status_tag) == 0) { - // If the intended tag for an input port is equal to the last known status, we need - // to increment the microstep. This is a direct result of the behavior of the lf_delay_tag() - // semantics in tag.h. - tag.microstep++; +#ifdef FEDERATED_DECENTRALIZED +static void* update_ports_from_staa_offsets(void* args) { + if (staa_lst_size == 0) return NULL; // Nothing to do. + // NOTE: Using only the top-level environment, which is the one that deals with network + // input ports. + environment_t *env; + int num_envs = _lf_get_environments(&env); + LF_MUTEX_LOCK(env->mutex); + while (1) { + LF_PRINT_DEBUG("**** (update thread) starting"); + tag_t tag_when_started_waiting = lf_tag(env); + for (int i = 0; i < staa_lst_size; ++i) { + staa_t* staa_elem = staa_lst[i]; + // The staa_elem is adjusted in the code generator to have subtracted the delay on the connection. + // The list is sorted in increasing order of adjusted STAA offsets. + // The wait_until function automatically adds the _lf_fed_STA_offset to the wait time. 
+ interval_t wait_until_time = env->current_tag.time + staa_elem->STAA; + LF_PRINT_DEBUG("**** (update thread) original wait_until_time: " PRINTF_TIME, wait_until_time - lf_time_start()); + + // The wait_until call will release the env->mutex while it is waiting. + // However, it will not release the env->mutex if the wait time is too small. + // At the cost of a small additional delay in deciding a port is absent, + // we require a minimum wait time here. Otherwise, if both the STAA and STA are + // zero, this thread will fail to ever release the environment mutex. + // This causes chaos. The MIN_SLEEP_DURATION is the smallest amount of time + // that wait_until will actually wait. Note that this strategy does not + // block progress of any execution that is actually processing events. + // It only slightly delays the decision that an event is absent, and only + // if the STAA and STA are extremely small. + if (_lf_fed_STA_offset + staa_elem->STAA < 5 * MIN_SLEEP_DURATION) { + wait_until_time += 5 * MIN_SLEEP_DURATION; + } + while (a_port_is_unknown(staa_elem)) { + LF_PRINT_DEBUG("**** (update thread) waiting until: " PRINTF_TIME, wait_until_time - lf_time_start()); + if (wait_until(env, wait_until_time, &lf_port_status_changed)) { + if (lf_tag_compare(lf_tag(env), tag_when_started_waiting) != 0) { + break; + } + /* Possibly useful for debugging: + tag_t current_tag = lf_tag(env); + LF_PRINT_DEBUG("**** (update thread) Assuming absent! 
" PRINTF_TAG, current_tag.time - lf_time_start(), current_tag.microstep); + LF_PRINT_DEBUG("**** (update thread) Lag is " PRINTF_TIME, current_tag.time - lf_time_physical()); + LF_PRINT_DEBUG("**** (update thread) Wait until time is " PRINTF_TIME, wait_until_time - lf_time_start()); + */ + + for (int j = 0; j < staa_elem->num_actions; ++j) { + lf_action_base_t* input_port_action = staa_elem->actions[j]; + if (input_port_action->trigger->status == unknown) { + input_port_action->trigger->status = absent; + LF_PRINT_DEBUG("**** (update thread) Assuming port absent at time " PRINTF_TIME, lf_tag(env).time - start_time); + update_last_known_status_on_input_port(env, lf_tag(env), id_of_action(input_port_action)); + lf_cond_broadcast(&lf_port_status_changed); + } + } } - LF_PRINT_DEBUG( - "Updating the last known status tag of port %d from " PRINTF_TAG " to " PRINTF_TAG ".", - port_id, - input_port_action->last_known_status_tag.time - lf_time_start(), - input_port_action->last_known_status_tag.microstep, - tag.time - lf_time_start(), - tag.microstep - ); - input_port_action->last_known_status_tag = tag; - // There is no guarantee that there is either a TAG or a PTAG for this time. - // The message that triggered this to be called could be from an upstream - // federate that is far ahead of other upstream federates in logical time. - // Therefore, do not pass `tag` to `update_max_level`. - update_max_level(_fed.last_TAG, _fed.is_last_TAG_provisional); - lf_cond_broadcast(&port_status_changed); - } else { - LF_PRINT_DEBUG("Attempt to update the last known status tag " - "of network input port %d to an earlier tag was ignored.", port_id); + // If the tag has advanced, start over. + if (lf_tag_compare(lf_tag(env), tag_when_started_waiting) != 0) break; + } + // If the tag has advanced, start over. + if (lf_tag_compare(lf_tag(env), tag_when_started_waiting) != 0) break; + } + // If the tag has advanced, start over. 
+ if (lf_tag_compare(lf_tag(env), tag_when_started_waiting) != 0) continue; + + // At this point, the current tag is the same as when we started waiting + // and all ports should be known, and hence max_level_allowed_to_advance + // should be INT_MAX. Check this to prevent an infinite wait. + if (max_level_allowed_to_advance != INT_MAX) { + // If this occurs, then the current tag advanced during a wait. + // Some ports may have been reset to uknown during that wait, in which case, + // it would be huge mistake to enter the wait for a new tag below because the + // program will freeze. First, check whether any ports are unknown: + bool port_unkonwn = false; + for (int i = 0; i < staa_lst_size; ++i) { + staa_t* staa_elem = staa_lst[i]; + if (a_port_is_unknown(staa_elem)) { + port_unkonwn = true; + break; + } + } + if (!port_unkonwn) { + // If this occurs, then there is a race condition that can lead to deadlocks. + lf_print_error_and_exit("**** (update thread) Inconsistency: All ports are known, but MLAA is blocking."); + } + + // Since max_level_allowed_to_advance will block advancement of time, we cannot follow + // through to the next step without deadlocking. Wait some time, then continue. + // The wait is necessary to prevent a busy wait. + lf_sleep(2 * MIN_SLEEP_DURATION); + continue; + } + + // Wait until we progress to a new tag. + while (lf_tag_compare(lf_tag(env), tag_when_started_waiting) == 0) { + // The following will release the env->mutex while waiting. + LF_PRINT_DEBUG("**** (update thread) Waiting for tags to not match: " PRINTF_TAG ", " PRINTF_TAG, + lf_tag(env).time - lf_time_start(), lf_tag(env).microstep, + tag_when_started_waiting.time -lf_time_start(), tag_when_started_waiting.microstep); + // Ports are reset to unknown at the start of new tag, so that will wake this up. 
+ lf_cond_wait(&lf_port_status_changed); + } + LF_PRINT_DEBUG("**** (update thread) Tags after wait: " PRINTF_TAG ", " PRINTF_TAG, + lf_tag(env).time - lf_time_start(), lf_tag(env).microstep, + tag_when_started_waiting.time -lf_time_start(), tag_when_started_waiting.microstep); } + LF_MUTEX_UNLOCK(env->mutex); } +#endif // FEDERATED_DECENTRALIZED /** - * Reset the status fields on network input ports to unknown. + * Handle a provisional tag advance grant (PTAG) message from the RTI. + * This updates the last known TAG/PTAG and broadcasts + * a notification of this update, which may unblock whichever worker + * thread is trying to advance time. + * If current_time is less than the specified PTAG, then this will + * also insert into the event_q a dummy event with the specified tag. + * This will ensure that the federate advances time to the specified + * tag and, for centralized coordination, stimulates null-message-sending + * output reactions at that tag. * - * @note This function must be called at the beginning of each - * logical time. + * @note This function is similar to handle_tag_advance_grant() except that + * it sets last_TAG_was_provisional to true and also it does not update the + * last known tag for input ports. */ -void reset_status_fields_on_input_port_triggers() { - for (int i = 0; i < _lf_action_table_size; i++) { - set_network_port_status(i, unknown); +static void handle_provisional_tag_advance_grant() { + // Environment is always the one corresponding to the top-level scheduling enclave. 
+ environment_t *env; + _lf_get_environments(&env); + + size_t bytes_to_read = sizeof(instant_t) + sizeof(microstep_t); + unsigned char buffer[bytes_to_read]; + read_from_socket_fail_on_error(&_fed.socket_TCP_RTI, bytes_to_read, buffer, NULL, + "Failed to read provisional tag advance grant from RTI."); + tag_t PTAG = extract_tag(buffer); + + // Trace the event when tracing is enabled + tracepoint_federate_from_rti(_fed.trace, receive_PTAG, _lf_my_fed_id, &PTAG); + + // Note: it is important that last_known_status_tag of ports does not + // get updated to a PTAG value because a PTAG does not indicate that + // the RTI knows about the status of all ports up to and _including_ + // the value of PTAG. Only a TAG message indicates that. + LF_MUTEX_LOCK(env->mutex); + + // Sanity check + if (lf_tag_compare(PTAG, _fed.last_TAG) < 0 + || (lf_tag_compare(PTAG, _fed.last_TAG) == 0 && !_fed.is_last_TAG_provisional)) { + LF_MUTEX_UNLOCK(env->mutex); + lf_print_error_and_exit("Received a PTAG " PRINTF_TAG " that is equal or earlier " + "than an already received TAG " PRINTF_TAG ".", + PTAG.time, PTAG.microstep, + _fed.last_TAG.time, _fed.last_TAG.microstep); } - LF_PRINT_DEBUG("Resetting port status fields."); - update_max_level(_fed.last_TAG, _fed.is_last_TAG_provisional); -} -/** - * Enqueue port absent reactions that will send a MSG_TYPE_PORT_ABSENT - * message to downstream federates if a given network output port is not present. - * @param env The environment of the federate - */ -void enqueue_port_absent_reactions(environment_t* env){ - assert(env != GLOBAL_ENVIRONMENT); -#ifdef FEDERATED_CENTRALIZED - if (!_fed.has_downstream) { - // This federate is not connected to any downstream federates via a - // logical connection. No need to trigger port absent - // reactions. 
+ _fed.last_TAG = PTAG; + _fed.is_last_TAG_provisional = true; + LF_PRINT_LOG("At tag " PRINTF_TAG ", received Provisional Tag Advance Grant (PTAG): " PRINTF_TAG ".", + env->current_tag.time - start_time, env->current_tag.microstep, + _fed.last_TAG.time - start_time, _fed.last_TAG.microstep); + + // Even if we don't modify the event queue, we need to broadcast a change + // because we do not need to continue to wait for a TAG. + lf_cond_broadcast(&env->event_q_changed); + // Notify level advance thread which is blocked. + lf_update_max_level(_fed.last_TAG, _fed.is_last_TAG_provisional); + lf_cond_broadcast(&lf_port_status_changed); + + // Possibly insert a dummy event into the event queue if current time is behind + // (which it should be). Do not do this if the federate has not fully + // started yet. + + instant_t dummy_event_time = PTAG.time; + microstep_t dummy_event_relative_microstep = PTAG.microstep; + + if (lf_tag_compare(env->current_tag, PTAG) == 0) { + // The current tag can equal the PTAG if we are at the start time + // or if this federate has been able to advance time to the current + // tag (e.g., it has no upstream federates). In either case, either + // it is already treating the current tag as PTAG cycle (e.g. at the + // start time) or it will be completing the current cycle and sending + // a LTC message shortly. In either case, there is nothing more to do. + LF_MUTEX_UNLOCK(env->mutex); return; - } -#endif - LF_PRINT_DEBUG("Enqueueing port absent reactions at time %lld.", (long long) (env->current_tag.time - start_time)); - if (num_sender_reactions == 0) { - LF_PRINT_DEBUG("No port absent reactions."); + } else if (lf_tag_compare(env->current_tag, PTAG) > 0) { + // Current tag is greater than the PTAG. + // It could be that we have sent an LTC that crossed with the incoming + // PTAG or that we have advanced to a tag greater than the PTAG. + // In the former case, there is nothing more to do. 
+ // In the latter case, we may be blocked processing a PTAG cycle at + // a greater tag or we may be in the middle of processing a regular + // TAG. In either case, we know that at the PTAG tag, all outputs + // have either been sent or are absent, so we can send an LTC. + // Send an LTC to indicate absent outputs. + lf_latest_tag_complete(PTAG); + // Nothing more to do. + LF_MUTEX_UNLOCK(env->mutex); return; + } else if (PTAG.time == env->current_tag.time) { + // We now know env->current_tag < PTAG, but the times are equal. + // Adjust the microstep for scheduling the dummy event. + dummy_event_relative_microstep -= env->current_tag.microstep; } - for (int i = 0; i < num_sender_reactions; i++) { - reaction_t* reaction = port_absent_reaction[i]; - if (reaction && reaction->status == inactive) { - LF_PRINT_DEBUG("Inserting port absent reaction on reaction queue."); - lf_scheduler_trigger_reaction(env->scheduler, reaction, -1); - } + // We now know env->current_tag < PTAG. + + if (dummy_event_time != FOREVER) { + // Schedule a dummy event at the specified time and (relative) microstep. + LF_PRINT_DEBUG("At tag " PRINTF_TAG ", inserting into the event queue a dummy event " + "with time " PRINTF_TIME " and (relative) microstep " PRINTF_MICROSTEP ".", + env->current_tag.time - start_time, env->current_tag.microstep, + dummy_event_time - start_time, dummy_event_relative_microstep); + // Dummy event points to a NULL trigger and NULL real event. + event_t* dummy = _lf_create_dummy_events(env, + NULL, dummy_event_time, NULL, dummy_event_relative_microstep); + pqueue_insert(env->event_q, dummy); } + + LF_MUTEX_UNLOCK(env->mutex); } /** - * Send a port absent message to federate with fed_ID, informing the - * remote federate that the current federate will not produce an event - * on this network port at the current logical time. + * Handle a MSG_TYPE_STOP_GRANTED message from the RTI. 
* - * @param env The environment of the federate - * @param additional_delay The offset applied to the timestamp - * using after. The additional delay will be greater or equal to zero - * if an after is used on the connection. If no after is given in the - * program, -1 is passed. - * @param port_ID The ID of the receiving port. - * @param fed_ID The fed ID of the receiving federate. + * This function removes the global barrier on + * logical time raised when lf_request_stop() was + * called in the environment for each enclave. */ -void send_port_absent_to_federate(environment_t* env, interval_t additional_delay, - unsigned short port_ID, - unsigned short fed_ID) { - assert(env != GLOBAL_ENVIRONMENT); +static void handle_stop_granted_message() { - // Construct the message - size_t message_length = 1 + sizeof(port_ID) + sizeof(fed_ID) + sizeof(instant_t) + sizeof(microstep_t); - unsigned char buffer[message_length]; + size_t bytes_to_read = MSG_TYPE_STOP_GRANTED_LENGTH - 1; + unsigned char buffer[bytes_to_read]; + read_from_socket_fail_on_error(&_fed.socket_TCP_RTI, bytes_to_read, buffer, NULL, + "Failed to read stop granted from RTI."); - // Apply the additional delay to the current tag and use that as the intended - // tag of the outgoing message. Note that if there is delay on the connection, - // then we cannot promise no message with tag = current_tag + delay because a - // subsequent reaction might produce such a message. But we can promise no - // message with a tag strictly less than current_tag + delay. 
- tag_t current_message_intended_tag = lf_delay_strict(env->current_tag, - additional_delay); + tag_t received_stop_tag = extract_tag(buffer); - LF_PRINT_LOG("Sending port " - "absent for tag " PRINTF_TAG " for port %d to federate %d.", - current_message_intended_tag.time - start_time, - current_message_intended_tag.microstep, - port_ID, fed_ID); + // Trace the event when tracing is enabled + tracepoint_federate_from_rti(_fed.trace, receive_STOP_GRN, _lf_my_fed_id, &received_stop_tag); - buffer[0] = MSG_TYPE_PORT_ABSENT; - encode_uint16(port_ID, &(buffer[1])); - encode_uint16(fed_ID, &(buffer[1+sizeof(port_ID)])); - encode_tag(&(buffer[1+sizeof(port_ID)+sizeof(fed_ID)]), current_message_intended_tag); + LF_PRINT_LOG("Received from RTI a MSG_TYPE_STOP_GRANTED message with elapsed tag " PRINTF_TAG ".", + received_stop_tag.time - start_time, received_stop_tag.microstep); - lf_mutex_lock(&outbound_socket_mutex); -#ifdef FEDERATED_CENTRALIZED - // Send the absent message through the RTI - int socket = _fed.socket_TCP_RTI; -#else - // Send the absent message directly to the federate - int socket = _fed.sockets_for_outbound_p2p_connections[fed_ID]; -#endif - // Do not write if the socket is closed. - if (socket >= 0) { - // Trace the event when tracing is enabled - tracepoint_federate_to_rti(_fed.trace, send_PORT_ABS, _lf_my_fed_id, ¤t_message_intended_tag); - write_to_socket_with_mutex(socket, message_length, buffer, &outbound_socket_mutex, - "Failed to send port absent message for port %hu to federate %hu.", - port_ID, fed_ID); + environment_t *env; + int num_environments = _lf_get_environments(&env); + + for (int i = 0; i < num_environments; i++) { + LF_MUTEX_LOCK(env[i].mutex); + + // Sanity check. + if (lf_tag_compare(received_stop_tag, env[i].current_tag) <= 0) { + lf_print_error("RTI granted a MSG_TYPE_STOP_GRANTED tag that is equal to or less than this federate's current tag " PRINTF_TAG ". 
" + "Stopping at the next microstep instead.", + env[i].current_tag.time - start_time, env[i].current_tag.microstep); + received_stop_tag = env[i].current_tag; + received_stop_tag.microstep++; + } + + _lf_set_stop_tag(&env[i], received_stop_tag); + LF_PRINT_DEBUG("Setting the stop tag to " PRINTF_TAG ".", + env[i].stop_tag.time - start_time, + env[i].stop_tag.microstep); + + if (env[i].barrier.requestors) _lf_decrement_tag_barrier_locked(&env[i]); + lf_cond_broadcast(&env[i].event_q_changed); + LF_MUTEX_UNLOCK(env[i].mutex); } - lf_mutex_unlock(&outbound_socket_mutex); } -///////////////////////////////////////////////////////////////////////////////////////// - /** - * Version of schedule_value() similar to that in reactor_common.c - * except that it does not acquire the mutex lock and has a special - * behavior during startup where it can inject reactions to the reaction - * queue if execution has not started yet. - * It is also responsible for setting the intended tag of the - * network message based on the calculated delay. - * This function assumes that the caller holds the mutex lock. - * - * This is used for handling incoming timed messages to a federate. - * - * @param env The environment of the federate - * @param action The action or timer to be triggered. - * @param tag The tag of the message received over the network. - * @param value Dynamically allocated memory containing the value to send. - * @param length The length of the array, if it is an array, or 1 for a - * scalar and 0 for no payload. - * @return A handle to the event, or 0 if no event was scheduled, or -1 for error. + * Handle a MSG_TYPE_STOP_REQUEST message from the RTI. 
*/ -static trigger_handle_t schedule_message_received_from_network_locked( - environment_t* env, - trigger_t* trigger, - tag_t tag, - lf_token_t* token) { - assert(env != GLOBAL_ENVIRONMENT); - - // Return value of the function - int return_value = 0; +static void handle_stop_request_message() { + size_t bytes_to_read = MSG_TYPE_STOP_REQUEST_LENGTH - 1; + unsigned char buffer[bytes_to_read]; + read_from_socket_fail_on_error(&_fed.socket_TCP_RTI, bytes_to_read, buffer, NULL, + "Failed to read stop request from RTI."); + tag_t tag_to_stop = extract_tag(buffer); - // Indicates whether or not the intended tag - // of the message (timestamp, microstep) is - // in the future relative to the tag of this - // federate. By default, assume it is not. - bool message_tag_is_in_the_future = lf_tag_compare(tag, env->current_tag) > 0; + // Trace the event when tracing is enabled + tracepoint_federate_from_rti(_fed.trace, receive_STOP_REQ, _lf_my_fed_id, &tag_to_stop); + LF_PRINT_LOG("Received from RTI a MSG_TYPE_STOP_REQUEST signal with tag " PRINTF_TAG ".", + tag_to_stop.time - start_time, + tag_to_stop.microstep); - // Assign the intended tag - trigger->intended_tag = tag; + extern lf_mutex_t global_mutex; + extern bool lf_stop_requested; + bool already_blocked = false; - // Calculate the extra_delay required to be passed - // to the schedule function. - interval_t extra_delay = tag.time - env->current_tag.time; - if (!message_tag_is_in_the_future) { -#ifdef FEDERATED_CENTRALIZED - // If the coordination is centralized, receiving a message - // that does not carry a timestamp that is in the future - // would indicate a critical condition, showing that the - // time advance mechanism is not working correctly. - lf_print_error_and_exit("Received a message at tag " PRINTF_TAG " that" - " has a tag " PRINTF_TAG " that has violated the STP offset. 
" - "Centralized coordination should not have these types of messages.", - env->current_tag.time - start_time, env->current_tag.microstep, - tag.time - start_time, tag.microstep); -#else - // Set the delay back to 0 - extra_delay = 0LL; - LF_PRINT_LOG("Calling schedule with 0 delay and intended tag " PRINTF_TAG ".", - trigger->intended_tag.time - start_time, - trigger->intended_tag.microstep); - return_value = _lf_schedule(env, trigger, extra_delay, token); -#endif - } else { - // In case the message is in the future, call - // _lf_schedule_at_tag() so that the microstep is respected. - LF_PRINT_LOG("Received a message that is (" PRINTF_TIME " nanoseconds, " PRINTF_MICROSTEP " microsteps) " - "in the future.", extra_delay, tag.microstep - env->current_tag.microstep); - return_value = _lf_schedule_at_tag(env, trigger, tag, token); + LF_MUTEX_LOCK(global_mutex); + if (lf_stop_requested) { + LF_PRINT_LOG("Ignoring MSG_TYPE_STOP_REQUEST from RTI because lf_request_stop has been called locally."); + already_blocked = true; } - // Notify the main thread in case it is waiting for physical time to elapse. - LF_PRINT_DEBUG("Broadcasting notification that event queue changed."); - lf_cond_broadcast(&env->event_q_changed); - return return_value; -} - -/** - * Request to close the socket that receives incoming messages from the - * specified federate ID. This sends a message to the upstream federate - * requesting that it close the socket. If the message is sent successfully, - * this returns 1. Otherwise it returns 0, which presumably means that the - * socket is already closed. - * - * @param The ID of the peer federate sending messages to this federate. - * - * @return 1 if the MSG_TYPE_CLOSE_REQUEST message is sent successfully, 0 otherwise. - */ -int _lf_request_close_inbound_socket(int fed_id) { - assert(fed_id >= 0 && fed_id < NUMBER_OF_FEDERATES); - - if (_fed.sockets_for_inbound_p2p_connections[fed_id] < 1) return 0; - - // Send a MSG_TYPE_CLOSE_REQUEST message. 
- unsigned char message_marker = MSG_TYPE_CLOSE_REQUEST; - LF_PRINT_LOG("Sending MSG_TYPE_CLOSE_REQUEST message to upstream federate."); + // Treat the stop request from the RTI as if a local stop request had been received. + lf_stop_requested = true; + LF_MUTEX_UNLOCK(global_mutex); - // Trace the event when tracing is enabled - tracepoint_federate_to_federate(_fed.trace, send_CLOSE_RQ, _lf_my_fed_id, fed_id, NULL); - - ssize_t written = write_to_socket( - _fed.sockets_for_inbound_p2p_connections[fed_id], - 1, &message_marker); - _fed.sockets_for_inbound_p2p_connections[fed_id] = -1; - if (written == 1) { - LF_PRINT_LOG("Sent MSG_TYPE_CLOSE_REQUEST message to upstream federate."); - return 1; - } else { - return 0; + // If we have previously received from the RTI a stop request, + // or we have previously sent a stop request to the RTI, + // then we have already blocked tag advance in enclaves. + // Do not do this twice. The record of whether the first has occurred + // is guarded by the outbound socket mutex. + // The second is guarded by the global mutex. + // Note that the RTI should not send stop requests more than once to federates. + LF_MUTEX_LOCK(lf_outbound_socket_mutex); + if (_fed.received_stop_request_from_rti) { + LF_PRINT_LOG("Redundant MSG_TYPE_STOP_REQUEST from RTI. Ignoring it."); + already_blocked = true; + } else if (!already_blocked) { + // Do this only if lf_request_stop has not been called because it will + // prevent lf_request_stop from sending. + _fed.received_stop_request_from_rti = true; } -} + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); -/** - * Close the socket that receives incoming messages from the - * specified federate ID or RTI. This function should be called when a read - * of incoming socket fails or when an EOF is received. - * - * @param The ID of the peer federate sending messages to this - * federate, or -1 if the RTI. - */ -void _lf_close_inbound_socket(int fed_id) { - if (fed_id < 0) { - // socket connection is to the RTI. 
- int socket = _fed.socket_TCP_RTI; - // First, set the global socket to -1. - _fed.socket_TCP_RTI = -1; - // Then shutdown and close the socket. - shutdown(socket, SHUT_RDWR); - close(socket); - } else if (_fed.sockets_for_inbound_p2p_connections[fed_id] >= 0) { - shutdown(_fed.sockets_for_inbound_p2p_connections[fed_id], SHUT_RDWR); - close(_fed.sockets_for_inbound_p2p_connections[fed_id]); - _fed.sockets_for_inbound_p2p_connections[fed_id] = -1; + if (already_blocked) { + // Either we have sent a stop request to the RTI ourselves, + // or we have previously received a stop request from the RTI. + // Nothing more to do. Tag advance is already blocked on enclaves. + return; } -} -/** - * Handle a port absent message received from a remote federate. - * This just sets the last known status tag of the port specified - * in the message. - * - * @param socket The socket to read the message from - * @param buffer The buffer to read - * @param fed_id The sending federate ID or -1 if the centralized coordination. - */ -static void handle_port_absent_message(int socket, int fed_id) { - size_t bytes_to_read = sizeof(uint16_t) + sizeof(uint16_t) + sizeof(instant_t) + sizeof(microstep_t); - unsigned char buffer[bytes_to_read]; - read_from_socket_errexit(socket, bytes_to_read, buffer, - "Failed to read port absent message."); + // Iterate over the scheduling enclaves to find the maximum current tag + // and adjust the tag_to_stop if any of those is greater than tag_to_stop. + // If not done previously, block tag advance in the enclave. + environment_t *env; + int num_environments = _lf_get_environments(&env); + for (int i = 0; i < num_environments; i++) { + LF_MUTEX_LOCK(env[i].mutex); + if (lf_tag_compare(tag_to_stop, env[i].current_tag) <= 0) { + // Can't stop at the requested tag. Make a counteroffer. + tag_to_stop = env->current_tag; + tag_to_stop.microstep++; + } + // Set a barrier to prevent the enclave from advancing past the so-far tag to stop. 
+ _lf_increment_tag_barrier_locked(&env[i], tag_to_stop); - // Extract the header information. - unsigned short port_id = extract_uint16(buffer); - // The next part of the message is the federate_id, but we don't need it. - // unsigned short federate_id = extract_uint16(&(buffer[sizeof(uint16_t)])); - tag_t intended_tag = extract_tag(&(buffer[sizeof(uint16_t)+sizeof(uint16_t)])); + LF_MUTEX_UNLOCK(env[i].mutex); + } + // Send the reply, which is the least tag at which we can stop. + unsigned char outgoing_buffer[MSG_TYPE_STOP_REQUEST_REPLY_LENGTH]; + ENCODE_STOP_REQUEST_REPLY(outgoing_buffer, tag_to_stop.time, tag_to_stop.microstep); // Trace the event when tracing is enabled - if (fed_id == -1) { - tracepoint_federate_from_rti(_fed.trace, receive_PORT_ABS, _lf_my_fed_id, &intended_tag); - } else { - tracepoint_federate_from_federate(_fed.trace, receive_PORT_ABS, _lf_my_fed_id, fed_id, &intended_tag); - } - LF_PRINT_LOG("Handling port absent for tag " PRINTF_TAG " for port %hu of fed %d.", - intended_tag.time - lf_time_start(), - intended_tag.microstep, - port_id, - fed_id - ); + tracepoint_federate_to_rti(_fed.trace, send_STOP_REQ_REP, _lf_my_fed_id, &tag_to_stop); - // Environment is always the one corresponding to the top-level scheduling enclave. - environment_t *env; - _lf_get_environments(&env); + // Send the current logical time to the RTI. 
+ LF_MUTEX_LOCK(lf_outbound_socket_mutex); + write_to_socket_fail_on_error( + &_fed.socket_TCP_RTI, MSG_TYPE_STOP_REQUEST_REPLY_LENGTH, outgoing_buffer, &lf_outbound_socket_mutex, + "Failed to send the answer to MSG_TYPE_STOP_REQUEST to RTI."); + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); - lf_mutex_lock(&env->mutex); -#ifdef FEDERATED_DECENTRALIZED - trigger_t* network_input_port_action = _lf_action_for_port(port_id)->trigger; - if (lf_tag_compare(intended_tag, - network_input_port_action->last_known_status_tag) < 0) { - lf_mutex_unlock(&env->mutex); - } -#endif // In centralized coordination, a TAG message from the RTI - // can set the last_known_status_tag to a future tag where messages - // have not arrived yet. - // Set the mutex status as absent - update_last_known_status_on_input_port(intended_tag, port_id); - lf_mutex_unlock(&env->mutex); + LF_PRINT_DEBUG("Sent MSG_TYPE_STOP_REQUEST_REPLY to RTI with tag " PRINTF_TAG, + tag_to_stop.time, tag_to_stop.microstep); } /** - * Handle a message being received from a remote federate. - * - * This function assumes the caller does not hold the mutex lock. - * @param socket The socket to read the message from - * @param buffer The buffer to read - * @param fed_id The sending federate ID or -1 if the centralized coordination. + * Send a resign signal to the RTI. */ -void handle_message(int socket, int fed_id) { - // FIXME: Need better error handling? - // Read the header. - size_t bytes_to_read = sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t); - unsigned char buffer[bytes_to_read]; - read_from_socket_errexit(socket, bytes_to_read, buffer, - "Failed to read message header."); - - // Extract the header information. 
- unsigned short port_id; - unsigned short federate_id; - size_t length; - extract_header(buffer, &port_id, &federate_id, &length); - // Check if the message is intended for this federate - assert(_lf_my_fed_id == federate_id); - LF_PRINT_DEBUG("Receiving message to port %d of length %zu.", port_id, length); - - // Get the triggering action for the corresponding port - lf_action_base_t* action = _lf_action_for_port(port_id); - - // Read the payload. - // Allocate memory for the message contents. - unsigned char* message_contents = (unsigned char*)malloc(length); - read_from_socket_errexit(socket, length, message_contents, - "Failed to read message body."); - // Trace the event when tracing is enabled - tracepoint_federate_from_federate(_fed.trace, receive_P2P_MSG, _lf_my_fed_id, federate_id, NULL); - LF_PRINT_LOG("Message received by federate: %s. Length: %zu.", message_contents, length); - - LF_PRINT_DEBUG("Calling schedule for message received on a physical connection."); - _lf_schedule_value(action, 0, message_contents, length); +static void send_resign_signal(environment_t* env) { + size_t bytes_to_write = 1; + unsigned char buffer[bytes_to_write]; + buffer[0] = MSG_TYPE_RESIGN; + LF_MUTEX_LOCK(lf_outbound_socket_mutex); + write_to_socket_fail_on_error( + &_fed.socket_TCP_RTI, bytes_to_write, &(buffer[0]), &lf_outbound_socket_mutex, + "Failed to send MSG_TYPE_RESIGN."); + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); + LF_PRINT_LOG("Resigned."); } -void stall_advance_level_federation(environment_t* env, size_t level) { - LF_PRINT_DEBUG("Acquiring the environment mutex."); - lf_mutex_lock(&env->mutex); - LF_PRINT_DEBUG("Waiting on MLAA with next_reaction_level %zu and MLAA %d.", level, max_level_allowed_to_advance); - while (((int) level) >= max_level_allowed_to_advance) { - lf_cond_wait(&port_status_changed); - }; - LF_PRINT_DEBUG("Exiting wait with MLAA %d and next_reaction_level %zu.", max_level_allowed_to_advance, level); - lf_mutex_unlock(&env->mutex); +/** + 
* Send a failed signal to the RTI. + */ +static void send_failed_signal(environment_t* env) { + size_t bytes_to_write = 1; + unsigned char buffer[bytes_to_write]; + buffer[0] = MSG_TYPE_FAILED; + write_to_socket_fail_on_error( + &_fed.socket_TCP_RTI, bytes_to_write, &(buffer[0]), NULL, + "Failed to send MSG_TYPE_FAILED."); + LF_PRINT_LOG("Failed."); } /** - * Handle a timed message being received from a remote federate via the RTI - * or directly from other federates. - * This will read the tag encoded in the header - * and calculate an offset to pass to the schedule function. - * This function assumes the caller does not hold the mutex lock. - * Instead of holding the mutex lock, this function calls - * _lf_increment_tag_barrier with the tag carried in - * the message header as an argument. This ensures that the current tag - * will not advance to the tag of the message if it is in the future, or - * the tag will not advance at all if the tag of the message is - * now or in the past. - * @param socket The socket to read the message from. - * @param buffer The buffer to read. - * @param fed_id The sending federate ID or -1 if the centralized coordination. + * @brief Stop the traces associated with all environments in the program. */ -void handle_tagged_message(int socket, int fed_id) { - // Environment is always the one corresponding to the top-level scheduling enclave. +static void stop_all_traces() { environment_t *env; - _lf_get_environments(&env); - - // FIXME: Need better error handling? - // Read the header which contains the timestamp. - size_t bytes_to_read = sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t) - + sizeof(instant_t) + sizeof(microstep_t); - unsigned char buffer[bytes_to_read]; - read_from_socket_errexit(socket, bytes_to_read, buffer, - "Failed to read timed message header"); - - // Extract the header information. 
- unsigned short port_id; - unsigned short federate_id; - size_t length; - tag_t intended_tag; - extract_timed_header(buffer, &port_id, &federate_id, &length, &intended_tag); - // Trace the event when tracing is enabled - if (fed_id == -1) { - tracepoint_federate_from_rti(_fed.trace, receive_TAGGED_MSG, _lf_my_fed_id, &intended_tag); - } else { - tracepoint_federate_from_federate(_fed.trace, receive_P2P_TAGGED_MSG, _lf_my_fed_id, fed_id, &intended_tag); - } - // Check if the message is intended for this federate - assert(_lf_my_fed_id == federate_id); - LF_PRINT_DEBUG("Receiving message to port %d of length %zu.", port_id, length); - - // Get the triggering action for the corresponding port - lf_action_base_t* action = _lf_action_for_port(port_id); - - // Record the physical time of arrival of the message - instant_t time_of_arrival = lf_time_physical(); - - if (action->trigger->is_physical) { - // Messages sent on physical connections should be handled via handle_message(). - lf_print_error_and_exit("Received a timed message on a physical connection."); + int num_envs = _lf_get_environments(&env); + for (int i = 0; i < num_envs; i++) { + stop_trace(env[i].trace); } +} -#ifdef FEDERATED_DECENTRALIZED - // Only applicable for federated programs with decentralized coordination: - // For logical connections in decentralized coordination, - // increment the barrier to prevent advancement of tag beyond - // the received tag if possible. The following function call - // suggests that the tag barrier be raised to the tag provided - // by the message. If this tag is in the past, the function will cause - // the tag to freeze at the current level. - // If something happens, make sure to release the barrier. 
- _lf_increment_tag_barrier(env, intended_tag); -#endif - LF_PRINT_LOG("Received message on port %d with tag: " PRINTF_TAG ", Current tag: " PRINTF_TAG ".", - port_id, intended_tag.time - start_time, intended_tag.microstep, - lf_time_logical_elapsed(env), env->current_tag.microstep); +/** + * Handle a failed signal from the RTI. The RTI will only fail + * if it is forced to exit, e.g. by a SIG_INT. Hence, this federate + * will exit immediately with an error condition, counting on the + * termination functions to handle any cleanup needed. + */ +static void handle_rti_failed_message(void) { + exit(1); +} - // Read the payload. - // Allocate memory for the message contents. - unsigned char* message_contents = (unsigned char*)malloc(length); - read_from_socket_errexit(socket, length, message_contents, - "Failed to read message body."); +/** + * Thread that listens for TCP inputs from the RTI. + * When messages arrive, this calls the appropriate handler. + * @param args Ignored + */ +static void* listen_to_rti_TCP(void* args) { + // Buffer for incoming messages. + // This does not constrain the message size + // because the message will be put into malloc'd memory. + unsigned char buffer[FED_COM_BUFFER_SIZE]; - // The following is only valid for string messages. - // LF_PRINT_DEBUG("Message received: %s.", message_contents); - - lf_mutex_lock(&env->mutex); - - action->trigger->physical_time_of_arrival = time_of_arrival; - - // Create a token for the message - lf_token_t* message_token = _lf_new_token((token_type_t*)action, message_contents, length); - - // FIXME: It might be enough to just check this field and not the status at all - update_last_known_status_on_input_port(intended_tag, port_id); - - // Check whether reactions need to be inserted directly into the reaction - // queue or a call to schedule is needed. 
This checks if the intended - // tag of the message is for the current tag or a tag that is already - // passed and if any port absent reaction is waiting on this port (or the - // execution hasn't even started). - // If the tag is intended for a tag that is passed, the port absent reactions - // would need to exit because only one message can be processed per tag, - // and that message is going to be a tardy message. The actual tardiness - // handling is done inside _lf_insert_reactions_for_trigger. - // To prevent multiple processing of messages per tag, - // we also need to check the port status. - // For example, there could be a case where current tag is - // 10 with a port absent reaction waiting, and a message has arrived with intended_tag 8. - // This message will eventually cause the port absent reaction to exit, but before that, - // a message with intended_tag of 9 could arrive before the port absent reaction has had a chance - // to exit. The port status is on the other hand changed in this thread, and thus, - // can be checked in this scenario without this race condition. The message with - // intended_tag of 9 in this case needs to wait one microstep to be processed. - if (lf_tag_compare(intended_tag, lf_tag(env)) <= 0 && // The event is meant for the current or a previous tag. - (action->trigger->status == unknown || // if the status of the port is still unknown. - _lf_execution_started == false) // Or, execution hasn't even started, so it's safe to handle this event. - ) { - // Since the message is intended for the current tag and a port absent reaction - // was waiting for the message, trigger the corresponding reactions for this - // message. - LF_PRINT_LOG( - "Inserting reactions directly at tag " PRINTF_TAG ". 
" - "Intended tag: " PRINTF_TAG ".", - env->current_tag.time - lf_time_start(), - env->current_tag.microstep, - intended_tag.time - lf_time_start(), - intended_tag.microstep - ); - action->trigger->intended_tag = intended_tag; - _lf_insert_reactions_for_trigger(env, action->trigger, message_token); - - // Set the status of the port as present here to inform the network input - // port absent reactions know that they no longer need to block. The reason for - // that is because the network receiver reaction is now in the reaction queue - // keeping the precedence order intact. - set_network_port_status(port_id, present); - - // Port is now present. Therefore, notify the level advancer to proceed - update_max_level(_fed.last_TAG, _fed.is_last_TAG_provisional); - lf_cond_broadcast(&port_status_changed); - } else { - // If no port absent reaction is waiting for this message, or if the intended - // tag is in the future, use schedule functions to process the message. - - // Before that, if the current time >= stop time, discard the message. - // But only if the stop time is not equal to the start time! - if (lf_tag_compare(env->current_tag, env->stop_tag) >= 0) { - lf_print_error("Received message too late. Already at stop tag.\n" - "Current tag is " PRINTF_TAG " and intended tag is " PRINTF_TAG ".\n" - "Discarding message.", - env->current_tag.time - start_time, env->current_tag.microstep, - intended_tag.time - start_time, intended_tag.microstep); - goto release; + // Listen for messages from the federate. + while (1) { + // Check whether the RTI socket is still valid + if (_fed.socket_TCP_RTI < 0) { + lf_print_warning("Socket to the RTI unexpectedly closed."); + return NULL; } - - LF_PRINT_LOG("Calling schedule with tag " PRINTF_TAG ".", intended_tag.time - start_time, intended_tag.microstep); - schedule_message_received_from_network_locked(env, action->trigger, intended_tag, message_token); + // Read one byte to get the message type. 
+ // This will exit if the read fails. + int read_failed = read_from_socket(_fed.socket_TCP_RTI, 1, buffer); + if (read_failed < 0) { + if (errno == ECONNRESET) { + lf_print_error("Socket connection to the RTI was closed by the RTI without" + " properly sending an EOF first. Considering this a soft error."); + // FIXME: If this happens, possibly a new RTI must be elected. + _fed.socket_TCP_RTI = -1; + return NULL; + } else { + lf_print_error("Socket connection to the RTI has been broken with error %d: %s." + " The RTI should close connections with an EOF first." + " Considering this a soft error.", + errno, + strerror(errno)); + // FIXME: If this happens, possibly a new RTI must be elected. + _fed.socket_TCP_RTI = -1; + return NULL; + } + } else if (read_failed > 0) { + // EOF received. + lf_print("Connection to the RTI closed with an EOF."); + _fed.socket_TCP_RTI = -1; + stop_all_traces(); + return NULL; + } + switch (buffer[0]) { + case MSG_TYPE_TAGGED_MESSAGE: + if (handle_tagged_message(&_fed.socket_TCP_RTI, -1)) { + // Failures to complete the read of messages from the RTI are fatal. + lf_print_error_and_exit("Failed to complete the reading of a message from the RTI."); + } + break; + case MSG_TYPE_TAG_ADVANCE_GRANT: + handle_tag_advance_grant(); + break; + case MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT: + handle_provisional_tag_advance_grant(); + break; + case MSG_TYPE_STOP_REQUEST: + handle_stop_request_message(); + break; + case MSG_TYPE_STOP_GRANTED: + handle_stop_granted_message(); + break; + case MSG_TYPE_PORT_ABSENT: + if (handle_port_absent_message(&_fed.socket_TCP_RTI, -1)) { + // Failures to complete the read of absent messages from the RTI are fatal. 
+ lf_print_error_and_exit("Failed to complete the reading of an absent message from the RTI."); + } + break; + case MSG_TYPE_FAILED: + handle_rti_failed_message(); + break; + case MSG_TYPE_CLOCK_SYNC_T1: + case MSG_TYPE_CLOCK_SYNC_T4: + lf_print_error("Federate %d received unexpected clock sync message from RTI on TCP socket.", + _lf_my_fed_id); + break; + default: + lf_print_error_and_exit("Received from RTI an unrecognized TCP message type: %hhx.", buffer[0]); + // Trace the event when tracing is enabled + tracepoint_federate_from_rti(_fed.trace, receive_UNIDENTIFIED, _lf_my_fed_id, NULL); + } } - - release: -#ifdef FEDERATED_DECENTRALIZED // Only applicable for federated programs with decentralized coordination - // Finally, decrement the barrier to allow the execution to continue - // past the raised barrier - _lf_decrement_tag_barrier_locked(env); -#endif - - // The mutex is unlocked here after the barrier on - // logical time has been removed to avoid - // the need for unecessary lock and unlock - // operations. - lf_mutex_unlock(&env->mutex); + return NULL; } /** - * Handle a time advance grant (TAG) message from the RTI. - * This updates the last known status tag for each network input - * port, and broadcasts a signal, which may cause a blocking - * port absent reaction to unblock. - * - * In addition, this updates the last known TAG/PTAG and broadcasts - * a notification of this update, which may unblock whichever worker - * thread is trying to advance time. - * - * @note This function is very similar to handle_provisinal_tag_advance_grant() except that - * it sets last_TAG_was_provisional to false. + * Modify the specified tag, if necessary, to be an earlier tag based + * on the current physical time. The earlier tag is necessary if this federate + * has downstream federates and also has physical actions that may trigger + * outputs. 
In that case, the earlier tag will be the current physical time + * plus the minimum delay on all such physical actions plus any other delays + * along the path from the triggering physical action to the output port + * minus one nanosecond. The modified tag is assured of being less than any + * output tag that might later be produced. + * @param tag A pointer to the proposed NET. + * @return True if this federate requires this modification and the tag was + * modified. */ -void handle_tag_advance_grant(void) { - // Environment is always the one corresponding to the top-level scheduling enclave. - environment_t *env; - _lf_get_environments(&env); - - size_t bytes_to_read = sizeof(instant_t) + sizeof(microstep_t); - unsigned char buffer[bytes_to_read]; - read_from_socket_errexit(_fed.socket_TCP_RTI, bytes_to_read, buffer, - "Failed to read tag advance grant from RTI."); - tag_t TAG = extract_tag(buffer); - - // Trace the event when tracing is enabled - tracepoint_federate_from_rti(_fed.trace, receive_TAG, _lf_my_fed_id, &TAG); - - lf_mutex_lock(&env->mutex); - - // Update the last known status tag of all network input ports - // to the TAG received from the RTI. Here we assume that the RTI - // knows the status of network ports up to and including the granted tag, - // so by extension, we assume that the federate can safely rely - // on the RTI to handle port statuses up until the granted tag. - update_last_known_status_on_input_ports(TAG); - - // It is possible for this federate to have received a PTAG - // earlier with the same tag as this TAG. 
- if (lf_tag_compare(TAG, _fed.last_TAG) >= 0) { - _fed.last_TAG.time = TAG.time; - _fed.last_TAG.microstep = TAG.microstep; - _fed.is_last_TAG_provisional = false; - LF_PRINT_LOG("Received Time Advance Grant (TAG): " PRINTF_TAG ".", - _fed.last_TAG.time - start_time, _fed.last_TAG.microstep); - } else { - lf_mutex_unlock(&env->mutex); - lf_print_error("Received a TAG " PRINTF_TAG " that wasn't larger " - "than the previous TAG or PTAG " PRINTF_TAG ". Ignoring the TAG.", - TAG.time - start_time, TAG.microstep, - _fed.last_TAG.time - start_time, _fed.last_TAG.microstep); +static bool bounded_NET(tag_t* tag) { + // The tag sent by this function is a promise that, absent + // inputs from another federate, this federate will not produce events + // earlier than t. But if there are downstream federates and there is + // a physical action (not counting receivers from upstream federates), + // then we can only promise up to current physical time (plus the minimum + // of all minimum delays on the physical actions). + // In this case, we send a NET message with the current physical time + // to permit downstream federates to advance. To avoid + // overwhelming the network, this NET message should be sent periodically + // at specified intervals controlled by the target parameter + // coordination-options: {advance-message-interval: time units}. + // The larger the interval, the more downstream federates will lag + // behind real time, but the less network traffic. If this option is + // missing, we issue a warning message suggesting that a redesign + // might be in order so that outputs don't depend on physical actions. + LF_PRINT_DEBUG("Checking NET to see whether it should be bounded by physical time." 
+ " Min delay from physical action: " PRINTF_TIME ".", + _fed.min_delay_from_physical_action_to_federate_output); + if (_fed.min_delay_from_physical_action_to_federate_output >= 0LL + && _fed.has_downstream + ) { + // There is a physical action upstream of some output from this + // federate, and there is at least one downstream federate. + // Compare the tag to the current physical time. + instant_t physical_time = lf_time_physical(); + if (physical_time + _fed.min_delay_from_physical_action_to_federate_output < tag->time) { + // Can only promise up and not including this new time: + tag->time = physical_time + _fed.min_delay_from_physical_action_to_federate_output - 1L; + tag->microstep = 0; + LF_PRINT_LOG("Has physical actions that bound NET to " PRINTF_TAG ".", + tag->time - start_time, tag->microstep); + return true; + } } - // Notify everything that is blocked. - lf_cond_broadcast(&env->event_q_changed); - - lf_mutex_unlock(&env->mutex); + return false; } +////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////// +// Public functions (declared in reactor.h) +// An empty version of this function is code generated for unfederated execution. + /** - * Send a logical tag complete (LTC) message to the RTI - * unless an equal or later LTC has previously been sent. - * This function assumes the caller holds the mutex lock. - * - * @param tag_to_send The tag to send. + * Close sockets used to communicate with other federates, if they are open, + * and send a MSG_TYPE_RESIGN message to the RTI. This implements the function + * defined in reactor.h. For unfederated execution, the code generator + * generates an empty implementation. 
+ * @param env The environment of the federate */ -void _lf_logical_tag_complete(tag_t tag_to_send) { - int compare_with_last_tag = lf_tag_compare(_fed.last_sent_LTC, tag_to_send); - if (compare_with_last_tag >= 0) { - return; +void terminate_execution(environment_t* env) { + assert(env != GLOBAL_ENVIRONMENT); + + // For an abnormal termination (e.g. a SIGINT), we need to send a + // MSG_TYPE_FAILED message to the RTI, but we should not acquire a mutex. + if (_fed.socket_TCP_RTI >= 0) { + if (_lf_normal_termination) { + tracepoint_federate_to_rti(_fed.trace, send_RESIGN, _lf_my_fed_id, &env->current_tag); + send_resign_signal(env); + } else { + tracepoint_federate_to_rti(_fed.trace, send_FAILED, _lf_my_fed_id, &env->current_tag); + send_failed_signal(env); + } } - LF_PRINT_LOG("Sending Logical Time Complete (LTC) " PRINTF_TAG " to the RTI.", - tag_to_send.time - start_time, - tag_to_send.microstep); - _lf_send_tag(MSG_TYPE_LOGICAL_TAG_COMPLETE, tag_to_send, true); - _fed.last_sent_LTC = tag_to_send; -} -bool update_max_level(tag_t tag, bool is_provisional) { - // This always needs the top-level environment, which will be env[0]. - environment_t *env; - _lf_get_environments(&env); - int prev_max_level_allowed_to_advance = max_level_allowed_to_advance; - max_level_allowed_to_advance = INT_MAX; - if ((lf_tag_compare(env->current_tag, tag) < 0) || ( - lf_tag_compare(env->current_tag, tag) == 0 && !is_provisional - )) { - LF_PRINT_DEBUG("Updated MLAA to %d at time " PRINTF_TIME ".", - max_level_allowed_to_advance, - lf_time_logical_elapsed(env) - ); - // Safe to complete the current tag - return (prev_max_level_allowed_to_advance != max_level_allowed_to_advance); + LF_PRINT_DEBUG("Closing incoming P2P sockets."); + // Close any incoming P2P sockets that are still open. + for (int i=0; i < NUMBER_OF_FEDERATES; i++) { + close_inbound_socket(i, 1); + // Ignore errors. Mark the socket closed. 
+ _fed.sockets_for_inbound_p2p_connections[i] = -1; } -#ifdef FEDERATED_DECENTRALIZED - size_t action_table_size = _lf_action_table_size; - lf_action_base_t** action_table = _lf_action_table; -#else - size_t action_table_size = _lf_zero_delay_action_table_size; - lf_action_base_t** action_table = _lf_zero_delay_action_table; -#endif // FEDERATED_DECENTRALIZED - for (int i = 0; i < action_table_size; i++) { - lf_action_base_t* input_port_action = action_table[i]; -#ifdef FEDERATED_DECENTRALIZED - // In decentralized execution, if the current_tag is close enough to the - // start tag and there is a large enough delay on an incoming - // connection, then there is no need to block progress waiting for this - // port status. - if ( - (_lf_action_delay_table[i] == 0 && env->current_tag.time == start_time && env->current_tag.microstep == 0) - || (_lf_action_delay_table[i] > 0 && lf_tag_compare( - env->current_tag, - lf_delay_strict((tag_t) {.time=start_time, .microstep=0}, _lf_action_delay_table[i]) - ) <= 0) - ) { - continue; - } -#endif // FEDERATED_DECENTRALIZED - if (lf_tag_compare(env->current_tag, - input_port_action->trigger->last_known_status_tag) > 0 - && !input_port_action->trigger->is_physical) { - max_level_allowed_to_advance = LF_MIN( - max_level_allowed_to_advance, - ((int) LF_LEVEL(input_port_action->trigger->reactions[0]->index)) - ); - } + + // Check for all outgoing physical connections in + // _fed.sockets_for_outbound_p2p_connections and + // if the socket ID is not -1, the connection is still open. + // Send an EOF by closing the socket here. + for (int i=0; i < NUMBER_OF_FEDERATES; i++) { + + // Close outbound connections, in case they have not closed themselves. + // This will result in EOF being sent to the remote federate, except for + // abnormal termination, in which case it will just close the socket. + int flag = _lf_normal_termination? 
1 : -1; + close_outbound_socket(i, flag); } - LF_PRINT_DEBUG("Updated MLAA to %d at time " PRINTF_TIME ".", - max_level_allowed_to_advance, - lf_time_logical_elapsed(env) - ); - return (prev_max_level_allowed_to_advance != max_level_allowed_to_advance); -} -#ifdef FEDERATED_DECENTRALIZED -/** - * @brief Return whether there exists an input port whose status is unknown. - * - * @param staa_elem A record of all input port actions. - */ -static bool a_port_is_unknown(staa_t* staa_elem) { - bool do_wait = false; - for (int j = 0; j < staa_elem->numActions; ++j) { - if (staa_elem->actions[j]->trigger->status == unknown) { - do_wait = true; - break; + LF_PRINT_DEBUG("Waiting for inbound p2p socket listener threads."); + // Wait for each inbound socket listener thread to close. + if (_fed.number_of_inbound_p2p_connections > 0 && _fed.inbound_socket_listeners != NULL) { + LF_PRINT_LOG("Waiting for %zu threads listening for incoming messages to exit.", + _fed.number_of_inbound_p2p_connections); + for (int i=0; i < _fed.number_of_inbound_p2p_connections; i++) { + // Ignoring errors here. + lf_thread_join(_fed.inbound_socket_listeners[i], NULL); } } - return do_wait; -} -#endif -/** - * @brief Return the port ID of the port associated with the given action. - */ -static int id_of_action(lf_action_base_t* input_port_action) { - for (int i = 0; 1; i++) { - if (_lf_action_for_port(i) == input_port_action) return i; + LF_PRINT_DEBUG("Waiting for RTI's socket listener threads."); + // Wait for the thread listening for messages from the RTI to close. + lf_thread_join(_fed.RTI_socket_listener, NULL); + + // For abnormal termination, there is no need to free memory. + if (_lf_normal_termination) { + LF_PRINT_DEBUG("Freeing memory occupied by the federate."); + free(_fed.inbound_socket_listeners); + free(federation_metadata.rti_host); + free(federation_metadata.rti_user); } - // There will be no UB buffer overrun because _lf_action_for_port(i) has a check. 
} -/** - * @brief Given a list of staa offsets and its associated triggers, - * have a single thread work to set ports to absent at a given logical time - * - */ -#ifdef FEDERATED_DECENTRALIZED -static void* update_ports_from_staa_offsets(void* args) { - environment_t *env; - int num_envs = _lf_get_environments(&env); - while (1) { - bool restart = false; - tag_t tag_when_started_waiting = lf_tag(env); - for (int i = 0; i < staa_lst_size; ++i) { - staa_t* staa_elem = staa_lst[i]; - interval_t wait_until_time = env->current_tag.time + staa_elem->STAA + _lf_fed_STA_offset - _lf_action_delay_table[i]; - lf_mutex_lock(&env->mutex); - // Both before and after the wait, check that the tag has not changed - if (a_port_is_unknown(staa_elem) && lf_tag_compare(lf_tag(env), tag_when_started_waiting) == 0 && wait_until(env, wait_until_time, &port_status_changed) && lf_tag_compare(lf_tag(env), tag_when_started_waiting) == 0) { - for (int j = 0; j < staa_elem->numActions; ++j) { - lf_action_base_t* input_port_action = staa_elem->actions[j]; - if (input_port_action->trigger->status == unknown) { - input_port_action->trigger->status = absent; - LF_PRINT_DEBUG("Assuming port absent at time %lld.", (long long) (lf_tag(env).time - start_time)); - update_last_known_status_on_input_port(lf_tag(env), id_of_action(input_port_action)); - update_max_level(_fed.last_TAG, _fed.is_last_TAG_provisional); - lf_cond_broadcast(&port_status_changed); - } - } - lf_mutex_unlock(&env->mutex); - } else if (lf_tag_compare(lf_tag(env), tag_when_started_waiting) != 0) { - // We have committed to a new tag before we finish processing the list. Start over. 
- restart = true; - lf_mutex_unlock(&env->mutex); - break; + +////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////// +// Public functions (declared in federate.h, in alphabetical order) + +void lf_connect_to_federate(uint16_t remote_federate_id) { + int result = -1; + int count_retries = 0; + + // Ask the RTI for port number of the remote federate. + // The buffer is used for both sending and receiving replies. + // The size is what is needed for receiving replies. + unsigned char buffer[sizeof(int32_t) + INET_ADDRSTRLEN + 1]; + int port = -1; + struct in_addr host_ip_addr; + int count_tries = 0; + while (port == -1 && !_lf_termination_executed) { + buffer[0] = MSG_TYPE_ADDRESS_QUERY; + // NOTE: Sending messages in little endian. + encode_uint16(remote_federate_id, &(buffer[1])); + + LF_PRINT_DEBUG("Sending address query for federate %d.", remote_federate_id); + // Trace the event when tracing is enabled + tracepoint_federate_to_rti(_fed.trace, send_ADR_QR, _lf_my_fed_id, NULL); + + LF_MUTEX_LOCK(lf_outbound_socket_mutex); + write_to_socket_fail_on_error( + &_fed.socket_TCP_RTI, sizeof(uint16_t) + 1, buffer, &lf_outbound_socket_mutex, + "Failed to send address query for federate %d to RTI.", + remote_federate_id); + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); + + // Read RTI's response. + read_from_socket_fail_on_error(&_fed.socket_TCP_RTI, sizeof(int32_t) + 1, buffer, NULL, + "Failed to read the requested port number for federate %d from RTI.", + remote_federate_id); + + if (buffer[0] != MSG_TYPE_ADDRESS_QUERY) { + // Unexpected reply. Could be that RTI has failed and sent a resignation. 
+ if (buffer[0] == MSG_TYPE_FAILED) { + lf_print_error_and_exit("RTI has failed."); } else { - lf_mutex_unlock(&env->mutex); + lf_print_error_and_exit("Unexpected reply of type %hhu from RTI (see net_common.h).", buffer[0]); } } - if (restart) continue; + port = extract_int32(&buffer[1]); - lf_mutex_lock(&env->mutex); - while (lf_tag_compare(lf_tag(env), tag_when_started_waiting) == 0) { - lf_cond_wait(&logical_time_changed); + read_from_socket_fail_on_error( + &_fed.socket_TCP_RTI, sizeof(host_ip_addr), (unsigned char*)&host_ip_addr, NULL, + "Failed to read the IP address for federate %d from RTI.", + remote_federate_id); + + // A reply of -1 for the port means that the RTI does not know + // the port number of the remote federate, presumably because the + // remote federate has not yet sent an MSG_TYPE_ADDRESS_ADVERTISEMENT message to the RTI. + // Sleep for some time before retrying. + if (port == -1) { + if (count_tries++ >= CONNECT_MAX_RETRIES) { + lf_print_error_and_exit("TIMEOUT obtaining IP/port for federate %d from the RTI.", + remote_federate_id); + } + // Wait ADDRESS_QUERY_RETRY_INTERVAL nanoseconds. + lf_sleep(ADDRESS_QUERY_RETRY_INTERVAL); } - lf_mutex_unlock(&env->mutex); } -} + assert(port < 65536); + assert(port > 0); + uint16_t uport = (uint16_t)port; -/** - * @brief Spawns a thread to iterate through STAA structs, setting its associated ports absent - * at an offset if the port is not present with a value by a certain physical time. - * - */ -void spawn_staa_thread(){ - lf_thread_create(&_fed.staaSetter, update_ports_from_staa_offsets, NULL); -} +#if LOG_LEVEL > 3 + // Print the received IP address in a human readable format + // Create the human readable format of the received address. + // This is avoided unless LOG_LEVEL is high enough to + // subdue the overhead caused by inet_ntop(). 
+ char hostname[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, &host_ip_addr, hostname, INET_ADDRSTRLEN); + LF_PRINT_LOG("Received address %s port %d for federate %d from RTI.", + hostname, uport, remote_federate_id); #endif -/** - * Handle a provisional tag advance grant (PTAG) message from the RTI. - * This updates the last known TAG/PTAG and broadcasts - * a notification of this update, which may unblock whichever worker - * thread is trying to advance time. - * If current_time is less than the specified PTAG, then this will - * also insert into the event_q a dummy event with the specified tag. - * This will ensure that the federate advances time to the specified - * tag and, for centralized coordination, inserts blocking reactions - * and null-message-sending output reactions at that tag. - * - * @note This function is similar to handle_tag_advance_grant() except that - * it sets last_TAG_was_provisional to true and also it does not update the - * last known tag for input ports. - */ -void handle_provisional_tag_advance_grant() { - // Environment is always the one corresponding to the top-level scheduling enclave. - environment_t *env; - _lf_get_environments(&env); + // Iterate until we either successfully connect or exceed the number of + // attempts given by CONNECT_MAX_RETRIES. + int socket_id = -1; + while (result < 0 && !_lf_termination_executed) { + // Create an IPv4 socket for TCP (not UDP) communication over IP (0). + socket_id = create_real_time_tcp_socket_errexit(); - size_t bytes_to_read = sizeof(instant_t) + sizeof(microstep_t); - unsigned char buffer[bytes_to_read]; - read_from_socket_errexit(_fed.socket_TCP_RTI, bytes_to_read, buffer, - "Failed to read provisional tag advance grant from RTI."); - tag_t PTAG = extract_tag(buffer); + // Server file descriptor. + struct sockaddr_in server_fd; + // Zero out the server_fd struct. 
+ bzero((char*)&server_fd, sizeof(server_fd)); - // Trace the event when tracing is enabled - tracepoint_federate_from_rti(_fed.trace, receive_PTAG, _lf_my_fed_id, &PTAG); + // Set up the server_fd fields. + server_fd.sin_family = AF_INET; // IPv4 + server_fd.sin_addr = host_ip_addr; // Received from the RTI - // Note: it is important that last_known_status_tag of ports does not - // get updated to a PTAG value because a PTAG does not indicate that - // the RTI knows about the status of all ports up to and _including_ - // the value of PTAG. Only a TAG message indicates that. - lf_mutex_lock(&env->mutex); + // Convert the port number from host byte order to network byte order. + server_fd.sin_port = htons(uport); + result = connect( + socket_id, + (struct sockaddr *)&server_fd, + sizeof(server_fd)); - // Sanity check - if (lf_tag_compare(PTAG, _fed.last_TAG) < 0 - || (lf_tag_compare(PTAG, _fed.last_TAG) == 0 && !_fed.is_last_TAG_provisional)) { - lf_mutex_unlock(&env->mutex); - lf_print_error_and_exit("Received a PTAG " PRINTF_TAG " that is equal or earlier " - "than an already received TAG " PRINTF_TAG ".", - PTAG.time, PTAG.microstep, - _fed.last_TAG.time, _fed.last_TAG.microstep); - } + if (result != 0) { + lf_print_error("Failed to connect to federate %d on port %d.", remote_federate_id, uport); - _fed.last_TAG = PTAG; - _fed.is_last_TAG_provisional = true; - LF_PRINT_LOG("At tag " PRINTF_TAG ", received Provisional Tag Advance Grant (PTAG): " PRINTF_TAG ".", - env->current_tag.time - start_time, env->current_tag.microstep, - _fed.last_TAG.time - start_time, _fed.last_TAG.microstep); + // Try again after some time if the connection failed. + // Note that this should not really happen since the remote federate should be + // accepting socket connections. But possibly it will be busy (in process of accepting + // another socket connection?). Hence, we retry. 
+ count_retries++; + if (count_retries > CONNECT_MAX_RETRIES) { + // If the remote federate is not accepting the connection after CONNECT_MAX_RETRIES + // treat it as a soft error condition and return. + lf_print_error("Failed to connect to federate %d after %d retries. Giving up.", + remote_federate_id, CONNECT_MAX_RETRIES); + return; + } + lf_print_warning("Could not connect to federate %d. Will try again every %lld nanoseconds.\n", + remote_federate_id, ADDRESS_QUERY_RETRY_INTERVAL); + + // Check whether the RTI is still there. + if (rti_failed()) break; - // Even if we don't modify the event queue, we need to broadcast a change - // because we do not need to continue to wait for a TAG. - lf_cond_broadcast(&env->event_q_changed); - // Notify level advance thread which is blocked. - update_max_level(_fed.last_TAG, _fed.is_last_TAG_provisional); - lf_cond_broadcast(&port_status_changed); + // Wait ADDRESS_QUERY_RETRY_INTERVAL nanoseconds. + lf_sleep(ADDRESS_QUERY_RETRY_INTERVAL); + } else { + // Connect was successful. + size_t buffer_length = 1 + sizeof(uint16_t) + 1; + unsigned char buffer[buffer_length]; + buffer[0] = MSG_TYPE_P2P_SENDING_FED_ID; + if (_lf_my_fed_id > UINT16_MAX) { + // This error is very unlikely to occur. + lf_print_error_and_exit("Too many federates! More than %d.", UINT16_MAX); + } + encode_uint16((uint16_t)_lf_my_fed_id, (unsigned char*)&(buffer[1])); + unsigned char federation_id_length = (unsigned char)strnlen(federation_metadata.federation_id, 255); + buffer[sizeof(uint16_t) + 1] = federation_id_length; + // Trace the event when tracing is enabled + tracepoint_federate_to_federate(_fed.trace, send_FED_ID, _lf_my_fed_id, remote_federate_id, NULL); + + // No need for a mutex because we have the only handle on the socket. 
+ write_to_socket_fail_on_error(&socket_id, + buffer_length, buffer, NULL, + "Failed to send fed_id to federate %d.", remote_federate_id); + write_to_socket_fail_on_error(&socket_id, + federation_id_length, (unsigned char*)federation_metadata.federation_id, NULL, + "Failed to send federation id to federate %d.", + remote_federate_id); - // Possibly insert a dummy event into the event queue if current time is behind - // (which it should be). Do not do this if the federate has not fully - // started yet. + read_from_socket_fail_on_error(&socket_id, 1, (unsigned char*)buffer, NULL, + "Failed to read MSG_TYPE_ACK from federate %d in response to sending fed_id.", + remote_federate_id); + if (buffer[0] != MSG_TYPE_ACK) { + // Get the error code. + read_from_socket_fail_on_error(&socket_id, 1, (unsigned char*)buffer, NULL, + "Failed to read error code from federate %d in response to sending fed_id.", remote_federate_id); + lf_print_error("Received MSG_TYPE_REJECT message from remote federate (%d).", buffer[0]); + result = -1; + continue; + } else { + lf_print("Connected to federate %d, port %d.", remote_federate_id, port); + // Trace the event when tracing is enabled + tracepoint_federate_to_federate(_fed.trace, receive_ACK, _lf_my_fed_id, remote_federate_id, NULL); + } + } + } + // Once we set this variable, then all future calls to close() on this + // socket ID should reset it to -1 within a critical section. + _fed.sockets_for_outbound_p2p_connections[remote_federate_id] = socket_id; +} - instant_t dummy_event_time = PTAG.time; - microstep_t dummy_event_relative_microstep = PTAG.microstep; +void lf_connect_to_rti(const char* hostname, int port) { + LF_PRINT_LOG("Connecting to the RTI."); - if (lf_tag_compare(env->current_tag, PTAG) == 0) { - // The current tag can equal the PTAG if we are at the start time - // or if this federate has been able to advance time to the current - // tag (e.g., it has no upstream federates). 
In either case, either - // it is already treating the current tag as PTAG cycle (e.g. at the - // start time) or it will be completing the current cycle and sending - // a LTC message shortly. In either case, there is nothing more to do. - lf_mutex_unlock(&env->mutex); - return; - } else if (lf_tag_compare(env->current_tag, PTAG) > 0) { - // Current tag is greater than the PTAG. - // It could be that we have sent an LTC that crossed with the incoming - // PTAG or that we have advanced to a tag greater than the PTAG. - // In the former case, there is nothing more to do. - // In the latter case, we may be blocked processing a PTAG cycle at - // a greater tag or we may be in the middle of processing a regular - // TAG. In either case, we know that at the PTAG tag, all outputs - // have either been sent or are absent, so we can send an LTC. - // Send an LTC to indicate absent outputs. - _lf_logical_tag_complete(PTAG); - // Nothing more to do. - lf_mutex_unlock(&env->mutex); - return; - } else if (PTAG.time == env->current_tag.time) { - // We now know env->current_tag < PTAG, but the times are equal. - // Adjust the microstep for scheduling the dummy event. - dummy_event_relative_microstep -= env->current_tag.microstep; - } - // We now know env->current_tag < PTAG. + // Override passed hostname and port if passed as runtime arguments. + hostname = federation_metadata.rti_host ? federation_metadata.rti_host : hostname; + port = federation_metadata.rti_port >= 0 ? federation_metadata.rti_port : port; - if (dummy_event_time != FOREVER) { - // Schedule a dummy event at the specified time and (relative) microstep. - LF_PRINT_DEBUG("At tag " PRINTF_TAG ", inserting into the event queue a dummy event " - "with time " PRINTF_TIME " and (relative) microstep " PRINTF_MICROSTEP ".", - env->current_tag.time - start_time, env->current_tag.microstep, - dummy_event_time - start_time, dummy_event_relative_microstep); - // Dummy event points to a NULL trigger and NULL real event. 
- event_t* dummy = _lf_create_dummy_events(env, - NULL, dummy_event_time, NULL, dummy_event_relative_microstep); - pqueue_insert(env->event_q, dummy); + // Adjust the port. + uint16_t uport = 0; + if (port < 0 || port > INT16_MAX) { + lf_print_error( + "lf_connect_to_rti(): Specified port (%d) is out of range," + " using the default port %d instead.", + port, DEFAULT_PORT + ); + uport = DEFAULT_PORT; + port = 0; // Mark so that increments occur between tries. + } else { + uport = (uint16_t)port; + } + if (uport == 0) { + uport = DEFAULT_PORT; } - lf_mutex_unlock(&env->mutex); -} + // Create a socket + _fed.socket_TCP_RTI = create_real_time_tcp_socket_errexit(); + + int result = -1; + int count_retries = 0; + struct addrinfo* res = NULL; + + while (count_retries++ < CONNECT_MAX_RETRIES && !_lf_termination_executed) { + if (res != NULL) { + // This is a repeated attempt. + if (_fed.socket_TCP_RTI >= 0) close_rti_socket(); + + lf_sleep(CONNECT_RETRY_INTERVAL); + + // Create a new socket. + _fed.socket_TCP_RTI = create_real_time_tcp_socket_errexit(); + + if (port == 0) { + // Free previously allocated address info. + freeaddrinfo(res); + // Increment the port number. + uport++; + if (uport >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) uport = DEFAULT_PORT; + + // Reconstruct the address info. + rti_address(hostname, uport, &res); + } + lf_print("Trying RTI again on port %d (attempt %d).", uport, count_retries); + } else { + // This is the first attempt. + rti_address(hostname, uport, &res); + } -/** - * Send a MSG_TYPE_STOP_REQUEST message to the RTI with payload equal - * to the specified tag plus one microstep. If this federate has previously - * received a stop request from the RTI, then do not send the message and - * return 1. Return -1 if the socket is disconnected. Otherwise, return 0. - * @return 0 if the message is sent. 
- */ -int _lf_fd_send_stop_request_to_rti(tag_t stop_tag) { + result = connect(_fed.socket_TCP_RTI, res->ai_addr, res->ai_addrlen); + if (result < 0) continue; // Connect failed. - // Send a stop request with the specified tag to the RTI - unsigned char buffer[MSG_TYPE_STOP_REQUEST_LENGTH]; - // Stop at the next microstep - ENCODE_STOP_REQUEST(buffer, stop_tag.time, stop_tag.microstep + 1); + // Have connected to an RTI, but not sure it's the right RTI. + // Send a MSG_TYPE_FED_IDS message and wait for a reply. + // Notify the RTI of the ID of this federate and its federation. - lf_mutex_lock(&outbound_socket_mutex); - // Do not send a stop request if a stop request has been previously received from the RTI. - if (!_fed.received_stop_request_from_rti) { - LF_PRINT_LOG("Sending to RTI a MSG_TYPE_STOP_REQUEST message with tag " PRINTF_TAG ".", - stop_tag.time - start_time, - stop_tag.microstep); +#ifdef FEDERATED_AUTHENTICATED + LF_PRINT_LOG("Connected to an RTI. Performing HMAC-based authentication using federation ID."); + if (perform_hmac_authentication()) { + if (port == 0) { + continue; // Try again with a new port. + } else { + // No point in trying again because it will be the same port. + close_rti_socket(); + lf_print_error_and_exit("Authentication failed."); + } + } +#else + LF_PRINT_LOG("Connected to an RTI. Sending federation ID for authentication."); +#endif - if (_fed.socket_TCP_RTI < 0) { - lf_print_warning("Socket is no longer connected. Dropping message."); - lf_mutex_unlock(&outbound_socket_mutex); - return -1; + // Send the message type first. + unsigned char buffer[4]; + buffer[0] = MSG_TYPE_FED_IDS; + // Next send the federate ID. + if (_lf_my_fed_id > UINT16_MAX) { + lf_print_error_and_exit("Too many federates! More than %d.", UINT16_MAX); } + encode_uint16((uint16_t)_lf_my_fed_id, &buffer[1]); + // Next send the federation ID length. + // The federation ID is limited to 255 bytes. 
+ size_t federation_id_length = strnlen(federation_metadata.federation_id, 255); + buffer[1 + sizeof(uint16_t)] = (unsigned char)(federation_id_length & 0xff); + // Trace the event when tracing is enabled - tracepoint_federate_to_rti(_fed.trace, send_STOP_REQ, _lf_my_fed_id, &stop_tag); - write_to_socket_with_mutex(_fed.socket_TCP_RTI, MSG_TYPE_STOP_REQUEST_LENGTH, - buffer, &outbound_socket_mutex, - "Failed to send stop time " PRINTF_TIME " to the RTI.", stop_tag.time - start_time); - lf_mutex_unlock(&outbound_socket_mutex); - return 0; - } else { - lf_mutex_unlock(&outbound_socket_mutex); - return 1; - } -} + tracepoint_federate_to_rti(_fed.trace, send_FED_ID, _lf_my_fed_id, NULL); -/** - * Handle a MSG_TYPE_STOP_GRANTED message from the RTI. - * - * This function removes the global barrier on - * logical time raised when lf_request_stop() was - * called in the environment for each enclave. - */ -void handle_stop_granted_message() { + // No need for a mutex here because no other threads are writing to this socket. + if (write_to_socket(_fed.socket_TCP_RTI, 2 + sizeof(uint16_t), buffer)) { + continue; // Try again, possibly on a new port. + } - size_t bytes_to_read = MSG_TYPE_STOP_GRANTED_LENGTH - 1; - unsigned char buffer[bytes_to_read]; - read_from_socket_errexit(_fed.socket_TCP_RTI, bytes_to_read, buffer, - "Failed to read stop granted from RTI."); + // Next send the federation ID itself. + if (write_to_socket( + _fed.socket_TCP_RTI, + federation_id_length, + (unsigned char*)federation_metadata.federation_id)) { + continue; // Try again. + } - tag_t received_stop_tag = extract_tag(buffer); + // Wait for a response. + // The response will be MSG_TYPE_REJECT if the federation ID doesn't match. + // Otherwise, it will be either MSG_TYPE_ACK or MSG_TYPE_UDP_PORT, where the latter + // is used if clock synchronization will be performed. 
+ unsigned char response; - // Trace the event when tracing is enabled - tracepoint_federate_from_rti(_fed.trace, receive_STOP_GRN, _lf_my_fed_id, &received_stop_tag); + LF_PRINT_DEBUG("Waiting for response to federation ID from the RTI."); - LF_PRINT_LOG("Received from RTI a MSG_TYPE_STOP_GRANTED message with elapsed tag " PRINTF_TAG ".", - received_stop_tag.time - start_time, received_stop_tag.microstep); + if (read_from_socket(_fed.socket_TCP_RTI, 1, &response)) { + continue; // Try again. + } + if (response == MSG_TYPE_REJECT) { + // Trace the event when tracing is enabled + tracepoint_federate_from_rti(_fed.trace, receive_REJECT, _lf_my_fed_id, NULL); + // Read one more byte to determine the cause of rejection. + unsigned char cause; + read_from_socket_fail_on_error(&_fed.socket_TCP_RTI, 1, &cause, NULL, + "Failed to read the cause of rejection by the RTI."); + if (cause == FEDERATION_ID_DOES_NOT_MATCH || cause == WRONG_SERVER) { + lf_print_warning("Connected to the wrong RTI on port %d. Will try again", uport); + continue; + } + } else if (response == MSG_TYPE_ACK) { + // Trace the event when tracing is enabled + tracepoint_federate_from_rti(_fed.trace, receive_ACK, _lf_my_fed_id, NULL); + LF_PRINT_LOG("Received acknowledgment from the RTI."); + break; + } else if (response == MSG_TYPE_RESIGN) { + lf_print_warning("RTI on port %d resigned. Will try again", uport); + continue; + } else { + lf_print_warning("RTI on port %d gave unexpect response %u. 
Will try again", uport, response); + continue; + } + } + if (result < 0) { + lf_print_error_and_exit("Failed to connect to RTI after %d tries.", CONNECT_MAX_RETRIES); + } - environment_t *env; - int num_environments = _lf_get_environments(&env); + freeaddrinfo(res); /* No longer needed */ - for (int i = 0; i < num_environments; i++) { - lf_mutex_lock(&env[i].mutex); + // Call a generated (external) function that sends information + // about connections between this federate and other federates + // where messages are routed through the RTI. + // @see MSG_TYPE_NEIGHBOR_STRUCTURE in net_common.h + lf_send_neighbor_structure_to_RTI(_fed.socket_TCP_RTI); - // Sanity check. - if (lf_tag_compare(received_stop_tag, env[i].current_tag) <= 0) { - lf_print_error("RTI granted a MSG_TYPE_STOP_GRANTED tag that is equal to or less than this federate's current tag " PRINTF_TAG ". " - "Stopping at the next microstep instead.", - env[i].current_tag.time - start_time, env[i].current_tag.microstep); - received_stop_tag = env[i].current_tag; - received_stop_tag.microstep++; - } + uint16_t udp_port = setup_clock_synchronization_with_rti(); - _lf_set_stop_tag(&env[i], received_stop_tag); - LF_PRINT_DEBUG("Setting the stop tag to " PRINTF_TAG ".", - env[i].stop_tag.time - start_time, - env[i].stop_tag.microstep); + // Write the returned port number to the RTI + unsigned char UDP_port_number[1 + sizeof(uint16_t)]; + UDP_port_number[0] = MSG_TYPE_UDP_PORT; + encode_uint16(udp_port, &(UDP_port_number[1])); + write_to_socket_fail_on_error(&_fed.socket_TCP_RTI, 1 + sizeof(uint16_t), UDP_port_number, NULL, + "Failed to send the UDP port number to the RTI."); - if (env[i].barrier.requestors) _lf_decrement_tag_barrier_locked(&env[i]); - // We signal instead of broadcast under the assumption that only - // one worker thread can call wait_until at a given time because - // the call to wait_until is protected by a mutex lock - lf_cond_signal(&env[i].event_q_changed); - 
lf_mutex_unlock(&env[i].mutex); - } + lf_print("Connected to RTI at %s:%d.", hostname, uport); } -/** - * Handle a MSG_TYPE_STOP_REQUEST message from the RTI. - */ -void handle_stop_request_message() { - size_t bytes_to_read = MSG_TYPE_STOP_REQUEST_LENGTH - 1; - unsigned char buffer[bytes_to_read]; - read_from_socket_errexit(_fed.socket_TCP_RTI, bytes_to_read, buffer, - "Failed to read stop request from RTI."); - tag_t tag_to_stop = extract_tag(buffer); +void lf_create_server(int specified_port) { + assert(specified_port <= UINT16_MAX && specified_port >= 0); + uint16_t port = (uint16_t)specified_port; + LF_PRINT_LOG("Creating a socket server on port %d.", port); + // Create an IPv4 socket for TCP (not UDP) communication over IP (0). + int socket_descriptor = create_real_time_tcp_socket_errexit(); - // Trace the event when tracing is enabled - tracepoint_federate_from_rti(_fed.trace, receive_STOP_REQ, _lf_my_fed_id, &tag_to_stop); - LF_PRINT_LOG("Received from RTI a MSG_TYPE_STOP_REQUEST message with tag " PRINTF_TAG ".", - tag_to_stop.time - start_time, - tag_to_stop.microstep); + // Server file descriptor. + struct sockaddr_in server_fd; + // Zero out the server address structure. + bzero((char*)&server_fd, sizeof(server_fd)); - // If we have previously received from the RTI a stop request, - // or we have previously sent a stop request to the RTI, - // then we have already blocked tag advance in enclaves. - // Do not do this twice. The record of whether the first has occurred - // is guarded by the outbound socket mutex. - // The second is guarded by the global mutex. - // Note that the RTI should not send stop requests more than once to federates. 
- lf_mutex_lock(&outbound_socket_mutex); - bool already_blocked = false; - if (_fed.received_stop_request_from_rti) { - already_blocked = true; - } - _fed.received_stop_request_from_rti = true; - lf_mutex_unlock(&outbound_socket_mutex); + server_fd.sin_family = AF_INET; // IPv4 + server_fd.sin_addr.s_addr = INADDR_ANY; // All interfaces, 0.0.0.0. + // Convert the port number from host byte order to network byte order. + server_fd.sin_port = htons(port); - extern lf_mutex_t global_mutex; - extern bool lf_stop_requested; - lf_mutex_lock(&global_mutex); - if (lf_stop_requested) { - already_blocked = true; + int result = bind( + socket_descriptor, + (struct sockaddr *) &server_fd, + sizeof(server_fd)); + int count = 0; + while (result < 0 && count++ < PORT_BIND_RETRY_LIMIT) { + lf_sleep(PORT_BIND_RETRY_INTERVAL); + result = bind( + socket_descriptor, + (struct sockaddr *) &server_fd, + sizeof(server_fd)); + } + if (result < 0) { + lf_print_error_and_exit("Failed to bind socket on port %d.", port); } - lf_mutex_unlock(&global_mutex); - - // Iterate over the scheduling enclaves to find the maximum current tag - // and adjust the tag_to_stop if any of those is greater than tag_to_stop. - // If not done previously, block tag advance in the enclave. - environment_t *env; - int num_environments = _lf_get_environments(&env); - for (int i = 0; i < num_environments; i++) { - lf_mutex_lock(&env[i].mutex); - if (lf_tag_compare(tag_to_stop, env[i].current_tag) <= 0) { - // Can't stop at the requested tag. Make a counteroffer. - tag_to_stop = env->current_tag; - tag_to_stop.microstep++; - } - if (!already_blocked) { - // Set a barrier to prevent the enclave from advancing past the so-far tag to stop. - _lf_increment_tag_barrier_locked(&env[i], tag_to_stop); + // Set the global server port. + if (specified_port == 0) { + // Need to retrieve the port number assigned by the OS. 
+ struct sockaddr_in assigned; + socklen_t addr_len = sizeof(assigned); + if (getsockname(socket_descriptor, (struct sockaddr *) &assigned, &addr_len) < 0) { + lf_print_error_and_exit("Failed to retrieve assigned port number."); } - lf_mutex_unlock(&env[i].mutex); + _fed.server_port = ntohs(assigned.sin_port); + } else { + _fed.server_port = port; } - // Send the reply, which is the least tag at which we can stop. - unsigned char outgoing_buffer[MSG_TYPE_STOP_REQUEST_REPLY_LENGTH]; - ENCODE_STOP_REQUEST_REPLY(outgoing_buffer, tag_to_stop.time, tag_to_stop.microstep); - lf_mutex_lock(&outbound_socket_mutex); - if (_fed.socket_TCP_RTI < 0) { - lf_print_warning("Socket is no longer connected. Dropping message."); - lf_mutex_unlock(&outbound_socket_mutex); - return; - } + // Enable listening for socket connections. + // The second argument is the maximum number of queued socket requests, + // which according to the Mac man page is limited to 128. + listen(socket_descriptor, 128); + + LF_PRINT_LOG("Server for communicating with other federates started using port %d.", _fed.server_port); + + // Send the server port number to the RTI + // on an MSG_TYPE_ADDRESS_ADVERTISEMENT message (@see net_common.h). + unsigned char buffer[sizeof(int32_t) + 1]; + buffer[0] = MSG_TYPE_ADDRESS_ADVERTISEMENT; + encode_int32(_fed.server_port, &(buffer[1])); + // Trace the event when tracing is enabled - tracepoint_federate_to_rti(_fed.trace, send_STOP_REQ_REP, _lf_my_fed_id, &tag_to_stop); - // Send the current logical time to the RTI. This message does not have an identifying byte - // since the RTI is waiting for a response from this federate. 
- write_to_socket_with_mutex( - _fed.socket_TCP_RTI, MSG_TYPE_STOP_REQUEST_REPLY_LENGTH, outgoing_buffer, &outbound_socket_mutex, - "Failed to send the answer to MSG_TYPE_STOP_REQUEST to RTI."); - lf_mutex_unlock(&outbound_socket_mutex); + tracepoint_federate_to_rti(_fed.trace, send_ADR_AD, _lf_my_fed_id, NULL); + + // No need for a mutex because we have the only handle on this socket. + write_to_socket_fail_on_error(&_fed.socket_TCP_RTI, sizeof(int32_t) + 1, (unsigned char*)buffer, NULL, + "Failed to send address advertisement."); + + LF_PRINT_DEBUG("Sent port %d to the RTI.", _fed.server_port); + + // Set the global server socket + _fed.server_socket = socket_descriptor; } -/** - * Close sockets used to communicate with other federates, if they are open, - * and send a MSG_TYPE_RESIGN message to the RTI. This implements the function - * defined in reactor.h. For unfederated execution, the code generator - * generates an empty implementation. - * @param env The environment of the federate - */ -void terminate_execution(environment_t* env) { +void lf_enqueue_port_absent_reactions(environment_t* env){ assert(env != GLOBAL_ENVIRONMENT); - - // Check for all outgoing physical connections in - // _fed.sockets_for_outbound_p2p_connections and - // if the socket ID is not -1, the connection is still open. - // Send an EOF by closing the socket here. - // NOTE: It is dangerous to acquire a mutex in a termination - // process because it can block program exit if a deadlock occurs. - // Hence, it is paramount that these mutexes not allow for any - // possibility of deadlock. To ensure this, this - // function should NEVER be called while holding any mutex lock. - lf_mutex_lock(&outbound_socket_mutex); - for (int i=0; i < NUMBER_OF_FEDERATES; i++) { - // Close outbound connections, in case they have not closed themselves. - // This will result in EOF being sent to the remote federate, I think. 
- _lf_close_outbound_socket(i); +#ifdef FEDERATED_CENTRALIZED + if (!_fed.has_downstream) { + // This federate is not connected to any downstream federates via a + // logical connection. No need to trigger port absent + // reactions. + return; } - // Resign the federation, which will close the socket to the RTI. - if (_fed.socket_TCP_RTI >= 0) { - size_t bytes_to_write = 1 + sizeof(tag_t); - unsigned char buffer[bytes_to_write]; - buffer[0] = MSG_TYPE_RESIGN; - tag_t tag = env->current_tag; - encode_tag(&(buffer[1]), tag); - // Trace the event when tracing is enabled - tracepoint_federate_to_rti(_fed.trace, send_RESIGN, _lf_my_fed_id, &tag); - ssize_t written = write_to_socket(_fed.socket_TCP_RTI, bytes_to_write, &(buffer[0])); - if (written == bytes_to_write) { - LF_PRINT_LOG("Resigned."); +#endif + LF_PRINT_DEBUG("Enqueueing port absent reactions at time %lld.", (long long) (env->current_tag.time - start_time)); + if (num_port_absent_reactions == 0) { + LF_PRINT_DEBUG("No port absent reactions."); + return; + } + for (int i = 0; i < num_port_absent_reactions; i++) { + reaction_t* reaction = port_absent_reaction[i]; + if (reaction && reaction->status == inactive) { + LF_PRINT_DEBUG("Inserting port absent reaction on reaction queue."); + lf_scheduler_trigger_reaction(env->scheduler, reaction, -1); } } - lf_mutex_unlock(&outbound_socket_mutex); +} - LF_PRINT_DEBUG("Requesting closing of incoming P2P sockets."); - // Request closing the incoming P2P sockets. - for (int i=0; i < NUMBER_OF_FEDERATES; i++) { - if (_lf_request_close_inbound_socket(i) == 0) { - // Sending the close request failed. Mark the socket closed. - _fed.sockets_for_inbound_p2p_connections[i] = -1; +void* lf_handle_p2p_connections_from_federates(void* env_arg) { + assert(env_arg); + environment_t* env = (environment_t *) env_arg; + int received_federates = 0; + // Allocate memory to store thread IDs. 
+ _fed.inbound_socket_listeners = (lf_thread_t*)calloc(_fed.number_of_inbound_p2p_connections, sizeof(lf_thread_t)); + while (received_federates < _fed.number_of_inbound_p2p_connections && !_lf_termination_executed) { + // Wait for an incoming connection request. + struct sockaddr client_fd; + uint32_t client_length = sizeof(client_fd); + int socket_id = accept(_fed.server_socket, &client_fd, &client_length); + + if (socket_id < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { + if (rti_failed()) break; + else continue; // Try again. + } else if (errno == EPERM) { + lf_print_error_system_failure("Firewall permissions prohibit connection."); + } else { + lf_print_error_system_failure("A fatal error occurred while accepting a new socket."); + } } - } + LF_PRINT_LOG("Accepted new connection from remote federate."); - LF_PRINT_DEBUG("Waiting for inbound p2p socket listener threads."); - // Wait for each inbound socket listener thread to close. - if (_fed.number_of_inbound_p2p_connections > 0) { - LF_PRINT_LOG("Waiting for %zu threads listening for incoming messages to exit.", - _fed.number_of_inbound_p2p_connections); - for (int i=0; i < _fed.number_of_inbound_p2p_connections; i++) { - // Ignoring errors here. - lf_thread_join(_fed.inbound_socket_listeners[i], NULL); + size_t header_length = 1 + sizeof(uint16_t) + 1; + unsigned char buffer[header_length]; + int read_failed = read_from_socket(socket_id, header_length, (unsigned char*)&buffer); + if (read_failed || buffer[0] != MSG_TYPE_P2P_SENDING_FED_ID) { + lf_print_warning("Federate received invalid first message on P2P socket. Closing socket."); + if (read_failed == 0) { + // Wrong message received. + unsigned char response[2]; + response[0] = MSG_TYPE_REJECT; + response[1] = WRONG_SERVER; + // Trace the event when tracing is enabled + tracepoint_federate_to_federate(_fed.trace, send_REJECT, _lf_my_fed_id, -3, NULL); + // Ignore errors on this response. 
+ write_to_socket(socket_id, 2, response); + } + close(socket_id); + continue; } - } - LF_PRINT_DEBUG("Waiting for RTI's socket listener threads."); - // Wait for the thread listening for messages from the RTI to close. - lf_thread_join(_fed.RTI_socket_listener, NULL); + // Get the federation ID and check it. + unsigned char federation_id_length = buffer[header_length - 1]; + char remote_federation_id[federation_id_length]; + read_failed = read_from_socket(socket_id, federation_id_length, (unsigned char*)remote_federation_id); + if (read_failed || (strncmp(federation_metadata.federation_id, remote_federation_id, strnlen(federation_metadata.federation_id, 255)) != 0)) { + lf_print_warning("Received invalid federation ID. Closing socket."); + if (read_failed == 0) { + unsigned char response[2]; + response[0] = MSG_TYPE_REJECT; + response[1] = FEDERATION_ID_DOES_NOT_MATCH; + // Trace the event when tracing is enabled + tracepoint_federate_to_federate(_fed.trace, send_REJECT, _lf_my_fed_id, -3, NULL); + // Ignore errors on this response. + write_to_socket(socket_id, 2, response); + } + close(socket_id); + continue; + } - LF_PRINT_DEBUG("Freeing memory occupied by the federate."); - free(_fed.inbound_socket_listeners); - free(federation_metadata.rti_host); - free(federation_metadata.rti_user); -} + // Extract the ID of the sending federate. + uint16_t remote_fed_id = extract_uint16((unsigned char*)&(buffer[1])); + LF_PRINT_DEBUG("Received sending federate ID %d.", remote_fed_id); + + // Trace the event when tracing is enabled + tracepoint_federate_to_federate(_fed.trace, receive_FED_ID, _lf_my_fed_id, remote_fed_id, NULL); -/** - * Thread that listens for inputs from other federates. - * This thread listens for messages of type MSG_TYPE_P2P_MESSAGE, - * MSG_TYPE_P2P_TAGGED_MESSAGE, or MSG_TYPE_PORT_ABSENT (@see net_common.h) from the specified - * peer federate and calls the appropriate handling function for - * each message type. 
If an error occurs or an EOF is received - * from the peer, then this procedure sets the corresponding - * socket in _fed.sockets_for_inbound_p2p_connections - * to -1 and returns, terminating the thread. - * @param _args The remote federate ID (cast to void*). - * @param fed_id_ptr A pointer to a uint16_t containing federate ID being listened to. - * This procedure frees the memory pointed to before returning. - */ -void* listen_to_federates(void* _args) { - uint16_t fed_id = (uint16_t)(uintptr_t)_args; + // Once we record the socket_id here, all future calls to close() on + // the socket should be done while holding the socket_mutex, and this array + // element should be reset to -1 during that critical section. + // Otherwise, there can be race condition where, during termination, + // two threads attempt to simultaneously close the socket. + _fed.sockets_for_inbound_p2p_connections[remote_fed_id] = socket_id; - LF_PRINT_LOG("Listening to federate %d.", fed_id); + // Send an MSG_TYPE_ACK message. + unsigned char response = MSG_TYPE_ACK; - int socket_id = _fed.sockets_for_inbound_p2p_connections[fed_id]; + // Trace the event when tracing is enabled + tracepoint_federate_to_federate(_fed.trace, send_ACK, _lf_my_fed_id, remote_fed_id, NULL); - // Buffer for incoming messages. - // This does not constrain the message size - // because the message will be put into malloc'd memory. - unsigned char buffer[FED_COM_BUFFER_SIZE]; + LF_MUTEX_LOCK(lf_outbound_socket_mutex); + write_to_socket_fail_on_error( + &_fed.sockets_for_inbound_p2p_connections[remote_fed_id], + 1, (unsigned char*)&response, + &lf_outbound_socket_mutex, + "Failed to write MSG_TYPE_ACK in response to federate %d.", + remote_fed_id); + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); - // Listen for messages from the federate. - while (1) { - // Read one byte to get the message type. 
- LF_PRINT_DEBUG("Waiting for a P2P message on socket %d.", socket_id); - ssize_t bytes_read = read_from_socket(socket_id, 1, buffer); - if (bytes_read == 0) { - // EOF occurred. This breaks the connection. - lf_print("Received EOF from peer federate %d. Closing the socket.", fed_id); - _lf_close_inbound_socket(fed_id); - break; - } else if (bytes_read < 0) { - lf_print_error("P2P socket to federate %d is broken.", fed_id); - _lf_close_inbound_socket(fed_id); - break; - } - LF_PRINT_DEBUG("Received a P2P message on socket %d of type %d.", - socket_id, buffer[0]); - bool bad_message = false; - switch (buffer[0]) { - case MSG_TYPE_P2P_MESSAGE: - LF_PRINT_LOG("Received untimed message from federate %d.", fed_id); - handle_message(socket_id, fed_id); - break; - case MSG_TYPE_P2P_TAGGED_MESSAGE: - LF_PRINT_LOG("Received timed message from federate %d.", fed_id); - handle_tagged_message(socket_id, fed_id); - break; - case MSG_TYPE_PORT_ABSENT: - LF_PRINT_LOG("Received port absent message from federate %d.", fed_id); - handle_port_absent_message(socket_id, fed_id); - break; - default: - bad_message = true; - } - if (bad_message) { - // FIXME: Better error handling needed. - lf_print_error("Received erroneous message type: %d. Closing the socket.", buffer[0]); - break; - // Trace the event when tracing is enabled - tracepoint_federate_from_federate(_fed.trace, receive_UNIDENTIFIED, _lf_my_fed_id, fed_id, NULL); + // Start a thread to listen for incoming messages from other federates. + // The fed_id is a uint16_t, which we assume can be safely cast to and from void*. + void* fed_id_arg = (void*)(uintptr_t)remote_fed_id; + int result = lf_thread_create( + &_fed.inbound_socket_listeners[received_federates], + listen_to_federates, + fed_id_arg); + if (result != 0) { + // Failed to create a listening thread. 
+ LF_MUTEX_LOCK(socket_mutex); + if (_fed.sockets_for_inbound_p2p_connections[remote_fed_id] != -1) { + close(socket_id); + _fed.sockets_for_inbound_p2p_connections[remote_fed_id] = -1; + } + LF_MUTEX_UNLOCK(socket_mutex); + lf_print_error_and_exit( + "Failed to create a thread to listen for incoming physical connection. Error code: %d.", + result + ); } + + received_federates++; } + + LF_PRINT_LOG("All %zu remote federates are connected.", _fed.number_of_inbound_p2p_connections); return NULL; } -/** - * @brief Stop the traces associated with all environments in the program. - */ -static void stop_all_traces() { - environment_t *env; - int num_envs = _lf_get_environments(&env); - for (int i = 0; i < num_envs; i++) { - stop_trace(env[i].trace); +void lf_latest_tag_complete(tag_t tag_to_send) { + int compare_with_last_tag = lf_tag_compare(_fed.last_sent_LTC, tag_to_send); + if (compare_with_last_tag >= 0) { + return; } + LF_PRINT_LOG("Sending Latest Tag Complete (LTC) " PRINTF_TAG " to the RTI.", + tag_to_send.time - start_time, + tag_to_send.microstep); + send_tag(MSG_TYPE_LATEST_TAG_COMPLETE, tag_to_send); + _fed.last_sent_LTC = tag_to_send; } -/** - * Thread that listens for TCP inputs from the RTI. - * When messages arrive, this calls the appropriate handler. - * @param args Ignored - */ -void* listen_to_rti_TCP(void* args) { - // Buffer for incoming messages. - // This does not constrain the message size - // because the message will be put into malloc'd memory. - unsigned char buffer[FED_COM_BUFFER_SIZE]; - - // Listen for messages from the federate. 
- while (1) { - // Check whether the RTI socket is still valid - if (_fed.socket_TCP_RTI < 0) { - lf_print_warning("Socket to the RTI unexpectedly closed."); - return NULL; +parse_rti_code_t lf_parse_rti_addr(const char* rti_addr) { + bool has_host = false, has_port = false, has_user = false; + rti_addr_info_t rti_addr_info = {0}; + extract_rti_addr_info(rti_addr, &rti_addr_info); + if (!rti_addr_info.has_host && !rti_addr_info.has_port && !rti_addr_info.has_user) { + return FAILED_TO_PARSE; + } + if (rti_addr_info.has_host) { + if (validate_host(rti_addr_info.rti_host_str)) { + char* rti_host = (char*) calloc(256, sizeof(char)); + strncpy(rti_host, rti_addr_info.rti_host_str, 255); + federation_metadata.rti_host = rti_host; + } else { + return INVALID_HOST; } - // Read one byte to get the message type. - // This will exit if the read fails. - ssize_t bytes_read = read_from_socket(_fed.socket_TCP_RTI, 1, buffer); - if (bytes_read < 0) { - if (errno == ECONNRESET) { - lf_print_error("Socket connection to the RTI was closed by the RTI without" - " properly sending an EOF first. Considering this a soft error."); - // FIXME: If this happens, possibly a new RTI must be elected. - _fed.socket_TCP_RTI = -1; - return NULL; - } else { - lf_print_error("Socket connection to the RTI has been broken" - " with error %d: %s. The RTI should" - " close connections with an EOF first." - " Considering this a soft error.", - errno, - strerror(errno)); - // FIXME: If this happens, possibly a new RTI must be elected. - _fed.socket_TCP_RTI = -1; - return NULL; - } - } else if (bytes_read == 0) { - // EOF received. 
- lf_print("Connection to the RTI closed with an EOF."); - _fed.socket_TCP_RTI = -1; - stop_all_traces(); - return NULL; + } + if (rti_addr_info.has_port) { + if (validate_port(rti_addr_info.rti_port_str)) { + federation_metadata.rti_port = atoi(rti_addr_info.rti_port_str); + } else { + return INVALID_PORT; } - switch (buffer[0]) { - case MSG_TYPE_TAGGED_MESSAGE: - handle_tagged_message(_fed.socket_TCP_RTI, -1); - break; - case MSG_TYPE_TAG_ADVANCE_GRANT: - handle_tag_advance_grant(); - break; - case MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT: - handle_provisional_tag_advance_grant(); - break; - case MSG_TYPE_STOP_REQUEST: - handle_stop_request_message(); - break; - case MSG_TYPE_STOP_GRANTED: - handle_stop_granted_message(); - break; - case MSG_TYPE_PORT_ABSENT: - handle_port_absent_message(_fed.socket_TCP_RTI, -1); - break; - case MSG_TYPE_CLOCK_SYNC_T1: - case MSG_TYPE_CLOCK_SYNC_T4: - lf_print_error("Federate %d received unexpected clock sync message from RTI on TCP socket.", - _lf_my_fed_id); - break; - default: - lf_print_error_and_exit("Received from RTI an unrecognized TCP message type: %hhx.", buffer[0]); - // Trace the event when tracing is enabled - tracepoint_federate_from_rti(_fed.trace, receive_UNIDENTIFIED, _lf_my_fed_id, NULL); - } } - return NULL; + if (rti_addr_info.has_user) { + if (validate_user(rti_addr_info.rti_user_str)) { + char* rti_user = (char*) calloc(256, sizeof(char)); + strncpy(rti_user, rti_addr_info.rti_user_str, 255); + federation_metadata.rti_user = rti_user; + } else { + return INVALID_USER; + } + } + return SUCCESS; } -void synchronize_with_other_federates(void) { - - LF_PRINT_DEBUG("Synchronizing with other federates."); - - // Reset the start time to the coordinated start time for all federates. - // Note that this does not grant execution to this federate. - start_time = get_start_time_from_rti(lf_time_physical()); - - // Start a thread to listen for incoming TCP messages from the RTI. 
- // @note Up until this point, the federate has been listening for messages - // from the RTI in a sequential manner in the main thread. From now on, a - // separate thread is created to allow for asynchronous communication. - lf_thread_create(&_fed.RTI_socket_listener, listen_to_rti_TCP, NULL); - lf_thread_t thread_id; - if (create_clock_sync_thread(&thread_id)) { - lf_print_warning("Failed to create thread to handle clock synchronization."); +void lf_reset_status_fields_on_input_port_triggers() { + environment_t *env; + _lf_get_environments(&env); + tag_t now = lf_tag(env); + for (int i = 0; i < _lf_action_table_size; i++) { + if (lf_tag_compare(_lf_action_table[i]->trigger->last_known_status_tag, now) >= 0) { + set_network_port_status(i, absent); // Default may be overridden to become present. + } else { + set_network_port_status(i, unknown); + } } + LF_PRINT_DEBUG("Resetting port status fields."); + lf_update_max_level(_fed.last_TAG, _fed.is_last_TAG_provisional); + lf_cond_broadcast(&lf_port_status_changed); } -/** - * Modify the specified tag, if necessary, to be an earlier tag based - * on the current physical time. The earlier tag is necessary if this federate - * has downstream federates and also has physical actions that may trigger - * outputs. In that case, the earlier tag will be the current physical time - * plus the minimum delay on all such physical actions plus any other delays - * along the path from the triggering physical action to the output port - * minus one nanosecond. The modified tag is assured of being less than any - * output tag that might later be produced. - * @param tag A pointer to the proposed NET. - * @return True if this federate requires this modification and the tag was - * modified. - */ -bool _lf_bounded_NET(tag_t* tag) { - // The tag sent by this function is a promise that, absent - // inputs from another federate, this federate will not produce events - // earlier than t.
But if there are downstream federates and there is - // a physical action (not counting receivers from upstream federates), - // then we can only promise up to current physical time (plus the minimum - // of all minimum delays on the physical actions). - // In this case, we send a NET message with the current physical time - // to permit downstream federates to advance. To avoid - // overwhelming the network, this NET message should be sent periodically - // at specified intervals controlled by the target parameter - // coordination-options: {advance-message-interval: time units}. - // The larger the interval, the more downstream federates will lag - // behind real time, but the less network traffic. If this option is - // missing, we issue a warning message suggesting that a redesign - // might be in order so that outputs don't depend on physical actions. - LF_PRINT_DEBUG("Checking NET to see whether it should be bounded by physical time." - " Min delay from physical action: " PRINTF_TIME ".", - _fed.min_delay_from_physical_action_to_federate_output); - if (_fed.min_delay_from_physical_action_to_federate_output >= 0LL - && _fed.has_downstream - ) { - // There is a physical action upstream of some output from this - // federate, and there is at least one downstream federate. - // Compare the tag to the current physical time. 
- instant_t physical_time = lf_time_physical(); - if (physical_time + _fed.min_delay_from_physical_action_to_federate_output < tag->time) { - // Can only promise up and not including this new time: - tag->time = physical_time + _fed.min_delay_from_physical_action_to_federate_output - 1L; - tag->microstep = 0; - LF_PRINT_LOG("Has physical actions that bound NET to " PRINTF_TAG ".", - tag->time - start_time, tag->microstep); - return true; - } +int lf_send_message(int message_type, + unsigned short port, + unsigned short federate, + const char* next_destination_str, + size_t length, + unsigned char* message) { + unsigned char header_buffer[1 + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t)]; + // First byte is the message type, which must be MSG_TYPE_P2P_MESSAGE (an untagged message). + if (message_type != MSG_TYPE_P2P_MESSAGE ) { + lf_print_error("lf_send_message: Unsupported message type (%d).", message_type); + return -1; + } + header_buffer[0] = (unsigned char)message_type; + // Next two bytes identify the destination port. + // NOTE: Send messages little endian, not big endian. + encode_uint16(port, &(header_buffer[1])); + + // Next two bytes identify the destination federate. + encode_uint16(federate, &(header_buffer[1 + sizeof(uint16_t)])); + + // The next four bytes are the message length. + encode_int32((int32_t)length, &(header_buffer[1 + sizeof(uint16_t) + sizeof(uint16_t)])); + + LF_PRINT_LOG("Sending untagged message to %s.", next_destination_str); + + // Header: message_type + port_id + federate_id + length of message + const int header_length = 1 + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t); + + // Use a mutex lock to prevent multiple threads from simultaneously sending.
+ LF_MUTEX_LOCK(lf_outbound_socket_mutex); + + int* socket = &_fed.sockets_for_outbound_p2p_connections[federate]; + + // Trace the event when tracing is enabled + tracepoint_federate_to_federate(_fed.trace, send_P2P_MSG, _lf_my_fed_id, federate, NULL); + + int result = write_to_socket_close_on_error(socket, header_length, header_buffer); + if (result == 0) { + // Header sent successfully. Send the body. + result = write_to_socket_close_on_error(socket, length, message); } - return false; + if (result != 0) { + // Message did not send. Since this is used for physical connections, this is not critical. + lf_print_warning("Failed to send message to %s. Dropping the message.", next_destination_str); + } + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); + return result; } -/** - * If this federate depends on upstream federates or sends data to downstream - * federates, then send to the RTI a NET, which will give the tag of the - * earliest event on the event queue, or, if the queue is empty, the timeout - * time, or, if there is no timeout, FOREVER. - * - * If there are network outputs that - * depend on physical actions, then insert a dummy event to ensure this federate - * advances its tag so that downstream federates can make progress. - * - * A NET is a promise saying that, absent network inputs, this federate will - * not produce an output message with tag earlier than the NET value. - * - * If there are upstream federates, then after sending a NET, this will block - * until either the RTI grants the advance to the requested time or the wait - * for the response from the RTI is interrupted by a change in the event queue - * (e.g., a physical action triggered or a network message arrived). - * If there are no upstream federates, then it will not wait for a TAG - * (which won't be forthcoming anyway) and returns the earliest tag on the event queue. 
- * - * If the federate has neither upstream nor downstream federates, then this - * returns the specified tag immediately without sending anything to the RTI. - * - * If there is at least one physical action somewhere in the federate that can - * trigger an output to a downstream federate, then the NET is required to be - * less than the current physical time. If physical time is less than the - * earliest event in the event queue (or the event queue is empty), then this - * function will insert a dummy event with a tag equal to the current physical - * time (and a microstep of 0). This will enforce advancement of tag for this - * federate and causes a NET message to be sent repeatedly as physical time - * advances with the time interval between messages controlled by the target - * parameter coordination-options: {advance-message-interval timevalue}. It will - * stop creating dummy events if and when its event queue has an event with a - * timestamp less than physical time. - * - * If wait_for_reply is false, then this function will simply send the - * specified tag and return that tag immediately. This is useful when a - * federate is shutting down and will not be sending any more messages at all. - * - * In all cases, this returns either the specified tag or - * another tag when it is safe to advance logical time to the returned tag. - * The returned tag may be less than the specified tag if there are upstream - * federates and either the RTI responds with a lesser tag or - * the wait for a response from the RTI is interrupted by a - * change in the event queue. - * - * This function is used in centralized coordination only. - * - * This function assumes the caller holds the mutex lock. - * - * @param env The environment of the federate - * @param tag The tag. - * @param wait_for_reply If true, wait for a reply. 
- */ -tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply) { +tag_t lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply) { assert(env != GLOBAL_ENVIRONMENT); while (true) { if (!_fed.has_downstream && !_fed.has_upstream) { @@ -2722,7 +2419,7 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply return _fed.last_TAG; } - // Copy the tag because _lf_bounded_NET() may modify it. + // Copy the tag because bounded_NET() may modify it. tag_t original_tag = tag; // A NET sent by this function is a promise that, absent @@ -2733,7 +2430,7 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply // of all minimum delays on the physical actions). // If wait_for_reply is false, leave the tag alone. bool tag_bounded_by_physical_time = wait_for_reply ? - _lf_bounded_NET(&tag) + bounded_NET(&tag) : false; // What we do next depends on whether the NET has been bounded by @@ -2742,7 +2439,7 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply // This if statement does not fall through but rather returns. // NET is not bounded by physical time or has no downstream federates. // Normal case. - _lf_send_tag(MSG_TYPE_NEXT_EVENT_TAG, tag, wait_for_reply); + send_tag(MSG_TYPE_NEXT_EVENT_TAG, tag); _fed.last_sent_NET = tag; LF_PRINT_LOG("Sent next event tag (NET) " PRINTF_TAG " to RTI.", tag.time - start_time, tag.microstep); @@ -2768,7 +2465,9 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply while (true) { // Wait until either something changes on the event queue or // the RTI has responded with a TAG. 
- LF_PRINT_DEBUG("Waiting for a TAG from the RTI with _fed.last_TAG.time=%lld, %lld and net=%lld, %lld", (long long) _fed.last_TAG.time - start_time, (long long) _fed.last_TAG.microstep, (long long) tag.time - start_time, (long long) tag.microstep); + LF_PRINT_DEBUG("Waiting for a TAG from the RTI with _fed.last_TAG= " PRINTF_TAG " and net=" PRINTF_TAG, + _fed.last_TAG.time - start_time, _fed.last_TAG.microstep, + tag.time - start_time, tag.microstep); if (lf_cond_wait(&env->event_q_changed) != 0) { lf_print_error("Wait error."); } @@ -2781,7 +2480,7 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply return _fed.last_TAG; } if (lf_tag_compare(next_tag, tag) != 0) { - _lf_send_tag(MSG_TYPE_NEXT_EVENT_TAG, next_tag, wait_for_reply); + send_tag(MSG_TYPE_NEXT_EVENT_TAG, next_tag); _fed.last_sent_NET = next_tag; LF_PRINT_LOG("Sent next event tag (NET) " PRINTF_TAG " to RTI from loop.", next_tag.time - lf_time_start(), next_tag.microstep); @@ -2790,9 +2489,10 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply } if (tag.time != FOREVER) { - // Create a dummy event that will force this federate to advance time and subsequently enable progress for - // downstream federates. - event_t* dummy = _lf_create_dummy_events(env, NULL, tag.time, NULL, 0); + // Create a dummy event that will force this federate to advance time and subsequently + // enable progress for downstream federates. Increment the time by ADVANCE_MESSAGE_INTERVAL + // to prevent too frequent dummy events. 
+ event_t* dummy = _lf_create_dummy_events(env, NULL, tag.time + ADVANCE_MESSAGE_INTERVAL, NULL, 0); pqueue_insert(env->event_q, dummy); } @@ -2800,7 +2500,7 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply tag.time - lf_time_start()); if (!wait_for_reply) { - LF_PRINT_LOG("Not waiting physical time to advance further."); + LF_PRINT_LOG("Not waiting for physical time to advance further."); return tag; } @@ -2809,7 +2509,7 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply // RTI. That amount of time will be no greater than ADVANCE_MESSAGE_INTERVAL in the future. LF_PRINT_DEBUG("Waiting for physical time to elapse or an event on the event queue."); - // The above call to _lf_bounded_NET called lf_time_physical() + // The above call to bounded_NET called lf_time_physical() // set _lf_last_reported_unadjusted_physical_time_ns, the // time obtained using CLOCK_REALTIME before adjustment for // clock synchronization. Since that is the clock used by @@ -2835,53 +2535,301 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply } } -/** - * Parse the address of the RTI and store them into the global federation_metadata struct. - * @return a parse_rti_code_t indicating the result of the parse. 
- */ -parse_rti_code_t parse_rti_addr(const char* rti_addr) { - bool has_host = false, has_port = false, has_user = false; - rti_addr_info_t rti_addr_info = {0}; - extract_rti_addr_info(rti_addr, &rti_addr_info); - if (!rti_addr_info.has_host && !rti_addr_info.has_port && !rti_addr_info.has_user) { - return FAILED_TO_PARSE; +void lf_send_port_absent_to_federate( + environment_t* env, + interval_t additional_delay, + unsigned short port_ID, + unsigned short fed_ID) { + assert(env != GLOBAL_ENVIRONMENT); + + // Construct the message + size_t message_length = 1 + sizeof(port_ID) + sizeof(fed_ID) + sizeof(instant_t) + sizeof(microstep_t); + unsigned char buffer[message_length]; + + // Apply the additional delay to the current tag and use that as the intended + // tag of the outgoing message. Note that if there is delay on the connection, + // then we cannot promise no message with tag = current_tag + delay because a + // subsequent reaction might produce such a message. But we can promise no + // message with a tag strictly less than current_tag + delay. 
+ tag_t current_message_intended_tag = lf_delay_strict(env->current_tag, additional_delay); + + LF_PRINT_LOG("Sending port " + "absent for tag " PRINTF_TAG " for port %d to federate %d.", + current_message_intended_tag.time - start_time, + current_message_intended_tag.microstep, + port_ID, fed_ID); + + buffer[0] = MSG_TYPE_PORT_ABSENT; + encode_uint16(port_ID, &(buffer[1])); + encode_uint16(fed_ID, &(buffer[1+sizeof(port_ID)])); + encode_tag(&(buffer[1+sizeof(port_ID)+sizeof(fed_ID)]), current_message_intended_tag); + +#ifdef FEDERATED_CENTRALIZED + // Send the absent message through the RTI + int* socket = &_fed.socket_TCP_RTI; +#else + // Send the absent message directly to the federate + int* socket = &_fed.sockets_for_outbound_p2p_connections[fed_ID]; +#endif + + if (socket == &_fed.socket_TCP_RTI) { + tracepoint_federate_to_rti( + _fed.trace, send_PORT_ABS, _lf_my_fed_id, &current_message_intended_tag); + } else { + tracepoint_federate_to_federate( + _fed.trace, send_PORT_ABS, _lf_my_fed_id, fed_ID, &current_message_intended_tag); } - if (rti_addr_info.has_host) { - if (validate_host(rti_addr_info.rti_host_str)) { - char* rti_host = (char*) calloc(256, sizeof(char)); - strncpy(rti_host, rti_addr_info.rti_host_str, 255); - federation_metadata.rti_host = rti_host; + + LF_MUTEX_LOCK(lf_outbound_socket_mutex); + int result = write_to_socket_close_on_error(socket, message_length, buffer); + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); + + if (result != 0) { + // Write failed. Response depends on whether coordination is centralized. + if (socket == &_fed.socket_TCP_RTI) { + // Centralized coordination. This is a critical error. + lf_print_error_system_failure("Failed to send port absent message for port %hu to federate %hu.", + port_ID, fed_ID); } else { - return INVALID_HOST; + // Decentralized coordination. This is not a critical error.
+ lf_print_warning("Failed to send port absent message for port %hu to federate %hu.", + port_ID, fed_ID); } } - if (rti_addr_info.has_port) { - if (validate_port(rti_addr_info.rti_port_str)) { - federation_metadata.rti_port = atoi(rti_addr_info.rti_port_str); - } else { - return INVALID_PORT; +} + +int lf_send_stop_request_to_rti(tag_t stop_tag) { + + // Send a stop request with the specified tag to the RTI + unsigned char buffer[MSG_TYPE_STOP_REQUEST_LENGTH]; + // Stop at the next microstep + stop_tag.microstep++; + ENCODE_STOP_REQUEST(buffer, stop_tag.time, stop_tag.microstep); + + LF_MUTEX_LOCK(lf_outbound_socket_mutex); + // Do not send a stop request if a stop request has been previously received from the RTI. + if (!_fed.received_stop_request_from_rti) { + LF_PRINT_LOG("Sending to RTI a MSG_TYPE_STOP_REQUEST message with tag " PRINTF_TAG ".", + stop_tag.time - start_time, + stop_tag.microstep); + + if (_fed.socket_TCP_RTI < 0) { + lf_print_warning("Socket is no longer connected. Dropping message."); + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); + return -1; } + // Trace the event when tracing is enabled + tracepoint_federate_to_rti(_fed.trace, send_STOP_REQ, _lf_my_fed_id, &stop_tag); + + write_to_socket_fail_on_error(&_fed.socket_TCP_RTI, MSG_TYPE_STOP_REQUEST_LENGTH, + buffer, &lf_outbound_socket_mutex, + "Failed to send stop time " PRINTF_TIME " to the RTI.", stop_tag.time - start_time); + + // Treat this sending as equivalent to having received a stop request from the RTI. 
+ _fed.received_stop_request_from_rti = true; + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); + return 0; + } else { + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); + return 1; } - if (rti_addr_info.has_user) { - if (validate_user(rti_addr_info.rti_user_str)) { - char* rti_user = (char*) calloc(256, sizeof(char)); - strncpy(rti_user, rti_addr_info.rti_user_str, 255); - federation_metadata.rti_user = rti_user; +} + +int lf_send_tagged_message(environment_t* env, + interval_t additional_delay, + int message_type, + unsigned short port, + unsigned short federate, + const char* next_destination_str, + size_t length, + unsigned char* message) { + assert(env != GLOBAL_ENVIRONMENT); + + size_t header_length = 1 + sizeof(uint16_t) + sizeof(uint16_t) + + sizeof(int32_t) + sizeof(instant_t) + sizeof(microstep_t); + unsigned char header_buffer[header_length]; + + if (message_type != MSG_TYPE_TAGGED_MESSAGE && message_type != MSG_TYPE_P2P_TAGGED_MESSAGE) { + lf_print_error("lf_send_message: Unsupported message type (%d).", message_type); + return -1; + } + + size_t buffer_head = 0; + // First byte is the message type. + header_buffer[buffer_head] = (unsigned char)message_type; + buffer_head += sizeof(unsigned char); + // Next two bytes identify the destination port. + // NOTE: Send messages little endian, not big endian. + encode_uint16(port, &(header_buffer[buffer_head])); + buffer_head += sizeof(uint16_t); + + // Next two bytes identify the destination federate. + encode_uint16(federate, &(header_buffer[buffer_head])); + buffer_head += sizeof(uint16_t); + + // The next four bytes are the message length. + encode_int32((int32_t)length, &(header_buffer[buffer_head])); + buffer_head += sizeof(int32_t); + + // Apply the additional delay to the current tag and use that as the intended + // tag of the outgoing message. 
+ tag_t current_message_intended_tag = lf_delay_tag(env->current_tag, additional_delay); + + if (_lf_is_tag_after_stop_tag(env, current_message_intended_tag)) { + // Message tag is past the timeout time (the stop time) so it should not be sent. + LF_PRINT_LOG("Dropping message because it will be after the timeout time."); + return -1; + } + + // Next 8 + 4 will be the tag (timestamp, microstep) + encode_tag( + &(header_buffer[buffer_head]), + current_message_intended_tag + ); + + LF_PRINT_LOG("Sending message with tag " PRINTF_TAG " to %s.", + current_message_intended_tag.time - start_time, + current_message_intended_tag.microstep, + next_destination_str); + + // Use a mutex lock to prevent multiple threads from simultaneously sending. + LF_MUTEX_LOCK(lf_outbound_socket_mutex); + + int* socket; + if (message_type == MSG_TYPE_P2P_TAGGED_MESSAGE) { + socket = &_fed.sockets_for_outbound_p2p_connections[federate]; + tracepoint_federate_to_federate(_fed.trace, send_P2P_TAGGED_MSG, _lf_my_fed_id, federate, ¤t_message_intended_tag); + } else { + socket = &_fed.socket_TCP_RTI; + tracepoint_federate_to_rti(_fed.trace, send_TAGGED_MSG, _lf_my_fed_id, ¤t_message_intended_tag); + } + + int result = write_to_socket_close_on_error(socket, header_length, header_buffer); + if (result == 0) { + // Header sent successfully. Send the body. + result = write_to_socket_close_on_error(socket, length, message); + } + if (result != 0) { + // Message did not send. Handling depends on message type. + if (message_type == MSG_TYPE_P2P_TAGGED_MESSAGE) { + lf_print_warning("Failed to send message to %s. Dropping the message.", next_destination_str); } else { - return INVALID_USER; + lf_print_error_system_failure("Failed to send message to %s. Connection lost to the RTI.", + next_destination_str); } } - return SUCCESS; + LF_MUTEX_UNLOCK(lf_outbound_socket_mutex); + return result; } -/** - * Sets the federation_id of this federate to fid. 
- */ -void set_federation_id(const char* fid) { +void lf_set_federation_id(const char* fid) { federation_metadata.federation_id = fid; } -void set_federation_trace_object(trace_t * trace) { +void lf_set_federation_trace_object(trace_t * trace) { _fed.trace = trace; } + +#ifdef FEDERATED_DECENTRALIZED +void lf_spawn_staa_thread(){ + lf_thread_create(&_fed.staaSetter, update_ports_from_staa_offsets, NULL); +} +#endif // FEDERATED_DECENTRALIZED + +void lf_stall_advance_level_federation(environment_t* env, size_t level) { + LF_PRINT_DEBUG("Acquiring the environment mutex."); + LF_MUTEX_LOCK(env->mutex); + LF_PRINT_DEBUG("Waiting on MLAA with next_reaction_level %zu and MLAA %d.", level, max_level_allowed_to_advance); + while (((int) level) >= max_level_allowed_to_advance) { + lf_cond_wait(&lf_port_status_changed); + }; + LF_PRINT_DEBUG("Exiting wait with MLAA %d and next_reaction_level %zu.", max_level_allowed_to_advance, level); + LF_MUTEX_UNLOCK(env->mutex); +} + +void lf_synchronize_with_other_federates(void) { + + LF_PRINT_DEBUG("Synchronizing with other federates."); + + // Reset the start time to the coordinated start time for all federates. + // Note that this does not grant execution to this federate. + start_time = get_start_time_from_rti(lf_time_physical()); + + // Start a thread to listen for incoming TCP messages from the RTI. + // @note Up until this point, the federate has been listening for messages + // from the RTI in a sequential manner in the main thread. From now on, a + // separate thread is created to allow for asynchronous communication. + lf_thread_create(&_fed.RTI_socket_listener, listen_to_rti_TCP, NULL); + lf_thread_t thread_id; + if (create_clock_sync_thread(&thread_id)) { + lf_print_warning("Failed to create thread to handle clock synchronization."); + } +} + +bool lf_update_max_level(tag_t tag, bool is_provisional) { + // This always needs the top-level environment, which will be env[0]. 
+ environment_t *env; + _lf_get_environments(&env); + int prev_max_level_allowed_to_advance = max_level_allowed_to_advance; + max_level_allowed_to_advance = INT_MAX; +#ifdef FEDERATED_DECENTRALIZED + size_t action_table_size = _lf_action_table_size; + lf_action_base_t** action_table = _lf_action_table; +#else + // Note that the following test is never true for decentralized coordination, + // where tag always is NEVER_TAG. + if ((lf_tag_compare(env->current_tag, tag) < 0) || ( + lf_tag_compare(env->current_tag, tag) == 0 && !is_provisional + )) { + LF_PRINT_DEBUG("Updated MLAA to %d at time " PRINTF_TIME ".", + max_level_allowed_to_advance, + lf_time_logical_elapsed(env) + ); + // Safe to complete the current tag + return (prev_max_level_allowed_to_advance != max_level_allowed_to_advance); + } + + size_t action_table_size = _lf_zero_delay_cycle_action_table_size; + lf_action_base_t** action_table = _lf_zero_delay_cycle_action_table; +#endif // FEDERATED_DECENTRALIZED + for (int i = 0; i < action_table_size; i++) { + lf_action_base_t* input_port_action = action_table[i]; +#ifdef FEDERATED_DECENTRALIZED + // In decentralized execution, if the current_tag is close enough to the + // start tag and there is a large enough delay on an incoming + // connection, then there is no need to block progress waiting for this + // port status. This is irrelevant for centralized because blocking only + // occurs on zero-delay cycles. 
+ if ( + (_lf_action_delay_table[i] == 0 && env->current_tag.time == start_time && env->current_tag.microstep == 0) + || (_lf_action_delay_table[i] > 0 && lf_tag_compare( + env->current_tag, + lf_delay_strict((tag_t) {.time=start_time, .microstep=0}, _lf_action_delay_table[i]) + ) <= 0) + ) { + continue; + } +#endif // FEDERATED_DECENTRALIZED + // If the current tag is greater than the last known status tag of the input port, + // and the input port is not physical, then block on that port by ensuring + // the MLAA is no greater than the level of that port. + // For centralized coordination, this is applied only to input ports coming from + // federates that are in a ZDC. For decentralized coordination, this is applied + // to all input ports. + if (lf_tag_compare(env->current_tag, + input_port_action->trigger->last_known_status_tag) > 0 + && !input_port_action->trigger->is_physical) { + max_level_allowed_to_advance = LF_MIN( + max_level_allowed_to_advance, + ((int) LF_LEVEL(input_port_action->trigger->reactions[0]->index)) + ); + } + } + LF_PRINT_DEBUG("Updated MLAA to %d at time " PRINTF_TIME ".", + max_level_allowed_to_advance, + lf_time_logical_elapsed(env) + ); + return (prev_max_level_allowed_to_advance != max_level_allowed_to_advance); +} + #endif diff --git a/core/federated/network/CMakeLists.txt b/core/federated/network/CMakeLists.txt new file mode 100644 index 0000000000..5306eae02d --- /dev/null +++ b/core/federated/network/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LF_NETWORK_FILES net_util.c) + +list(TRANSFORM LF_NETWORK_FILES PREPEND federated/network/) +list(APPEND REACTORC_SOURCES ${LF_NETWORK_FILES}) diff --git a/core/federated/net_util.c b/core/federated/network/net_util.c similarity index 79% rename from core/federated/net_util.c rename to core/federated/network/net_util.c index 99c6e6cf8e..754a28adab 100644 --- a/core/federated/net_util.c +++ b/core/federated/network/net_util.c @@ -56,120 +56,172 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. /** Number of nanoseconds to sleep before retrying a socket read. */ #define SOCKET_READ_RETRY_INTERVAL 1000000 +// Mutex lock held while performing socket close operations. +// A deadlock can occur if two threads simulataneously attempt to close the same socket. +lf_mutex_t socket_mutex; + int create_real_time_tcp_socket_errexit() { int sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (sock < 0) { - lf_print_error_and_exit("Could not open TCP socket. Err=%d", sock); + lf_print_error_system_failure("Could not open TCP socket."); } // Disable Nagle's algorithm which bundles together small TCP messages to - // reduce network traffic + // reduce network traffic. // TODO: Re-consider if we should do this, and whether disabling delayed ACKs - // is enough. + // is enough. int flag = 1; int result = setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(int)); if (result < 0) { - lf_print_error_and_exit("Failed to disable Nagle algorithm on socket server."); + lf_print_error_system_failure("Failed to disable Nagle algorithm on socket server."); } +#if defined(PLATFORM_Linux) // Disable delayed ACKs. Only possible on Linux - #if defined(PLATFORM_Linux) result = setsockopt(sock, IPPROTO_TCP, TCP_QUICKACK, &flag, sizeof(int)); if (result < 0) { - lf_print_error_and_exit("Failed to disable Nagle algorithm on socket server."); + lf_print_error_system_failure("Failed to disable Nagle algorithm on socket server."); } - #endif +#endif // Linux return sock; } -ssize_t read_from_socket_errexit( - int socket, - size_t num_bytes, - unsigned char* buffer, - char* format, ...) { - va_list args; - // Error checking first - if (socket < 0 && format != NULL) { - lf_print_error("Socket is no longer open."); - lf_print_error_and_exit(format, args); - } +int read_from_socket(int socket, size_t num_bytes, unsigned char* buffer) { + if (socket < 0) { + // Socket is not open. 
+ errno = EBADF; + return -1; + } ssize_t bytes_read = 0; + int retry_count = 0; while (bytes_read < (ssize_t)num_bytes) { ssize_t more = read(socket, buffer + bytes_read, num_bytes - (size_t)bytes_read); - if(more <= 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { - // The error code set by the socket indicates + if(more < 0 && retry_count++ < NUM_SOCKET_RETRIES + && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) { + // Those error codes set by the socket indicates // that we should try again (@see man errno). - LF_PRINT_DEBUG("Reading from socket was blocked. Will try again."); + lf_print_warning("Reading from socket failed. Will try again."); + lf_sleep(DELAY_BETWEEN_SOCKET_RETRIES); continue; - } else if (more <= 0) { - if (format != NULL) { - shutdown(socket, SHUT_RDWR); - close(socket); - lf_print_error("Read %ld bytes, but expected %zu. errno=%d", - more + bytes_read, num_bytes, errno); - lf_print_error_and_exit(format, args); - } else if (more == 0) { - // According to this: https://stackoverflow.com/questions/4160347/close-vs-shutdown-socket, - // upon receiving a zero length packet or an error, we can close the socket. - // If there are any pending outgoing messages, this will attempt to send those - // followed by an EOF. - close(socket); - } - return more; + } else if (more < 0) { + // A more serious error occurred. + return -1; + } else if (more == 0) { + // EOF received. + return 1; } bytes_read += more; } - return bytes_read; + return 0; } -ssize_t read_from_socket(int socket, size_t num_bytes, unsigned char* buffer) { - return read_from_socket_errexit(socket, num_bytes, buffer, NULL); +int read_from_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* buffer) { + assert(socket); + int read_failed = read_from_socket(*socket, num_bytes, buffer); + if (read_failed) { + // Read failed. + // Socket has probably been closed from the other side. + // Shut down and close the socket from this side. 
+ shutdown(*socket, SHUT_RDWR); + close(*socket); + // Mark the socket closed. + *socket = -1; + return -1; + } + return 0; } -ssize_t write_to_socket_with_mutex( - int socket, +void read_from_socket_fail_on_error( + int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, char* format, ...) { + va_list args; + assert(socket); + int read_failed = read_from_socket_close_on_error(socket, num_bytes, buffer); + if (read_failed) { + // Read failed. + if (mutex != NULL) { + lf_mutex_unlock(mutex); + } + if (format != NULL) { + lf_print_error_system_failure(format, args); + } else { + lf_print_error_system_failure("Failed to read from socket."); + } + } +} + +ssize_t peek_from_socket(int socket, unsigned char* result) { + ssize_t bytes_read = recv(socket, result, 1, MSG_DONTWAIT | MSG_PEEK); + if (bytes_read < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) return 0; + else return bytes_read; +} + +int write_to_socket(int socket, size_t num_bytes, unsigned char* buffer) { + if (socket < 0) { + // Socket is not open. + errno = EBADF; + return -1; + } ssize_t bytes_written = 0; va_list args; while (bytes_written < (ssize_t)num_bytes) { ssize_t more = write(socket, buffer + bytes_written, num_bytes - (size_t)bytes_written); - if (more <= 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { - // The error code set by the socket indicates - // that we should try again (@see man errno). + if (more <= 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) { + // The error codes EAGAIN or EWOULDBLOCK indicate + // that we should try again (@see man errno). + // The error code EINTR means the system call was interrupted before completing. LF_PRINT_DEBUG("Writing to socket was blocked. 
Will try again."); + lf_sleep(DELAY_BETWEEN_SOCKET_RETRIES); continue; - } else if (more <= 0) { - if (format != NULL) { - shutdown(socket, SHUT_RDWR); - close(socket); - if (mutex != NULL) { - lf_mutex_unlock(mutex); - } - lf_print_error(format, args); - lf_print_error("Code %d: %s.", errno, strerror(errno)); - } - return more; + } else if (more < 0) { + // A more serious error occurred. + return -1; } bytes_written += more; } - return bytes_written; + return 0; +} + +int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* buffer) { + assert(socket); + int result = write_to_socket(*socket, num_bytes, buffer); + if (result) { + // Write failed. + // Socket has probably been closed from the other side. + // Shut down and close the socket from this side. + shutdown(*socket, SHUT_RDWR); + close(*socket); + // Mark the socket closed. + *socket = -1; + } + return result; } -ssize_t write_to_socket_errexit( - int socket, +void write_to_socket_fail_on_error( + int* socket, size_t num_bytes, unsigned char* buffer, + lf_mutex_t* mutex, char* format, ...) { - return write_to_socket_with_mutex(socket, num_bytes, buffer, NULL, format); -} - -ssize_t write_to_socket(int socket, size_t num_bytes, unsigned char* buffer) { - return write_to_socket_with_mutex(socket, num_bytes, buffer, NULL, NULL); + va_list args; + assert(socket); + int result = write_to_socket_close_on_error(socket, num_bytes, buffer); + if (result) { + // Write failed. + if (mutex != NULL) { + lf_mutex_unlock(mutex); + } + if (format != NULL) { + lf_print_error_system_failure(format, args); + } else { + lf_print_error("Failed to write to socket. 
Closing it."); + } + } } #endif // FEDERATED diff --git a/core/modal_models/CMakeLists.txt b/core/modal_models/CMakeLists.txt index 9839040a3a..00abf6b343 100644 --- a/core/modal_models/CMakeLists.txt +++ b/core/modal_models/CMakeLists.txt @@ -1,5 +1,4 @@ set(MODAL_SOURCES modes.c) -list(APPEND INFO_SOURCES ${MODAL_SOURCES}) list(TRANSFORM MODAL_SOURCES PREPEND modal_models/) -target_sources(core PRIVATE ${MODAL_SOURCES}) +list(APPEND REACTORC_SOURCES ${MODAL_SOURCES}) diff --git a/core/modal_models/modes.c b/core/modal_models/modes.c index 16d630cfef..91d58b7dfd 100644 --- a/core/modal_models/modes.c +++ b/core/modal_models/modes.c @@ -422,13 +422,15 @@ void _lf_process_mode_changes( tag_t schedule_tag = {.time = current_logical_tag.time + local_remaining_delay, .microstep = (local_remaining_delay == 0 ? current_logical_tag.microstep + 1 : 0)}; _lf_schedule_at_tag(env, event->trigger, schedule_tag, event->token); - if (event->next != NULL) { - // The event has more events stacked up in super dense time, attach them to the newly created event. - if (event->trigger->last->next == NULL) { - event->trigger->last->next = event->next; - } else { - lf_print_error("Modes: Cannot attach events stacked up in super dense to the just unsuspended root event."); - } + // Also schedule events stacked up in super dense time. 
+ event_t* e = event; + while (e->next != NULL) { + schedule_tag.microstep++; + _lf_schedule_at_tag(env, e->next->trigger, schedule_tag, e->next->token); + event_t* tmp = e->next; + e = tmp->next; + // A fresh event was created by schedule, hence, recycle old one + _lf_recycle_event(env, tmp); } } // A fresh event was created by schedule, hence, recycle old one diff --git a/core/platform/CMakeLists.txt b/core/platform/CMakeLists.txt index 949d651774..33393246e2 100644 --- a/core/platform/CMakeLists.txt +++ b/core/platform/CMakeLists.txt @@ -2,37 +2,35 @@ # file and assign the file's path to LF_PLATFORM_FILE set(LF_PLATFORM_FILES -lf_unix_clock_support.c -lf_unix_syscall_support.c -lf_linux_support.c -lf_macos_support.c -lf_windows_support.c -lf_nrf52_support.c -lf_zephyr_support.c -lf_zephyr_clock_counter.c -lf_zephyr_clock_kernel.c -lf_rp2040_support.c -lf_STM32f4_support.c + lf_unix_clock_support.c + lf_unix_syscall_support.c + lf_linux_support.c + lf_macos_support.c + lf_windows_support.c + lf_nrf52_support.c + lf_zephyr_support.c + lf_zephyr_clock_counter.c + lf_zephyr_clock_kernel.c + lf_rp2040_support.c + lf_STM32f4_support.c ) if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") set(CMAKE_SYSTEM_VERSION 10.0) message("Using Windows SDK version ${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION}") elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Nrf52") - target_compile_definitions(core PUBLIC PLATFORM_NRF52) + list(APPEND REACTORC_COMPILE_DEFS PLATFORM_NRF52) elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Zephyr") - target_compile_definitions(core PUBLIC PLATFORM_ZEPHYR) + list(APPEND REACTORC_COMPILE_DEFS PLATFORM_ZEPHYR) + set(PLATFORM_ZEPHYR true) elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Stm32") target_compile_definitions(core PUBLIC PLATFORM_STM32F4) elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Rp2040") - target_compile_definitions(core PUBLIC PLATFORM_RP2040) + list(APPEND REACTORC_COMPILE_DEFS PLATFORM_RP2040) endif() -# Add sources to the list for debug info -list(APPEND INFO_SOURCES 
${LF_PLATFORM_FILES}) - # Prepend all sources with platform list(TRANSFORM LF_PLATFORM_FILES PREPEND platform/) -# Add sources to core lib -target_sources(core PRIVATE ${LF_PLATFORM_FILES}) +# Add sources to the list for debug info +list(APPEND REACTORC_SOURCES ${LF_PLATFORM_FILES}) diff --git a/core/platform/lf_arduino_support.c b/core/platform/lf_arduino_support.c index 57c28d1ec4..ded2850e13 100644 --- a/core/platform/lf_arduino_support.c +++ b/core/platform/lf_arduino_support.c @@ -95,7 +95,7 @@ int lf_sleep(interval_t sleep_duration) { do { _lf_clock_now(&now); } while ((now < wakeup)); - + return 0; } /** diff --git a/core/reactor.c b/core/reactor.c index a41a0847d1..ceace5125d 100644 --- a/core/reactor.c +++ b/core/reactor.c @@ -364,21 +364,25 @@ int lf_reactor_c_main(int argc, const char* argv[]) { _lf_create_environments(); // code-generated function environment_t *env; int num_environments = _lf_get_environments(&env); - lf_assert(num_environments == 1, + LF_ASSERT(num_environments == 1, "Found %d environments. Only 1 can be used with the single-threaded runtime", num_environments); LF_PRINT_DEBUG("Initializing."); initialize_global(); // Set start time start_time = lf_time_physical(); + + LF_PRINT_DEBUG("NOTE: FOREVER is displayed as " PRINTF_TAG " and NEVER as " PRINTF_TAG, + FOREVER_TAG.time - start_time, FOREVER_TAG.microstep, + NEVER_TAG.time - start_time, 0); + environment_init_tags(env, start_time, duration); - // Start tracing if enalbed + // Start tracing if enabled. start_trace(env->trace); #ifdef MODAL_REACTORS // Set up modal infrastructure _lf_initialize_modes(env); #endif - _lf_execution_started = true; _lf_trigger_startup_reactions(env); _lf_initialize_timers(env); // If the stop_tag is (0,0), also insert the shutdown @@ -389,9 +393,11 @@ int lf_reactor_c_main(int argc, const char* argv[]) { } LF_PRINT_DEBUG("Running the program's main loop."); // Handle reactions triggered at time (T,m). 
+ env->execution_started = true; if (_lf_do_step(env)) { while (next(env) != 0); } + _lf_normal_termination = true; return 0; } else { return -1; diff --git a/core/reactor_common.c b/core/reactor_common.c index 5422d1453b..9382029091 100644 --- a/core/reactor_common.c +++ b/core/reactor_common.c @@ -99,12 +99,6 @@ unsigned int _lf_number_of_workers = 0u; */ instant_t duration = -1LL; -/** - * Indicates whether or not the execution - * has started. - */ -bool _lf_execution_started = false; - /** Indicator of whether the keepalive command-line option was given. */ bool keepalive_specified = false; @@ -116,6 +110,16 @@ bool keepalive_specified = false; */ interval_t _lf_fed_STA_offset = 0LL; +void _lf_print_event(void* event) { + if (event == NULL) { + printf("NULL"); + } else { + event_t* ev = (event_t*)event; + lf_print("Event: Time=" PRINTF_TIME ", dummy=%d, timer=%d", + ev->time - start_time, ev->is_dummy, ev->trigger->is_timer); + } +} + /** * Allocate memory using calloc (so the allocated memory is zeroed out) * and record the allocated memory on the specified self struct so that @@ -266,6 +270,11 @@ void _lf_trigger_reaction(environment_t* env, reaction_t* reaction, int worker_n */ void _lf_start_time_step(environment_t *env) { assert(env != GLOBAL_ENVIRONMENT); + if (!env->execution_started) { + // Execution hasn't started, so this is probably being invoked in termination + // due to an error. + return; + } LF_PRINT_LOG("--------- Start time step at tag " PRINTF_TAG ".", env->current_tag.time - start_time, env->current_tag.microstep); // Handle dynamically created tokens for mutable inputs. _lf_free_token_copies(env); @@ -291,22 +300,29 @@ void _lf_start_time_step(environment_t *env) { } } } + env->is_present_fields_abbreviated_size = 0; + +#ifdef FEDERATED + // If the environment is the top-level one, we have some work to do. + environment_t *envs; + int num_envs = _lf_get_environments(&envs); + if (env == envs) { + // This is the top-level environment. 
#ifdef FEDERATED_DECENTRALIZED - for (int i = 0; i < env->is_present_fields_size; i++) { - // FIXME: For now, an intended tag of (NEVER, 0) - // indicates that it has never been set. - *env->_lf_intended_tag_fields[i] = (tag_t) {NEVER, 0}; + for (int i = 0; i < env->is_present_fields_size; i++) { + // An intended tag of NEVER_TAG indicates that it has never been set. + *env->_lf_intended_tag_fields[i] = NEVER_TAG; + } +#endif // FEDERATED_DECENTRALIZED + + // Reset absent fields on network ports because + // their status is unknown + lf_reset_status_fields_on_input_port_triggers(); + // Signal the helper thread to reset its progress since the logical time has changed. + lf_cond_signal(&lf_current_tag_changed); } -#endif -#ifdef FEDERATED - // Reset absent fields on network ports because - // their status is unknown - reset_status_fields_on_input_port_triggers(); - // Signal the helper thread to reset its progress since the logical time has changed. - lf_cond_signal(&logical_time_changed); -#endif - env->is_present_fields_abbreviated_size = 0; +#endif // FEDERATED } /** @@ -349,8 +365,8 @@ void _lf_pop_events(environment_t *env) { } #ifdef MODAL_REACTORS - // If this event is associated with an incative it should haven been suspended and no longer on the event queue. - // FIXME This should not be possible + // If this event is associated with an inactive mode it should haven been suspended and no longer on the event queue. + // NOTE: This should not be possible if (!_lf_mode_is_active(event->trigger->mode)) { lf_print_warning("Assumption violated. There is an event on the event queue that is associated to an inactive mode."); } @@ -366,22 +382,28 @@ void _lf_pop_events(environment_t *env) { #ifdef FEDERATED_DECENTRALIZED // In federated execution, an intended tag that is not (NEVER, 0) // indicates that this particular event is triggered by a network message. 
- // The intended tag is set in handle_timed_message in federate.c whenever - // a timed message arrives from another federate. + // The intended tag is set in handle_tagged_message in federate.c whenever + // a tagged message arrives from another federate. if (event->intended_tag.time != NEVER) { // If the intended tag of the event is actually set, // transfer the intended tag to the trigger so that // the reaction can access the value. event->trigger->intended_tag = event->intended_tag; // And check if it is in the past compared to the current tag. - if (lf_tag_compare(event->intended_tag, - env->current_tag) < 0) { + if (lf_tag_compare(event->intended_tag, env->current_tag) < 0) { // Mark the triggered reaction with a STP violation reaction->is_STP_violated = true; LF_PRINT_LOG("Trigger %p has violated the reaction's STP offset. Intended tag: " PRINTF_TAG ". Current tag: " PRINTF_TAG, event->trigger, event->intended_tag.time - start_time, event->intended_tag.microstep, env->current_tag.time - start_time, env->current_tag.microstep); + // Need to update the last_known_status_tag of the port because otherwise, + // the MLAA could get stuck, causing the program to lock up. + // This should not call update_last_known_status_on_input_port because we + // are starting a new tag step execution, so there are no reactions blocked on this input. + if (lf_tag_compare(env->current_tag, event->trigger->last_known_status_tag) > 0) { + event->trigger->last_known_status_tag = env->current_tag; + } } } #endif @@ -643,8 +665,8 @@ static void _lf_replace_token(event_t* event, lf_token_t* token) { /** * Schedule events at a specific tag (time, microstep), provided - * that the tag is in the future relative to the current tag. - * The input time values are absolute. + * that the tag is in the future relative to the current tag (or the + * environment has not started executing). The input time values are absolute. 
* * If there is an event found at the requested tag, the payload * is replaced and 0 is returned. @@ -665,18 +687,19 @@ static void _lf_replace_token(event_t* event, lf_token_t* token) { * @param tag Logical tag of the event * @param token The token wrapping the payload or NULL for no payload. * - * @return 1 for success, 0 if no new event was scheduled (instead, the payload was updated), - * or -1 for error (the tag is equal to or less than the current tag). + * @return A positive trigger handle for success, 0 if no new event was scheduled + * (instead, the payload was updated), or -1 for error (the tag is equal to or less + * than the current tag). */ -int _lf_schedule_at_tag(environment_t* env, trigger_t* trigger, tag_t tag, lf_token_t* token) { +trigger_handle_t _lf_schedule_at_tag(environment_t* env, trigger_t* trigger, tag_t tag, lf_token_t* token) { assert(env != GLOBAL_ENVIRONMENT); tag_t current_logical_tag = env->current_tag; LF_PRINT_DEBUG("_lf_schedule_at_tag() called with tag " PRINTF_TAG " at tag " PRINTF_TAG ".", tag.time - start_time, tag.microstep, current_logical_tag.time - start_time, current_logical_tag.microstep); - if (lf_tag_compare(tag, current_logical_tag) <= 0) { - lf_print_warning("_lf_schedule_at_tag(): requested to schedule an event in the past."); + if (lf_tag_compare(tag, current_logical_tag) <= 0 && env->execution_started) { + lf_print_warning("_lf_schedule_at_tag(): requested to schedule an event at the current or past tag."); return -1; } @@ -827,10 +850,11 @@ int _lf_schedule_at_tag(environment_t* env, trigger_t* trigger, tag_t tag, lf_to if (tag.time == current_logical_tag.time) { relative_microstep -= current_logical_tag.microstep; } - if (((tag.time == current_logical_tag.time) && (relative_microstep == 1)) || + if ((tag.time == current_logical_tag.time && relative_microstep == 1 && env->execution_started) || tag.microstep == 0) { // Do not need a dummy event if we are scheduling at 1 microstep // in the future at current time 
or at microstep 0 in a future time. + // Note that if execution hasn't started, then we have to insert dummy events. pqueue_insert(env->event_q, e); } else { // Create a dummy event. Insert it into the queue, and let its next @@ -838,7 +862,11 @@ int _lf_schedule_at_tag(environment_t* env, trigger_t* trigger, tag_t tag, lf_to pqueue_insert(env->event_q, _lf_create_dummy_events(env, trigger, tag.time, e, relative_microstep)); } } - return 1; + trigger_handle_t return_value = env->_lf_handle++; + if (env->_lf_handle < 0) { + env->_lf_handle = 1; + } + return return_value; } /** @@ -853,10 +881,10 @@ int _lf_schedule_at_tag(environment_t* env, trigger_t* trigger, tag_t tag, lf_to * the will be freed. Hence, it is essential that the payload be in * memory allocated using malloc. * - * There are three conditions under which this function will not + * There are several conditions under which this function will not * actually put an event on the event queue and decrement the reference count * of the token (if there is one), which could result in the payload being - * freed. In all three cases, this function returns 0. Otherwise, + * freed. In all cases, this function returns 0. Otherwise, * it returns a handle to the scheduled trigger, which is an integer * greater than 0. * @@ -864,7 +892,9 @@ int _lf_schedule_at_tag(environment_t* env, trigger_t* trigger, tag_t tag, lf_to * offset plus the extra delay is greater than zero. * The second condition is that the trigger offset plus the extra delay * is greater that the requested stop time (timeout). - * The third condition is that the trigger argument is null. + * A third condition is that the trigger argument is null. + * Also, an event might not be scheduled if the trigger is an action + * with a `min_spacing` parameter. See the documentation. * * @param env Environment in which we are executing. * @param trigger The trigger to be invoked at a later logical time. 
@@ -916,7 +946,8 @@ trigger_handle_t _lf_schedule(environment_t *env, trigger_t* trigger, interval_t if (!trigger->is_timer) { delay += trigger->offset; } - interval_t intended_time = env->current_tag.time + delay; + tag_t intended_tag = (tag_t){.time = env->current_tag.time + delay, .microstep = 0}; + LF_PRINT_DEBUG("_lf_schedule: env->current_tag.time = " PRINTF_TIME ". Total logical delay = " PRINTF_TIME "", env->current_tag.time, delay); interval_t min_spacing = trigger->period; @@ -938,7 +969,7 @@ trigger_handle_t _lf_schedule(environment_t *env, trigger_t* trigger, interval_t // modify the intended time. if (trigger->is_physical) { // Get the current physical time and assign it as the intended time. - intended_time = lf_time_physical() + delay; + intended_tag.time = lf_time_physical() + delay; } else { // FIXME: We need to verify that we are executing within a reaction? // See reactor_threaded. @@ -949,11 +980,11 @@ trigger_handle_t _lf_schedule(environment_t *env, trigger_t* trigger, interval_t // - we have eliminated the possibility to have a negative additional delay; and // - we detect the asynchronous use of logical actions #ifndef NDEBUG - if (intended_time < env->current_tag.time) { + if (intended_tag.time < env->current_tag.time) { lf_print_warning("Attempting to schedule an event earlier than current time by " PRINTF_TIME " nsec! " "Revising to the current time " PRINTF_TIME ".", - env->current_tag.time - intended_time, env->current_tag.time); - intended_time = env->current_tag.time; + env->current_tag.time - intended_tag.time, env->current_tag.time); + intended_tag.time = env->current_tag.time; } #endif } @@ -964,11 +995,9 @@ trigger_handle_t _lf_schedule(environment_t *env, trigger_t* trigger, interval_t e->intended_tag = trigger->intended_tag; #endif - event_t* existing = (event_t*)(trigger->last); // Check for conflicts (a queued event with the same trigger and time). 
- if (trigger->period < 0) { + if (min_spacing <= 0) { // No minimum spacing defined. - tag_t intended_tag = (tag_t) {.time = intended_time, .microstep = 0u}; e->time = intended_tag.time; event_t* found = (event_t *)pqueue_find_equal_same_priority(env->event_q, e); // Check for conflicts. Let events pile up in super dense time. @@ -988,75 +1017,61 @@ trigger_handle_t _lf_schedule(environment_t *env, trigger_t* trigger, interval_t } // Hook the event into the list. found->next = e; + trigger->last_tag = intended_tag; return(0); // FIXME: return value } // If there are not conflicts, schedule as usual. If intended time is // equal to the current logical time, the event will effectively be // scheduled at the next microstep. - } else if (!trigger->is_timer && existing != NULL) { - // There exists a previously scheduled event. It determines the + } else if (!trigger->is_timer && trigger->last_tag.time != NEVER) { + // There is a min_spacing and there exists a previously + // scheduled event. It determines the // earliest time at which the new event can be scheduled. // Check to see whether the event is too early. - instant_t earliest_time = existing->time + min_spacing; + instant_t earliest_time = trigger->last_tag.time + min_spacing; LF_PRINT_DEBUG("There is a previously scheduled event; earliest possible time " "with min spacing: " PRINTF_TIME, earliest_time); // If the event is early, see which policy applies. - if (earliest_time >= intended_time) { + if (earliest_time > intended_tag.time) { LF_PRINT_DEBUG("Event is early."); switch(trigger->policy) { case drop: LF_PRINT_DEBUG("Policy is drop. Dropping the event."); - if (min_spacing > 0 || - pqueue_find_equal_same_priority(env->event_q, existing) != NULL) { - // Recycle the new event and the token. - if (existing->token != token) { - _lf_done_using(token); - } - _lf_recycle_event(env, e); - return(0); - } + // Recycle the new event and decrement the + // reference count of the token. 
+ _lf_done_using(token); + _lf_recycle_event(env, e); + return(0); case replace: LF_PRINT_DEBUG("Policy is replace. Replacing the previous event."); - // If the existing event has not been handled yet, update - // it. WARNING: If provide a mechanism for unscheduling, we - // can no longer rely on the tag of the existing event to - // determine whether or not it has been recycled (the - // existing->time < env->current_tag.time case below). - // NOTE: Because microsteps are not explicit, if the tag of - // the preceding event is equal to the current time, then - // we search the event queue to figure out whether it has - // been handled yet. - if (existing->time > env->current_tag.time || - (existing->time == env->current_tag.time && - pqueue_find_equal_same_priority(env->event_q, existing) != NULL)) { + // If the event with the previous time is still on the event + // queue, then replace the token. To find this event, we have + // to construct a dummy event_t struct. + event_t* dummy = _lf_get_new_event(env); + dummy->next = NULL; + dummy->trigger = trigger; + dummy->time = trigger->last_tag.time; + event_t* found = (event_t *)pqueue_find_equal_same_priority(env->event_q, dummy); + + if (found != NULL) { // Recycle the existing token and the new event // and update the token of the existing event. - _lf_replace_token(existing, token); + _lf_replace_token(found, token); _lf_recycle_event(env, e); + _lf_recycle_event(env, dummy); + // Leave the last_tag the same. return(0); } + _lf_recycle_event(env, dummy); + // If the preceding event _has_ been handled, then adjust // the tag to defer the event. 
- intended_time = earliest_time; + intended_tag = (tag_t){.time = earliest_time, .microstep = 0}; break; default: - if (existing->time == env->current_tag.time && - pqueue_find_equal_same_priority(env->event_q, existing) != NULL) { - if (_lf_is_tag_after_stop_tag(env, (tag_t){.time=existing->time,.microstep=env->current_tag.microstep+1})) { - // Scheduling e will incur a microstep at timeout, - // which is illegal. - _lf_recycle_event(env, e); - return 0; - } - // If the last event hasn't been handled yet, insert - // the new event right behind. - existing->next = e; - return 0; // FIXME: return a value - } else { - // Adjust the tag. - intended_time = earliest_time; - } + // Default policy is defer + intended_tag = (tag_t){.time = earliest_time, .microstep = 0}; break; } } @@ -1067,16 +1082,16 @@ trigger_handle_t _lf_schedule(environment_t *env, trigger_t* trigger, interval_t // FIXME: This is a development assertion and might // not be necessary for end-user LF programs #ifndef NDEBUG - if (intended_time < env->current_tag.time) { + if (intended_tag.time < env->current_tag.time) { lf_print_error("Attempting to schedule an event earlier than current time by " PRINTF_TIME " nsec! " "Revising to the current time " PRINTF_TIME ".", - env->current_tag.time - intended_time, env->current_tag.time); - intended_time = env->current_tag.time; + env->current_tag.time - intended_tag.time, env->current_tag.time); + intended_tag.time = env->current_tag.time; } #endif // Set the tag of the event. - e->time = intended_time; + e->time = intended_tag.time; // Do not schedule events if if the event time is past the stop time // (current microsteps are checked earlier). @@ -1088,10 +1103,9 @@ trigger_handle_t _lf_schedule(environment_t *env, trigger_t* trigger, interval_t return(0); } - // Store a pointer to the current event in order to check the min spacing - // between this and the following event. Only necessary for actions - // that actually specify a min spacing. 
- trigger->last = (event_t*)e; + // Store the time in order to check the min spacing + // between this and any following event. + trigger->last_tag = intended_tag; // Queue the event. // NOTE: There is no need for an explicit microstep because @@ -1110,7 +1124,7 @@ trigger_handle_t _lf_schedule(environment_t *env, trigger_t* trigger, interval_t // NOTE: Rather than wrapping around to get a negative number, // we reset the handle on the assumption that much earlier // handles are irrelevant. - int return_value = env->_lf_handle++; + trigger_handle_t return_value = env->_lf_handle++; if (env->_lf_handle < 0) { env->_lf_handle = 1; } @@ -1381,7 +1395,7 @@ void schedule_output_reactions(environment_t *env, reaction_t* reaction, int wor #ifdef FEDERATED_DECENTRALIZED // Only pass down STP violation for federated programs that use decentralized coordination. // Extract the inherited STP violation bool inherited_STP_violation = reaction->is_STP_violated; - LF_PRINT_LOG("Reaction %s has STP violation status: %d.", reaction->name, reaction->is_STP_violated); + LF_PRINT_DEBUG("Reaction %s has STP violation status: %d.", reaction->name, reaction->is_STP_violated); #endif LF_PRINT_DEBUG("There are %zu outputs from reaction %s.", reaction->num_outputs, reaction->name); for (size_t i=0; i < reaction->num_outputs; i++) { @@ -1539,6 +1553,8 @@ void usage(int argc, const char* argv[]) { #ifdef FEDERATED printf(" -r, --rti \n"); printf(" The address of the RTI, which can be in the form of user@host:port or ip:port.\n\n"); + printf(" -l\n"); + printf(" Send stdout to individual log files for each federate.\n\n"); #endif printf("Command given:\n"); @@ -1662,7 +1678,7 @@ int process_args(int argc, const char* argv[]) { return 0; } const char* fid = argv[i++]; - set_federation_id(fid); + lf_set_federation_id(fid); lf_print("Federation ID for executable %s: %s", argv[0], fid); } else if (strcmp(arg, "-r") == 0 || strcmp(arg, "--rti") == 0) { if (argc < i + 1) { @@ -1670,7 +1686,7 @@ int 
process_args(int argc, const char* argv[]) { usage(argc, argv); return 0; } - parse_rti_code_t code = parse_rti_addr(argv[i++]); + parse_rti_code_t code = lf_parse_rti_addr(argv[i++]); if (code != SUCCESS) { switch (code) { case INVALID_HOST: @@ -1721,87 +1737,110 @@ void initialize_global(void) { // Federation trace object must be set before `initialize_trigger_objects` is called because it // uses tracing functionality depending on that pointer being set. #ifdef FEDERATED - set_federation_trace_object(envs->trace); + lf_set_federation_trace_object(envs->trace); #endif // Call the code-generated function to initialize all actions, timers, and ports // This is done for all environments/enclaves at the same time. _lf_initialize_trigger_objects() ; } +/** + * Flag to prevent termination function from executing twice and to signal to background + * threads to terminate. + */ +bool _lf_termination_executed = false; + +/** Flag used to disable cleanup operations on abnormal termination. */ +bool _lf_normal_termination = false; + /** * Report elapsed logical and physical times and report if any * memory allocated by set_new, set_new_array, or lf_writable_copy * has not been freed. */ void termination(void) { + if (_lf_termination_executed) return; + _lf_termination_executed = true; + environment_t *env; int num_envs = _lf_get_environments(&env); // Invoke the code generated termination function. It terminates the federated related services. - // It should only be called for the top-level environment, which, after convention, is the first environment. + // It should only be called for the top-level environment, which, by convention, is the first environment. terminate_execution(env); - // In order to free tokens, we perform the same actions we would have for a new time step. 
- for (int i = 0; iid); - if (!env->initialized) { - lf_print_warning("---- Environment %u was never initialized", env->id); + for (int i = 0; i < num_envs; i++) { + if (!env[i].initialized) { + lf_print_warning("---- Environment %u was never initialized", env[i].id); continue; } + LF_PRINT_LOG("---- Terminating environment %u, normal termination: %d", env[i].id, _lf_normal_termination); // Stop any tracing, if it is running. - stop_trace(env->trace); + // No need to acquire a mutex because if this is normal termination, all + // other threads have stopped, and if it's not, then acquiring a mutex could + // lead to a deadlock. + stop_trace_locked(env[i].trace); - _lf_start_time_step(env); + // Skip most cleanup on abnormal termination. + if (_lf_normal_termination) { + _lf_start_time_step(&env[i]); #ifdef MODAL_REACTORS - // Free events and tokens suspended by modal reactors. - _lf_terminate_modal_reactors(env); + // Free events and tokens suspended by modal reactors. + _lf_terminate_modal_reactors(&env[i]); #endif - - // If the event queue still has events on it, report that. - if (env->event_q != NULL && pqueue_size(env->event_q) > 0) { - lf_print_warning("---- There are %zu unprocessed future events on the event queue.", pqueue_size(env->event_q)); - event_t* event = (event_t*)pqueue_peek(env->event_q); - interval_t event_time = event->time - start_time; - lf_print_warning("---- The first future event has timestamp " PRINTF_TIME " after start time.", event_time); - } - // Print elapsed times. - // If these are negative, then the program failed to start up. - interval_t elapsed_time = lf_time_logical_elapsed(env); - if (elapsed_time >= 0LL) { - char time_buffer[29]; // 28 bytes is enough for the largest 64 bit number: 9,223,372,036,854,775,807 - lf_comma_separated_time(time_buffer, elapsed_time); - printf("---- Elapsed logical time (in nsec): %s\n", time_buffer); - - // If start_time is 0, then execution didn't get far enough along - // to initialize this. 
- if (start_time > 0LL) { - lf_comma_separated_time(time_buffer, lf_time_physical_elapsed()); - printf("---- Elapsed physical time (in nsec): %s\n", time_buffer); + // If the event queue still has events on it, report that. + if (env[i].event_q != NULL && pqueue_size(env[i].event_q) > 0) { + lf_print_warning("---- There are %zu unprocessed future events on the event queue.", pqueue_size(env[i].event_q)); + event_t* event = (event_t*)pqueue_peek(env[i].event_q); + interval_t event_time = event->time - start_time; + lf_print_warning("---- The first future event has timestamp " PRINTF_TIME " after start time.", event_time); + } + // Print elapsed times. + // If these are negative, then the program failed to start up. + interval_t elapsed_time = lf_time_logical_elapsed(&env[i]); + if (elapsed_time >= 0LL) { + char time_buffer[29]; // 28 bytes is enough for the largest 64 bit number: 9,223,372,036,854,775,807 + lf_comma_separated_time(time_buffer, elapsed_time); + printf("---- Elapsed logical time (in nsec): %s\n", time_buffer); + + // If start_time is 0, then execution didn't get far enough along + // to initialize this. + if (start_time > 0LL) { + lf_comma_separated_time(time_buffer, lf_time_physical_elapsed()); + printf("---- Elapsed physical time (in nsec): %s\n", time_buffer); + } } } - - // Free up memory associated with environment - environment_free(env); - - env++; - } - _lf_free_all_tokens(); // Must be done before freeing reactors. - // Issue a warning if a memory leak has been detected. - if (_lf_count_payload_allocations > 0) { - lf_print_warning("Memory allocated for messages has not been freed."); - lf_print_warning("Number of unfreed messages: %d.", _lf_count_payload_allocations); - } - if (_lf_count_token_allocations > 0) { - lf_print_warning("Memory allocated for tokens has not been freed!"); - lf_print_warning("Number of unfreed tokens: %d.", _lf_count_token_allocations); } + // Skip most cleanup on abnormal termination. 
+ if (_lf_normal_termination) { + _lf_free_all_tokens(); // Must be done before freeing reactors. + // Issue a warning if a memory leak has been detected. + if (_lf_count_payload_allocations > 0) { + lf_print_warning("Memory allocated for messages has not been freed."); + lf_print_warning("Number of unfreed messages: %d.", _lf_count_payload_allocations); + } + if (_lf_count_token_allocations > 0) { + lf_print_warning("Memory allocated for tokens has not been freed!"); + lf_print_warning("Number of unfreed tokens: %d.", _lf_count_token_allocations); + } #if !defined(LF_SINGLE_THREADED) - for (int i = 0; i < _lf_watchdog_count; i++) { - if (_lf_watchdogs[i].base->reactor_mutex != NULL) { - free(_lf_watchdogs[i].base->reactor_mutex); + for (int i = 0; i < _lf_watchdog_count; i++) { + if (_lf_watchdogs[i].base->reactor_mutex != NULL) { + free(_lf_watchdogs[i].base->reactor_mutex); + } } - } #endif - _lf_free_all_reactors(); + _lf_free_all_reactors(); + + // Free up memory associated with environment. + // Do this last so that printed warnings don't access freed memory. + for (int i = 0; i < num_envs; i++) { + environment_free(&env[i]); + } +#if defined LF_ENCLAVES + free_local_rti(); +#endif + } } diff --git a/core/tag.c b/core/tag.c index 3e9c366ff9..955f3b5ae0 100644 --- a/core/tag.c +++ b/core/tag.c @@ -99,13 +99,14 @@ instant_t _lf_physical_time() { _lf_last_reported_physical_time_ns = adjusted_clock_ns; } + /* Possibly useful, but usually noisy: LF_PRINT_DEBUG("Physical time: " PRINTF_TIME ". Elapsed: " PRINTF_TIME ". 
Offset: " PRINTF_TIME, _lf_last_reported_physical_time_ns, _lf_last_reported_physical_time_ns - start_time, _lf_time_physical_clock_offset + _lf_time_test_physical_clock_offset); - + */ return _lf_last_reported_physical_time_ns; } @@ -116,14 +117,23 @@ tag_t lf_tag(void *env) { return ((environment_t *)env)->current_tag; } +tag_t lf_tag_add(tag_t a, tag_t b) { + if (a.time == NEVER || b.time == NEVER) return NEVER_TAG; + if (a.time == FOREVER || b.time == FOREVER) return FOREVER_TAG; + if (b.time > 0) a.microstep = 0; // Ignore microstep of first arg if time of second is > 0. + tag_t result = {.time = a.time + b.time, .microstep = a.microstep + b.microstep}; + if (result.microstep < a.microstep) return FOREVER_TAG; + if (result.time < a.time && b.time > 0) return FOREVER_TAG; + if (result.time > a.time && b.time < 0) return NEVER_TAG; + return result; +} + int lf_tag_compare(tag_t tag1, tag_t tag2) { if (tag1.time < tag2.time) { - LF_PRINT_DEBUG(PRINTF_TIME " < " PRINTF_TIME, tag1.time, tag2.time); return -1; } else if (tag1.time > tag2.time) { return 1; } else if (tag1.microstep < tag2.microstep) { - LF_PRINT_DEBUG(PRINTF_TIME " and microstep < " PRINTF_TIME, tag1.time, tag2.time); return -1; } else if (tag1.microstep > tag2.microstep) { return 1; @@ -134,6 +144,8 @@ int lf_tag_compare(tag_t tag1, tag_t tag2) { tag_t lf_delay_tag(tag_t tag, interval_t interval) { if (tag.time == NEVER || interval < 0LL) return tag; + // Note that overflow in C is undefined for signed variables. + if (tag.time >= FOREVER - interval) return FOREVER_TAG; // Overflow. tag_t result = tag; if (interval == 0LL) { // Note that unsigned variables will wrap on overflow. @@ -141,12 +153,7 @@ tag_t lf_delay_tag(tag_t tag, interval_t interval) { // microsteps. result.microstep++; } else { - // Note that overflow in C is undefined for signed variables. 
- if (FOREVER - interval < result.time) { - result.time = FOREVER; - } else { - result.time += interval; - } + result.time += interval; result.microstep = 0; } return result; @@ -155,7 +162,6 @@ tag_t lf_delay_tag(tag_t tag, interval_t interval) { tag_t lf_delay_strict(tag_t tag, interval_t interval) { tag_t result = lf_delay_tag(tag, interval); if (interval != 0 && interval != NEVER && interval != FOREVER && result.time != NEVER && result.time != FOREVER) { - LF_PRINT_DEBUG("interval=%lld, result time=%lld", (long long) interval, (long long) result.time); result.time -= 1; result.microstep = UINT_MAX; } @@ -167,9 +173,6 @@ instant_t lf_time_logical(void *env) { return ((environment_t *) env)->current_tag.time; } -/** - * Return the elapsed logical time in nanoseconds since the start of execution. - */ interval_t lf_time_logical_elapsed(void *env) { return lf_time_logical(env) - start_time; } @@ -257,10 +260,10 @@ size_t lf_readable_time(char* buffer, instant_t time) { const char* units = "nanoseconds"; if (time % MSEC(1) == (instant_t) 0) { units = "milliseconds"; - time = time % MSEC(1); + time = time / MSEC(1); } else if (time % USEC(1) == (instant_t) 0) { units = "microseconds"; - time = time % USEC(1); + time = time / USEC(1); } size_t printed = lf_comma_separated_time(buffer, time); buffer += printed; diff --git a/core/threaded/CMakeLists.txt b/core/threaded/CMakeLists.txt index 268d6d4a4a..31fce534ac 100644 --- a/core/threaded/CMakeLists.txt +++ b/core/threaded/CMakeLists.txt @@ -8,8 +8,7 @@ set( scheduler_instance.c watchdog.c ) -list(APPEND INFO_SOURCES ${THREADED_SOURCES}) list(TRANSFORM THREADED_SOURCES PREPEND threaded/) -target_sources(core PRIVATE ${THREADED_SOURCES}) +list(APPEND REACTORC_SOURCES ${THREADED_SOURCES}) diff --git a/core/threaded/reactor_threaded.c b/core/threaded/reactor_threaded.c index 749550bc39..b24d703903 100644 --- a/core/threaded/reactor_threaded.c +++ b/core/threaded/reactor_threaded.c @@ -48,6 +48,7 @@ THE USE OF THIS 
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "scheduler.h" #include "tag.h" #include "environment.h" +#include "rti_local.h" #ifdef FEDERATED #include "federate.h" @@ -64,16 +65,6 @@ extern instant_t start_time; */ #define MAX_STALL_INTERVAL MSEC(1) -/** - * Unless the "fast" option is given, an LF program will wait until - * physical time matches logical time before handling an event with - * a given logical time. The amount of time is less than this given - * threshold, then no wait will occur. The purpose of this is - * to prevent unnecessary delays caused by simply setting up and - * performing the wait. - */ -#define MIN_SLEEP_DURATION USEC(10) - /** * Global mutex, used for synchronizing across environments. Mainly used for token-management and tracing */ @@ -240,28 +231,26 @@ void _lf_set_present(lf_port_base_t* port) { } } -// Forward declaration. See federate.h -void synchronize_with_other_federates(void); - /** * Wait until physical time matches or exceeds the specified logical time, - * unless -fast is given. + * unless -fast is given. For decentralized coordination, this function will + * add the STA offset to the wait time. * * If an event is put on the event queue during the wait, then the wait is * interrupted and this function returns false. It also returns false if the - * timeout time is reached before the wait has completed. + * timeout time is reached before the wait has completed. Note that this could + * return true even if a new event was placed on the queue if that event + * time matches or exceeds the specified time. * - * The mutex lock is assumed to be held by the calling thread. - * Note this this could return true even if the a new event - * was placed on the queue if that event time matches or exceeds - * the specified time. + * The mutex lock associated with the condition argument is assumed to be held by + * the calling thread. This mutex is released while waiting.
If the wait time is + * too small to actually wait (less than MIN_SLEEP_DURATION), then this function + * immediately returns true and the mutex is not released. * * @param env Environment within which we are executing. * @param logical_time Logical time to wait until physical time matches it. - * @param return_if_interrupted If this is false, then wait_util will wait - * until physical time matches the logical time regardless of whether new - * events get put on the event queue. This is useful, for example, for - * synchronizing the start of the program. + * @param condition A condition variable that can interrupt the wait. The mutex + * associated with this condition variable will be released during the wait. * * @return Return false if the wait is interrupted either because of an event * queue signal or if the wait time was interrupted early by reaching @@ -392,11 +381,6 @@ tag_t get_next_event_tag(environment_t *env) { return next_tag; } -#ifdef FEDERATED_CENTRALIZED -// The following is defined in federate.c and used in the following function. -tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply); -#endif - /** * In a federated execution with centralized coordination, this function returns * a tag that is less than or equal to the specified tag when, as far @@ -412,8 +396,10 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply * @return The tag to which it is safe to advance. */ tag_t send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply) { -#ifdef FEDERATED_CENTRALIZED - return _lf_send_next_event_tag(env, tag, wait_for_reply); +#if defined(FEDERATED_CENTRALIZED) + return lf_send_next_event_tag(env, tag, wait_for_reply); +#elif defined(LF_ENCLAVES) + return rti_next_event_tag_locked(env->enclave_info, tag); #else return tag; #endif @@ -447,10 +433,30 @@ void _lf_next_locked(environment_t *env) { _lf_handle_mode_changes(env); #endif - // Previous logical time is complete. 
+ // Get the tag of the next event on the event queue. tag_t next_tag = get_next_event_tag(env); -#ifdef FEDERATED_CENTRALIZED +#if defined LF_ENCLAVES + // Request permission to advance time. This call might block. + tag_t grant_tag = rti_next_event_tag_locked(env->enclave_info, next_tag); + + // If we are granted a tag that is less than the requested tag + // then we return and re-do the next function. We might have gotten a new + // event on the event queue. + if (lf_tag_compare(grant_tag, next_tag) < 0) return; + + // Next event might have changed while waiting for the TAG + next_tag = get_next_event_tag(env); + + // Check for starvation. If our next tag is now FOREVER_TAG, this means that + // we have no events on our event queue and that the RTI has granted us a TAG + // to advance to FOREVER. I.e. all upstream enclaves have terminated and sent + // an LTC for FOREVER. We can, in this case, terminate the current enclave. + if(!keepalive_specified && lf_tag_compare(next_tag, FOREVER_TAG) == 0) { + _lf_set_stop_tag(env, (tag_t){.time=env->current_tag.time,.microstep=env->current_tag.microstep+1}); + next_tag = get_next_event_tag(env); + } +#elif defined FEDERATED_CENTRALIZED // In case this is in a federation with centralized coordination, notify // the RTI of the next earliest tag at which this federate might produce // an event. This function may block until it is safe to advance the current @@ -475,7 +481,7 @@ void _lf_next_locked(environment_t *env) { // allow keepalive to be either true or false and could get the same // behavior with centralized coordination as with unfederated execution. -#else // not FEDERATED_CENTRALIZED +#else // not FEDERATED_CENTRALIZED nor LF_ENCLAVES if (pqueue_peek(env->event_q) == NULL && !keepalive_specified) { // There is no event on the event queue and keepalive is false. // No event in the queue @@ -558,10 +564,10 @@ void _lf_next_locked(environment_t *env) { // stick them into the reaction queue.
_lf_pop_events(env); #ifdef FEDERATED - enqueue_port_absent_reactions(env); + lf_enqueue_port_absent_reactions(env); // _lf_pop_events may have set some triggers present. extern federate_instance_t _fed; - update_max_level(_fed.last_TAG, _fed.is_last_TAG_provisional); + lf_update_max_level(_fed.last_TAG, _fed.is_last_TAG_provisional); #endif } @@ -573,9 +579,11 @@ bool lf_stop_requested = false; // See reactor.h for docs. void lf_request_stop() { // If a requested stop is pending, return without doing anything. + LF_PRINT_LOG("lf_request_stop() has been called."); lf_mutex_lock(&global_mutex); if (lf_stop_requested) { lf_mutex_unlock(&global_mutex); + LF_PRINT_LOG("Ignoring redundant lf_request_stop() call."); return; } lf_stop_requested = true; @@ -597,10 +605,10 @@ void lf_request_stop() { } #ifdef FEDERATED - // In the federated case, do not set lf_stop_requested because the RTI might grant a + // In the federated case, the RTI might grant a // later stop tag than the current tag. The above code has raised - // a barrier no greater than the requested stop tag for each enclave. - if (_lf_fd_send_stop_request_to_rti(max_current_tag) != 0) { + // a barrier no greater than max_current_tag. + if (lf_send_stop_request_to_rti(max_current_tag) != 0) { // Message was not sent to the RTI. // Decrement the barriers to reverse our previous increment. for (int i = 0; i < num_environments; i++) { @@ -668,38 +676,35 @@ void _lf_initialize_start_tag(environment_t *env) { // Add reactions invoked at tag (0,0) (including startup reactions) to the reaction queue _lf_trigger_startup_reactions(env); -#ifdef FEDERATED +#if defined FEDERATED // If env is the environment for the top-level enclave, then initialize the federate. 
environment_t *top_level_env; _lf_get_environments(&top_level_env); if (env == top_level_env) { // Reset status fields before talking to the RTI to set network port // statuses to unknown - reset_status_fields_on_input_port_triggers(); + lf_reset_status_fields_on_input_port_triggers(); // Get a start_time from the RTI - synchronize_with_other_federates(); // Resets start_time in federated execution according to the RTI. + lf_synchronize_with_other_federates(); // Resets start_time in federated execution according to the RTI. } + // The start time will likely have changed. Adjust the current tag and stop tag. env->current_tag = (tag_t){.time = start_time, .microstep = 0u}; if (duration >= 0LL) { // A duration has been specified. Recalculate the stop time. env->stop_tag = ((tag_t) {.time = start_time + duration, .microstep = 0}); } -#endif _lf_initialize_timers(env); - // If the stop_tag is (0,0), also insert the shutdown - // reactions. This can only happen if the timeout time - // was set to 0. - if (lf_tag_compare(env->current_tag, env->stop_tag) >= 0) { - _lf_trigger_shutdown_reactions(env); - } + // If we have a non-zero STA offset, then we need to allow messages to arrive + // prior to the start time. To avoid spurious STP violations, we temporarily + // set the current time back by the STA offset. + env->current_tag = (tag_t){.time = start_time - _lf_fed_STA_offset, .microstep = 0u}; -#ifdef FEDERATED // Call wait_until if federated. This is required because the startup procedure - // in synchronize_with_other_federates() can decide on a new start_time that is + // in lf_synchronize_with_other_federates() can decide on a new start_time that is // larger than the current physical time. // Therefore, if --fast was not specified, wait until physical time matches // or exceeds the start time. Microstep is ignored. @@ -713,17 +718,24 @@ void _lf_initialize_start_tag(environment_t *env) { // a chance to process incoming messages while utilizing the STA. 
LF_PRINT_LOG("Waiting for start time " PRINTF_TIME " plus STA " PRINTF_TIME ".", start_time, _lf_fed_STA_offset); - // Ignore interrupts to this wait. We don't want to start executing until - // physical time matches or exceeds the logical start time. - while (!wait_until(env, start_time, &env->event_q_changed)) {} - LF_PRINT_DEBUG("Done waiting for start time " PRINTF_TIME ".", start_time); - LF_PRINT_DEBUG("Physical time is ahead of current time by " PRINTF_TIME ". This should be small.", + // Here we wait until the start time and also release the environment mutex. + // this means that the other worker threads will be allowed to start. We need + // this to avoid potential deadlock in federated startup. + while(!wait_until(env, start_time + _lf_fed_STA_offset, &env->event_q_changed)) {}; + LF_PRINT_DEBUG("Done waiting for start time + STA offset " PRINTF_TIME ".", start_time + _lf_fed_STA_offset); + LF_PRINT_DEBUG("Physical time is ahead of current time by " PRINTF_TIME + ". This should be close to the STA offset.", lf_time_physical() - start_time); - // Each federate executes the start tag (which is the current - // tag). Inform the RTI of this if needed. - send_next_event_tag(env, env->current_tag, true); -#endif + // Restore the current tag to match the start time. + env->current_tag = (tag_t){.time = start_time, .microstep = 0u}; + + // If the stop_tag is (0,0), also insert the shutdown + // reactions. This can only happen if the timeout time + // was set to 0. + if (lf_tag_compare(env->current_tag, env->stop_tag) >= 0) { + _lf_trigger_shutdown_reactions(env); + } #ifdef FEDERATED_DECENTRALIZED // In federated execution (at least under decentralized coordination), @@ -734,12 +746,32 @@ void _lf_initialize_start_tag(environment_t *env) { // once the complete message has been read. Here, we wait for that barrier // to be removed, if appropriate before proceeding to executing tag (0,0). 
_lf_wait_on_tag_barrier(env, (tag_t){.time=start_time,.microstep=0}); - spawn_staa_thread(); -#endif // FEDERATED_DECENTRALIZED + lf_spawn_staa_thread(); + +#else // NOT FEDERATED_DECENTRALIZED + // Each federate executes the start tag (which is the current + // tag). Inform the RTI of this if needed. + send_next_event_tag(env, env->current_tag, true); +#endif // NOT FEDERATED_DECENTRALIZED + + // For messages that may have arrived while we were waiting, put + // reactions on the reaction queue. + _lf_pop_events(env); + +#else // NOT FEDERATED + _lf_initialize_timers(env); + + // If the stop_tag is (0,0), also insert the shutdown + // reactions. This can only happen if the timeout time + // was set to 0. + if (lf_tag_compare(env->current_tag, env->stop_tag) >= 0) { + _lf_trigger_shutdown_reactions(env); + } +#endif // NOT FEDERATED // Set the following boolean so that other thread(s), including federated threads, // know that the execution has started - _lf_execution_started = true; + env->execution_started = true; } /** For logging and debugging, each worker thread is numbered. */ @@ -800,7 +832,7 @@ bool _lf_worker_handle_deadline_violation_for_reaction(environment_t *env, int w * @param worker_number The ID of the worker. * @param reaction The reaction whose STP offset has been violated. * - * @return true if an STP violation occurred. false otherwise. + * @return true if an STP violation occurred and was handled. false otherwise. */ bool _lf_worker_handle_STP_violation_for_reaction(environment_t* env, int worker_number, reaction_t* reaction) { bool violation_occurred = false; @@ -831,6 +863,10 @@ bool _lf_worker_handle_STP_violation_for_reaction(environment_t* env, int worker violation_occurred = true; (*handler)(reaction->self); + // Reset the STP violation flag because it has been dealt with. + // Downstream handlers should not be invoked. 
+ reaction->is_STP_violated = false; + // If the reaction produced outputs, put the resulting // triggered reactions into the queue or execute them directly if possible. schedule_output_reactions(env, reaction, worker_number); @@ -861,7 +897,7 @@ bool _lf_worker_handle_STP_violation_for_reaction(environment_t* env, int worker * @param worker_number The ID of the worker. * @param reaction The reaction. * - * @return true if a violation occurred. false otherwise. + * @return true if a violation occurred and was handled. false otherwise. */ bool _lf_worker_handle_violations(environment_t *env, int worker_number, reaction_t* reaction) { bool violation = false; @@ -898,10 +934,11 @@ void _lf_worker_invoke_reaction(environment_t *env, int worker_number, reaction_ void try_advance_level(environment_t* env, volatile size_t* next_reaction_level) { #ifdef FEDERATED - stall_advance_level_federation(env, *next_reaction_level); + lf_stall_advance_level_federation(env, *next_reaction_level); #endif - *next_reaction_level += 1; + if (*next_reaction_level < SIZE_MAX) *next_reaction_level += 1; } + /** * The main looping logic of each LF worker thread. * This function assumes the caller holds the mutex lock. @@ -919,7 +956,7 @@ void _lf_worker_do_work(environment_t *env, int worker_number) { // lf_print_snapshot(); // This is quite verbose (but very useful in debugging reaction deadlocks). reaction_t* current_reaction_to_execute = NULL; #ifdef FEDERATED - stall_advance_level_federation(env, 0); + lf_stall_advance_level_federation(env, 0); #endif while ((current_reaction_to_execute = lf_sched_get_ready_reaction(env->scheduler, worker_number)) @@ -953,33 +990,56 @@ void _lf_worker_do_work(environment_t *env, int worker_number) { } /** - * Worker thread for the thread pool. - * This acquires the mutex lock and releases it to wait for time to - * elapse or for asynchronous events and also releases it to execute reactions. + * Worker thread for the thread pool. 
Its argument is the environment within which it is working. + * The very first worker per environment/enclave is in charge of synchronizing with + * the other enclaves by getting a TAG to (0,0); this might block until upstream enclaves + * have finished tag (0,0). This is unlike federated scheduling where each federate will + * get a PTAG to (0,0) and use network control reactions to handle upstream dependencies. * @param arg Environment within which the worker should execute. */ void* worker(void* arg) { environment_t *env = (environment_t* ) arg; - - assert(env != GLOBAL_ENVIRONMENT); - lf_mutex_lock(&env->mutex); - int worker_number = worker_thread_count++; - LF_PRINT_LOG("Worker thread %d started.", worker_number); - lf_mutex_unlock(&env->mutex); - _lf_worker_do_work(env, worker_number); + int worker_number = env->worker_thread_count++; + LF_PRINT_LOG("Environment %u: Worker thread %d started.",env->id, worker_number); + + // If we have scheduling enclaves, the first worker will block here until + // it receives a TAG for tag (0,0) from the local RTI. In federated scheduling + // we use PTAGs to get things started on tag (0,0) but those are not used + // with enclaves. + #if defined LF_ENCLAVES + if (worker_number == 0) { + // If we have scheduling enclaves, we must get a TAG to the start tag. + LF_PRINT_LOG("Environment %u: Worker thread %d waits for TAG to (0,0).",env->id, worker_number); + + tag_t tag_granted = rti_next_event_tag_locked(env->enclave_info, env->current_tag); + LF_ASSERT( lf_tag_compare(tag_granted, env->current_tag) == 0, + "We did not receive a TAG to the start tag."); + } + #endif + // Release mutex and start working. + lf_mutex_unlock(&env->mutex); + _lf_worker_do_work(env, worker_number); lf_mutex_lock(&env->mutex); // This thread is exiting, so don't count it anymore. 
- worker_thread_count--; + env->worker_thread_count--; - if (worker_thread_count == 0) { + if (env->worker_thread_count == 0) { // The last worker thread to exit will inform the RTI if needed. - // Notify the RTI that there will be no more events (if centralized coord). - // False argument means don't wait for a reply. +#if defined LF_ENCLAVES + // If we have scheduling enclaves, then we must send an LTC of FOREVER + // to grant other enclaves a TAG to FOREVER. + // TODO: Can we unify this? Preferably also have federates send NETs + rti_logical_tag_complete_locked(env->enclave_info, FOREVER_TAG); +#else + // In federated execution we send a NET to the RTI. This will result in + // giving the other federates a PTAG to FOREVER. send_next_event_tag(env, FOREVER_TAG, false); +#endif + } lf_cond_signal(&env->event_q_changed); @@ -1081,6 +1141,10 @@ int lf_reactor_c_main(int argc, const char* argv[]) { // Ignore SIGPIPE errors, which terminate the entire application if // socket write() fails because the reader has closed the socket. // Instead, cause an EPIPE error to be set when write() fails. + // NOTE: The reason for a broken socket causing a SIGPIPE signal + // instead of just having write() return an error is to robustly handle + // a foo | bar pipeline where bar crashes. The default behavior + // is for foo to also exit. 
signal(SIGPIPE, SIG_IGN); #endif // SIGPIPE @@ -1113,10 +1177,10 @@ int lf_reactor_c_main(int argc, const char* argv[]) { environment_t *envs; int num_envs = _lf_get_environments(&envs); - if (num_envs > 1) { - // TODO: This must be refined when we introduce multiple enclaves - keepalive_specified = true; - } + +#if defined LF_ENCLAVES + initialize_local_rti(envs, num_envs); +#endif // Do environment-specific setup for (int i = 0; inum_workers, NULL); @@ -1140,11 +1203,10 @@ int lf_reactor_c_main(int argc, const char* argv[]) { lf_print_error_and_exit("Could not lock environment mutex"); } - // Call the following function only once, rather than per worker thread (although - // it can be probably called in that manner as well). + // Initialize start tag + lf_print("Environment %u: ---- Intializing start tag", env->id); _lf_initialize_start_tag(env); - lf_print("Environment %u: ---- Spawning %d workers.",env->id, env->num_workers); start_threads(env); // Unlock mutex and allow threads proceed @@ -1171,6 +1233,7 @@ int lf_reactor_c_main(int argc, const char* argv[]) { LF_PRINT_LOG("---- All worker threads exited successfully."); } } + _lf_normal_termination = true; return 0; } diff --git a/core/threaded/scheduler_GEDF_NP.c b/core/threaded/scheduler_GEDF_NP.c index e45763ea72..39e7687639 100644 --- a/core/threaded/scheduler_GEDF_NP.c +++ b/core/threaded/scheduler_GEDF_NP.c @@ -50,7 +50,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "scheduler_instance.h" #include "scheduler_sync_tag_advance.h" #include "scheduler.h" -#include "semaphore.h" +#include "lf_semaphore.h" #include "trace.h" #include "util.h" diff --git a/core/threaded/scheduler_NP.c b/core/threaded/scheduler_NP.c index a9310aa2f1..9856525db9 100644 --- a/core/threaded/scheduler_NP.c +++ b/core/threaded/scheduler_NP.c @@ -49,7 +49,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "scheduler_instance.h" #include "scheduler_sync_tag_advance.h" #include "scheduler.h" -#include "semaphore.h" +#include "lf_semaphore.h" #include "trace.h" #include "util.h" #include "reactor_threaded.h" @@ -128,7 +128,7 @@ int _lf_sched_distribute_ready_reactions(lf_scheduler_t* scheduler) { scheduler->next_reaction_level - 1 ]; - LF_PRINT_DEBUG("DEBUG: start of rxn queue at %lu is %p", scheduler->next_reaction_level - 1, ((reaction_t**)scheduler->executing_reactions)[0]); + LF_PRINT_DEBUG("Start of rxn queue at %lu is %p", scheduler->next_reaction_level - 1, ((reaction_t**)scheduler->executing_reactions)[0]); if (((reaction_t**)scheduler->executing_reactions)[0] != NULL) { // There is at least one reaction to execute return 1; @@ -279,9 +279,8 @@ void lf_sched_init( if (init_sched_instance(env, &env->scheduler, number_of_workers, params)) { // Scheduler has not been initialized before. if (params == NULL || params->num_reactions_per_level == NULL) { - lf_print_error_and_exit( - "Scheduler: Internal error. The NP scheduler " - "requires params.num_reactions_per_level to be set."); + lf_print_warning("Scheduler initialized with no reactions"); + return; } } else { // Already initialized @@ -332,10 +331,13 @@ void lf_sched_init( * This must be called when the scheduler is no longer needed. 
*/ void lf_sched_free(lf_scheduler_t* scheduler) { - for (size_t j = 0; j <= scheduler->max_reaction_level; j++) { - free(((reaction_t***)scheduler->triggered_reactions)[j]); - } + if (scheduler->triggered_reactions) { + for (size_t j = 0; j <= scheduler->max_reaction_level; j++) { + free(((reaction_t***)scheduler->triggered_reactions)[j]); + } free(scheduler->triggered_reactions); + } + lf_semaphore_destroy(scheduler->semaphore); } diff --git a/core/threaded/scheduler_adaptive.c b/core/threaded/scheduler_adaptive.c index 61d95f5a00..c011361225 100644 --- a/core/threaded/scheduler_adaptive.c +++ b/core/threaded/scheduler_adaptive.c @@ -38,11 +38,15 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "environment.h" #include "scheduler_sync_tag_advance.h" #include "scheduler.h" +#include "environment.h" #include "util.h" #ifndef MAX_REACTION_LEVEL #define MAX_REACTION_LEVEL INITIAL_REACT_QUEUE_SIZE #endif + +void try_advance_level(environment_t* env, volatile size_t* next_reaction_level); + /////////////////// Forward declarations ///////////////////////// extern bool fast; static void worker_states_lock(lf_scheduler_t* scheduler, size_t worker); @@ -466,7 +470,8 @@ static void advance_level_and_unlock(lf_scheduler_t* scheduler, size_t worker) { return; } } else { - set_level(scheduler, try_advance_level(scheduler->env, &worker_assignments->current_level)); + try_advance_level(scheduler->env, &worker_assignments->current_level); + set_level(scheduler, worker_assignments->current_level); } size_t total_num_reactions = get_num_reactions(scheduler); if (total_num_reactions) { @@ -711,13 +716,13 @@ void lf_sched_init(environment_t* env, size_t number_of_workers, sched_params_t* lf_scheduler_t* scheduler = env->scheduler; scheduler->custom_data = (custom_scheduler_data_t *) calloc(1, sizeof(custom_scheduler_data_t)); - lf_assert(scheduler->custom_data, "Out of memory"); + LF_ASSERT(scheduler->custom_data, "Out of memory"); 
scheduler->custom_data->worker_states = (worker_states_t *) calloc(1, sizeof(worker_states_t)); - lf_assert(scheduler->custom_data->worker_states, "Out of memory"); + LF_ASSERT(scheduler->custom_data->worker_states, "Out of memory"); scheduler->custom_data->worker_assignments = (worker_assignments_t *) calloc(1, sizeof(worker_assignments_t)); - lf_assert(scheduler->custom_data->worker_assignments, "Out of memory"); + LF_ASSERT(scheduler->custom_data->worker_assignments, "Out of memory"); scheduler->custom_data->data_collection = (data_collection_t *) calloc(1, sizeof(data_collection_t)); - lf_assert(scheduler->custom_data->data_collection, "Out of memory"); + LF_ASSERT(scheduler->custom_data->data_collection, "Out of memory"); worker_states_init(scheduler, number_of_workers); worker_assignments_init(scheduler, number_of_workers, params); diff --git a/core/threaded/scheduler_instance.c b/core/threaded/scheduler_instance.c index afc4f5582f..77f8a21247 100644 --- a/core/threaded/scheduler_instance.c +++ b/core/threaded/scheduler_instance.c @@ -13,7 +13,7 @@ bool init_sched_instance( ) { assert(env != GLOBAL_ENVIRONMENT); - lf_assert(env, "`init_sched_instance` called without env pointer being set"); + LF_ASSERT(env, "`init_sched_instance` called without env pointer being set"); // Check if the instance is already initialized lf_critical_section_enter(env); diff --git a/core/threaded/scheduler_sync_tag_advance.c b/core/threaded/scheduler_sync_tag_advance.c index 4c68a623a1..28d3fa4582 100644 --- a/core/threaded/scheduler_sync_tag_advance.c +++ b/core/threaded/scheduler_sync_tag_advance.c @@ -36,6 +36,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "scheduler_sync_tag_advance.h" +#include "rti_local.h" #include "environment.h" #include "trace.h" #include "util.h" @@ -51,7 +52,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/** * @brief Indicator that execution of at least one tag has completed. */ -static bool _lf_logical_tag_completed = false; +static bool _latest_tag_completed = false; /** * Return true if the worker should stop now; false otherwise. @@ -59,7 +60,7 @@ static bool _lf_logical_tag_completed = false; */ bool should_stop_locked(lf_scheduler_t * sched) { // If this is not the very first step, check against the stop tag to see whether this is the last step. - if (_lf_logical_tag_completed) { + if (_latest_tag_completed) { // If we are at the stop tag, do not call _lf_next_locked() // to prevent advancing the logical time. if (lf_tag_compare(sched->env->current_tag, sched->env->stop_tag) >= 0) { @@ -81,11 +82,17 @@ bool _lf_sched_advance_tag_locked(lf_scheduler_t * sched) { environment_t* env = sched->env; logical_tag_complete(env->current_tag); + // If we are using scheduling enclaves. Notify the local RTI of the time + // advancement. + #if defined LF_ENCLAVES + rti_logical_tag_complete_locked(env->enclave_info, env->current_tag); + #endif + if (should_stop_locked(sched)) { return true; } - _lf_logical_tag_completed = true; + _latest_tag_completed = true; // Advance time. // _lf_next_locked() may block waiting for real time to pass or events to appear. diff --git a/core/trace.c b/core/trace.c index 1839e8f84d..34b7cd5d21 100644 --- a/core/trace.c +++ b/core/trace.c @@ -53,14 +53,13 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
fprintf(stderr, "WARNING: Access to trace file failed.\n"); \ fclose(trace->_lf_trace_file); \ trace->_lf_trace_file = NULL; \ - lf_critical_section_exit(trace->env); \ return -1; \ } while(0) trace_t* trace_new(environment_t* env, const char * filename) { trace_t * trace = (trace_t *) calloc(1, sizeof(trace_t)); - lf_assert(trace, "Out of memory"); + LF_ASSERT(trace, "Out of memory"); trace->_lf_trace_stop=1; trace->env = env; @@ -70,7 +69,7 @@ trace_t* trace_new(environment_t* env, const char * filename) { // Allocate memory for the filename on the trace struct trace->filename = (char*) malloc(len * sizeof(char)); - lf_assert(trace->filename, "Out of memory"); + LF_ASSERT(trace->filename, "Out of memory"); // Copy it to the struct strncpy(trace->filename, filename, len); @@ -101,7 +100,7 @@ int _lf_register_trace_event(trace_t* trace, void* pointer1, void* pointer2, _lf } int register_user_trace_event(void *self, char* description) { - lf_assert(self, "Need a pointer to a self struct to register a user trace event"); + LF_ASSERT(self, "Need a pointer to a self struct to register a user trace event"); trace_t * trace = ((self_base_t *) self)->environment->trace; return _lf_register_trace_event(trace, description, NULL, trace_user, description); } @@ -196,7 +195,10 @@ void flush_trace_locked(trace_t* trace, int worker) { // This is deferred to here so that user trace objects can be // registered in startup reactions. if (!trace->_lf_trace_header_written) { - write_trace_header(trace); + if (write_trace_header(trace) < 0) { + lf_print_error("Failed to write trace header. Trace file will be incomplete."); + return; + } trace->_lf_trace_header_written = true; } @@ -380,7 +382,7 @@ void tracepoint_user_event(void* self, char* description) { // But to be safe, then, we have acquire a mutex before calling this // because multiple reactions might be calling the same tracepoint function. // There will be a performance hit for this. 
- lf_assert(self, "A pointer to the self struct is needed to trace an event"); + LF_ASSERT(self, "A pointer to the self struct is needed to trace an event"); environment_t *env = ((self_base_t *)self)->environment; trace_t *trace = env->trace; lf_critical_section_enter(env); @@ -459,6 +461,11 @@ void tracepoint_reaction_deadline_missed(trace_t* trace, reaction_t *reaction, i void stop_trace(trace_t* trace) { lf_critical_section_enter(trace->env); + stop_trace_locked(trace); + lf_critical_section_exit(trace->env); +} + +void stop_trace_locked(trace_t* trace) { if (trace->_lf_trace_stop) { // Trace was already stopped. Nothing to do. return; @@ -477,16 +484,17 @@ void stop_trace(trace_t* trace) { flush_trace_locked(trace, 0); } trace->_lf_trace_stop = 1; - fclose(trace->_lf_trace_file); - trace->_lf_trace_file = NULL; + if (trace->_lf_trace_file != NULL) { + fclose(trace->_lf_trace_file); + trace->_lf_trace_file = NULL; + } LF_PRINT_DEBUG("Stopped tracing."); - lf_critical_section_exit(trace->env); } //////////////////////////////////////////////////////////// //// For federated execution -#ifdef FEDERATED +#if defined FEDERATED || defined LF_ENCLAVES /** * Trace federate sending a message to the RTI. 
diff --git a/core/utils/CMakeLists.txt b/core/utils/CMakeLists.txt index 41e96ff509..7ab0db8d44 100644 --- a/core/utils/CMakeLists.txt +++ b/core/utils/CMakeLists.txt @@ -1,9 +1,8 @@ -set(UTIL_SOURCES vector.c pqueue.c util.c semaphore.c) +set(UTIL_SOURCES vector.c pqueue_base.c pqueue_tag.c pqueue.c util.c lf_semaphore.c) -list(APPEND INFO_SOURCES ${UTIL_SOURCES}) list(TRANSFORM UTIL_SOURCES PREPEND utils/) -target_sources(core PRIVATE ${UTIL_SOURCES}) +list(APPEND REACTORC_SOURCES ${UTIL_SOURCES}) # Include sources from subdirectories include(utils/hashset/CMakeLists.txt) diff --git a/core/utils/hashset/CMakeLists.txt b/core/utils/hashset/CMakeLists.txt index 1748b1eec1..ea811f53d7 100644 --- a/core/utils/hashset/CMakeLists.txt +++ b/core/utils/hashset/CMakeLists.txt @@ -1,6 +1,5 @@ set(HASHSET_SOURCES hashset.c hashset_itr.c) -list(APPEND INFO_SOURCES ${HASHSET_SOURCES}) list(TRANSFORM HASHSET_SOURCES PREPEND utils/hashset/) -target_sources(core PRIVATE ${HASHSET_SOURCES}) +list(APPEND REACTORC_SOURCES ${HASHSET_SOURCES}) diff --git a/core/utils/semaphore.c b/core/utils/lf_semaphore.c similarity index 99% rename from core/utils/semaphore.c rename to core/utils/lf_semaphore.c index 244594762e..e2b807e5ce 100644 --- a/core/utils/semaphore.c +++ b/core/utils/lf_semaphore.c @@ -31,7 +31,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * @author{Soroush Bateni } */ -#include "semaphore.h" +#include "lf_semaphore.h" #include /** diff --git a/core/utils/pqueue.c b/core/utils/pqueue.c index aa1f1bd15f..f7fe4bb67c 100644 --- a/core/utils/pqueue.c +++ b/core/utils/pqueue.c @@ -1,473 +1,65 @@ -/* - * Copyright (c) 2014, Volkan Yazıcı - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Modified by Marten Lohstroh (May, 2019). - * Changes: - * - Require implementation of a pqueue_eq_elem_f function to determine - * whether two elements are equal or not; and - * - The provided pqueue_eq_elem_f implementation is used to test and - * search for equal elements present in the queue; and - * - Removed capability to reassign priorities. +/** + * @file pqueue.c + * @author Marten Lohstroh + * @author Edward A. Lee + * @copyright (c) 2020-2023, The University of California at Berkeley. + * License: BSD 2-clause + * + * @brief Priority queue definitions for the event queue and reaction queue. 
*/ -#include -#include -#include -#include -#include - #include "platform.h" #include "pqueue.h" #include "util.h" #include "lf_types.h" -#define LF_LEFT(i) ((i) << 1) -#define LF_RIGHT(i) (((i) << 1) + 1) -#define LF_PARENT(i) ((i) >> 1) - -/** - * Find an element in the queue that matches the given element up to - * and including the given maximum priority. - */ -void* find_equal(pqueue_t *q, void *e, int pos, pqueue_pri_t max) { - if (pos < 0) { - lf_print_error_and_exit("find_equal() called with a negative pos index."); - } - - // Stop the recursion when we've reached the end of the - // queue. This has to be done before accessing the queue - // to avoid segmentation fault. - if (!q || (size_t)pos >= q->size) { - return NULL; - } - - void* rval; - void* curr = q->d[pos]; - - // Stop the recursion when we've surpassed the maximum priority. - if (!curr || q->cmppri(q->getpri(curr), max)) { - return NULL; - } - - if (q->eqelem(curr, e)) { - return curr; - } else { - rval = find_equal(q, e, LF_LEFT(pos), max); - if (rval) - return rval; - else - return find_equal(q, e, LF_RIGHT(pos), max); - } - return NULL; -} - -/** - * Find an element in the queue that matches the given element up to - * but not including the given maximum priority. The matching element - * has to _also_ have the same priority. - */ -void* find_equal_same_priority(pqueue_t *q, void *e, int pos) { - if (pos < 0) { - lf_print_error_and_exit("find_equal_same_priority() called with a negative pos index."); - } - - // Stop the recursion when we've reached the end of the - // queue. This has to be done before accessing the queue - // to avoid segmentation fault. - if (!q || (size_t)pos >= q->size) { - return NULL; - } - - void* rval; - void* curr = q->d[pos]; - - // Stop the recursion once we've surpassed the priority of the element - // we're looking for. 
- if (!curr || q->cmppri(q->getpri(curr), q->getpri(e))) { - return NULL; - } - - if (q->getpri(curr) == q->getpri(e) && q->eqelem(curr, e)) { - return curr; - } else { - rval = find_equal_same_priority(q, e, LF_LEFT(pos)); - if (rval) - return rval; - else - return find_equal_same_priority(q, e, LF_RIGHT(pos)); - } - - // for (int i=1; i < q->size; i++) { - // if (q->d[i] == e) { - // return q->d[i]; - // } - // } - return NULL; -} - -pqueue_t * pqueue_init(size_t n, - pqueue_cmp_pri_f cmppri, - pqueue_get_pri_f getpri, - pqueue_get_pos_f getpos, - pqueue_set_pos_f setpos, - pqueue_eq_elem_f eqelem, - pqueue_print_entry_f prt) { - pqueue_t *q; - - if (!(q = (pqueue_t*)malloc(sizeof(pqueue_t)))) - return NULL; - - /* Need to allocate n+1 elements since element 0 isn't used. */ - if (!(q->d = (void**)malloc((n + 1) * sizeof(void *)))) { - free(q); - return NULL; - } - - q->size = 1; - q->avail = q->step = (n+1); /* see comment above about n+1 */ - q->cmppri = cmppri; - q->getpri = getpri; - q->getpos = getpos; - q->setpos = setpos; - q->eqelem = eqelem; - q->prt = prt; - return q; -} - -void pqueue_free(pqueue_t *q) { - free(q->d); - free(q); -} - -size_t pqueue_size(pqueue_t *q) { - if (!q) return 0; - // Queue element 0 exists but doesn't count since it isn't used. 
- return (q->size - 1); -} - -static size_t maxchild(pqueue_t *q, size_t i) { - size_t child_node = LF_LEFT(i); - - if (child_node >= q->size) - return 0; - - if ((child_node+1) < q->size && - (q->cmppri(q->getpri(q->d[child_node]), q->getpri(q->d[child_node+1])))) - child_node++; /* use right child instead of left */ - - return child_node; -} - -static size_t bubble_up(pqueue_t *q, size_t i) { - size_t parent_node; - void *moving_node = q->d[i]; - pqueue_pri_t moving_pri = q->getpri(moving_node); - - for (parent_node = LF_PARENT(i); - ((i > 1) && q->cmppri(q->getpri(q->d[parent_node]), moving_pri)); - i = parent_node, parent_node = LF_PARENT(i)) - { - q->d[i] = q->d[parent_node]; - q->setpos(q->d[i], i); - } - - q->d[i] = moving_node; - q->setpos(moving_node, i); - return i; -} - -static void percolate_down(pqueue_t *q, size_t i) { - size_t child_node; - void *moving_node = q->d[i]; - pqueue_pri_t moving_pri = q->getpri(moving_node); - - while ((child_node = maxchild(q, i)) && - q->cmppri(moving_pri, q->getpri(q->d[child_node]))) - { - q->d[i] = q->d[child_node]; - q->setpos(q->d[i], i); - i = child_node; - } - - q->d[i] = moving_node; - q->setpos(moving_node, i); -} - -void* pqueue_find_equal_same_priority(pqueue_t *q, void *e) { - return find_equal_same_priority(q, e, 1); -} - -void* pqueue_find_equal(pqueue_t *q, void *e, pqueue_pri_t max) { - return find_equal(q, e, 1, max); -} - -int pqueue_insert(pqueue_t *q, void *d) { - void **tmp; - size_t i; - size_t newsize; - - if (!q) return 1; - - /* allocate more memory if necessary */ - if (q->size >= q->avail) { - newsize = q->size + q->step; - if (!(tmp = (void**)realloc(q->d, sizeof(void *) * newsize))) - return 1; - q->d = tmp; - q->avail = newsize; - } - /* insert item and organize the tree */ - i = q->size++; - q->d[i] = d; - bubble_up(q, i); - - return 0; -} - -int pqueue_remove(pqueue_t *q, void *d) { - if (q->size == 1) return 0; // Nothing to remove - size_t posn = q->getpos(d); - q->d[posn] = 
q->d[--q->size]; - if (q->cmppri(q->getpri(d), q->getpri(q->d[posn]))) - bubble_up(q, posn); - else - percolate_down(q, posn); - - return 0; -} - -void* pqueue_pop(pqueue_t *q) { - if (!q || q->size == 1) - return NULL; - - void* head; - - head = q->d[1]; - q->d[1] = q->d[--q->size]; - percolate_down(q, 1); - - return head; -} - -/** - * @brief Empty 'src' into 'dest'. - * - * As an optimization, this function might swap 'src' and 'dest'. - * - * @param dest The queue to fill up - * @param src The queue to empty - */ -void pqueue_empty_into(pqueue_t** dest, pqueue_t** src) { - assert(src); - assert(dest); - assert(*src); - assert(*dest); - void* item; - if ((*dest)->size >= (*src)->size) { - while ((item = pqueue_pop(*src))) { - pqueue_insert(*dest, item); - } - } else { - while ((item = pqueue_pop(*dest))) { - pqueue_insert(*src, item); - } - - pqueue_t* tmp = *dest; - *dest = *src; - *src = tmp; - } -} - -void* pqueue_peek(pqueue_t *q) { - void *d; - if (!q || q->size == 1) - return NULL; - d = q->d[1]; - return d; -} - -void pqueue_dump(pqueue_t *q, pqueue_print_entry_f print) { - size_t i; - - LF_PRINT_DEBUG("posn\tleft\tright\tparent\tmaxchild\t..."); - for (i = 1; i < q->size ;i++) { - LF_PRINT_DEBUG("%zu\t%zu\t%zu\t%zu\t%ul\t", - i, - LF_LEFT(i), LF_RIGHT(i), LF_PARENT(i), - (unsigned int)maxchild(q, i)); - print(q->d[i]); - } -} - -void pqueue_print(pqueue_t *q, pqueue_print_entry_f print) { - pqueue_t *dup; - void *e; - - dup = pqueue_init(q->size, - q->cmppri, q->getpri, - q->getpos, q->setpos, q->eqelem, q->prt); - dup->size = q->size; - dup->avail = q->avail; - dup->step = q->step; - - memcpy(dup->d, q->d, (q->size * sizeof(void *))); - - while ((e = pqueue_pop(dup))) - print(e); - - pqueue_free(dup); -} - -static int subtree_is_valid(pqueue_t *q, int pos) { - if (pos < 0) { - lf_print_error_and_exit("subtree_is_valid() called with a negative pos index."); - } - - int left_pos = LF_LEFT(pos); - if (left_pos < 0) { - 
lf_print_error_and_exit("subtree_is_valid(): index overflow detected."); - } - - if ((size_t)left_pos < q->size) { - /* has a left child */ - if (q->cmppri(q->getpri(q->d[pos]), q->getpri(q->d[LF_LEFT(pos)]))) - return 0; - if (!subtree_is_valid(q, LF_LEFT(pos))) - return 0; - } - - int right_pos = LF_RIGHT(pos); - if (right_pos < 0) { - lf_print_error_and_exit("subtree_is_valid(): index overflow detected."); - } - if ((size_t)right_pos < q->size) { - /* has a right child */ - if (q->cmppri(q->getpri(q->d[pos]), q->getpri(q->d[LF_RIGHT(pos)]))) - return 0; - if (!subtree_is_valid(q, LF_RIGHT(pos))) - return 0; - } - return 1; -} - -int pqueue_is_valid(pqueue_t *q) { - return subtree_is_valid(q, 1); -} - -// ********** Priority Queue Support Start - -/** - * Return whether the first and second argument are given in reverse order. - */ int in_reverse_order(pqueue_pri_t thiz, pqueue_pri_t that) { return (thiz > that); } -/** - * Return false (0) regardless of reaction order. - */ int in_no_particular_order(pqueue_pri_t thiz, pqueue_pri_t that) { - return false; + return 0; } -/** - * Return whether or not the given events have matching triggers. - */ -int event_matches(void* next, void* curr) { - return (((event_t*)next)->trigger == ((event_t*)curr)->trigger); +int event_matches(void* event1, void* event2) { + return (((event_t*)event1)->trigger == ((event_t*)event2)->trigger); } -/** - * Return whether or not the given reaction_t pointers - * point to the same struct. - */ -int reaction_matches(void* next, void* curr) { - return (next == curr); +int reaction_matches(void* a, void* b) { + return (a == b); } -/** - * Report a priority equal to the time of the given event. - * Used for sorting pointers to event_t structs in the event queue. 
- */ -pqueue_pri_t get_event_time(void *a) { - return (pqueue_pri_t)(((event_t*) a)->time); +pqueue_pri_t get_event_time(void *event) { + return (pqueue_pri_t)(((event_t*) event)->time); } -/** - * Report a priority equal to the index of the given reaction. - * Used for sorting pointers to reaction_t structs in the - * blocked and executing queues. - */ -pqueue_pri_t get_reaction_index(void *a) { - return ((reaction_t*) a)->index; +pqueue_pri_t get_reaction_index(void *reaction) { + return ((reaction_t*) reaction)->index; } -/** - * Return the given event's position in the queue. - */ -size_t get_event_position(void *a) { - return ((event_t*) a)->pos; +size_t get_event_position(void *event) { + return ((event_t*) event)->pos; } -/** - * Return the given reaction's position in the queue. - */ -size_t get_reaction_position(void *a) { - return ((reaction_t*) a)->pos; +size_t get_reaction_position(void *reaction) { + return ((reaction_t*) reaction)->pos; } -/** - * Set the given event's position in the queue. - */ -void set_event_position(void *a, size_t pos) { - ((event_t*) a)->pos = pos; +void set_event_position(void *event, size_t pos) { + ((event_t*) event)->pos = pos; } -/** - * Return the given reaction's position in the queue. - */ -void set_reaction_position(void *a, size_t pos) { - ((reaction_t*) a)->pos = pos; +void set_reaction_position(void *reaction, size_t pos) { + ((reaction_t*) reaction)->pos = pos; } -/** - * Print some information about the given reaction. - * - * DEBUG function only. - */ void print_reaction(void *reaction) { reaction_t *r = (reaction_t*)reaction; - LF_PRINT_DEBUG("%s: chain_id:%llu, index: %llx, reaction: %p", + LF_PRINT_DEBUG("%s: chain_id: %llu, index: %llx, reaction: %p", r->name, r->chain_id, r->index, r); } -/** - * Print some information about the given event. - * - * DEBUG function only. 
- */ void print_event(void *event) { event_t *e = (event_t*)event; LF_PRINT_DEBUG("time: " PRINTF_TIME ", trigger: %p, token: %p", diff --git a/core/utils/pqueue_base.c b/core/utils/pqueue_base.c new file mode 100644 index 0000000000..9bba5289e7 --- /dev/null +++ b/core/utils/pqueue_base.c @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2014, Volkan Yazıcı + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Modified by Marten Lohstroh (May, 2019). 
+ * Changes: + * - Require implementation of a pqueue_eq_elem_f function to determine + * whether two elements are equal or not; and + * - The provided pqueue_eq_elem_f implementation is used to test and + * search for equal elements present in the queue; and + * - Removed capability to reassign priorities. + */ + +#include +#include +#include +#include +#include + +#include "pqueue_base.h" +#include "util.h" + +#define LF_LEFT(i) ((i) << 1) +#define LF_RIGHT(i) (((i) << 1) + 1) +#define LF_PARENT(i) ((i) >> 1) + +void* find_equal(pqueue_t *q, void *e, int pos, pqueue_pri_t max) { + if (pos < 0) { + lf_print_error_and_exit("find_equal() called with a negative pos index."); + } + + // Stop the recursion when we've reached the end of the + // queue. This has to be done before accessing the queue + // to avoid segmentation fault. + if (!q || (size_t)pos >= q->size) { + return NULL; + } + + void* rval; + void* curr = q->d[pos]; + + // Stop the recursion when we've surpassed the maximum priority. + if (!curr || q->cmppri(q->getpri(curr), max)) { + return NULL; + } + + if (q->eqelem(curr, e)) { + return curr; + } else { + rval = find_equal(q, e, LF_LEFT(pos), max); + if (rval) + return rval; + else + return find_equal(q, e, LF_RIGHT(pos), max); + } + return NULL; +} + +void* find_equal_same_priority(pqueue_t *q, void *e, int pos) { + if (pos < 0) { + lf_print_error_and_exit("find_equal_same_priority() called with a negative pos index."); + } + + // Stop the recursion when we've reached the end of the + // queue. This has to be done before accessing the queue + // to avoid segmentation fault. + if (!q || (size_t)pos >= q->size) { + return NULL; + } + + void* rval; + void* curr = q->d[pos]; + + // Stop the recursion once we've surpassed the priority of the element + // we're looking for. 
+ if (!curr || q->cmppri(q->getpri(curr), q->getpri(e))) { + return NULL; + } + + if (q->getpri(curr) == q->getpri(e) && q->eqelem(curr, e)) { + return curr; + } else { + rval = find_equal_same_priority(q, e, LF_LEFT(pos)); + if (rval) + return rval; + else + return find_equal_same_priority(q, e, LF_RIGHT(pos)); + } + + // for (int i=1; i < q->size; i++) { + // if (q->d[i] == e) { + // return q->d[i]; + // } + // } + return NULL; +} + +pqueue_t * pqueue_init(size_t n, + pqueue_cmp_pri_f cmppri, + pqueue_get_pri_f getpri, + pqueue_get_pos_f getpos, + pqueue_set_pos_f setpos, + pqueue_eq_elem_f eqelem, + pqueue_print_entry_f prt) { + pqueue_t *q; + + if (!(q = (pqueue_t*)malloc(sizeof(pqueue_t)))) + return NULL; + + /* Need to allocate n+1 elements since element 0 isn't used. */ + if (!(q->d = (void**)malloc((n + 1) * sizeof(void *)))) { + free(q); + return NULL; + } + + q->size = 1; + q->avail = q->step = (n+1); /* see comment above about n+1 */ + q->cmppri = cmppri; + q->getpri = getpri; + q->getpos = getpos; + q->setpos = setpos; + q->eqelem = eqelem; + q->prt = prt; + return q; +} + +void pqueue_free(pqueue_t *q) { + free(q->d); + free(q); +} + +size_t pqueue_size(pqueue_t *q) { + if (!q) return 0; + // Queue element 0 exists but doesn't count since it isn't used. 
+ return (q->size - 1); +} + +static size_t maxchild(pqueue_t *q, size_t i) { + size_t child_node = LF_LEFT(i); + + if (child_node >= q->size) + return 0; + + if ((child_node+1) < q->size && + (q->cmppri(q->getpri(q->d[child_node]), q->getpri(q->d[child_node+1])))) + child_node++; /* use right child instead of left */ + + return child_node; +} + +static size_t bubble_up(pqueue_t *q, size_t i) { + size_t parent_node; + void *moving_node = q->d[i]; + pqueue_pri_t moving_pri = q->getpri(moving_node); + + for (parent_node = LF_PARENT(i); + ((i > 1) && q->cmppri(q->getpri(q->d[parent_node]), moving_pri)); + i = parent_node, parent_node = LF_PARENT(i)) + { + q->d[i] = q->d[parent_node]; + q->setpos(q->d[i], i); + } + + q->d[i] = moving_node; + q->setpos(moving_node, i); + return i; +} + +static void percolate_down(pqueue_t *q, size_t i) { + size_t child_node; + void *moving_node = q->d[i]; + pqueue_pri_t moving_pri = q->getpri(moving_node); + + while ((child_node = maxchild(q, i)) && + q->cmppri(moving_pri, q->getpri(q->d[child_node]))) + { + q->d[i] = q->d[child_node]; + q->setpos(q->d[i], i); + i = child_node; + } + + q->d[i] = moving_node; + q->setpos(moving_node, i); +} + +void* pqueue_find_equal_same_priority(pqueue_t *q, void *e) { + return find_equal_same_priority(q, e, 1); +} + +void* pqueue_find_equal(pqueue_t *q, void *e, pqueue_pri_t max) { + return find_equal(q, e, 1, max); +} + +int pqueue_insert(pqueue_t *q, void *d) { + void **tmp; + size_t i; + size_t newsize; + + if (!q) return 1; + + /* allocate more memory if necessary */ + if (q->size >= q->avail) { + newsize = q->size + q->step; + if (!(tmp = (void**)realloc(q->d, sizeof(void *) * newsize))) + return 1; + q->d = tmp; + q->avail = newsize; + } + /* insert item and organize the tree */ + i = q->size++; + q->d[i] = d; + bubble_up(q, i); + + return 0; +} + +int pqueue_remove(pqueue_t *q, void *d) { + if (q->size == 1) return 0; // Nothing to remove + size_t posn = q->getpos(d); + q->d[posn] = 
q->d[--q->size]; + if (q->cmppri(q->getpri(d), q->getpri(q->d[posn]))) + bubble_up(q, posn); + else + percolate_down(q, posn); + + return 0; +} + +void* pqueue_pop(pqueue_t *q) { + if (!q || q->size == 1) + return NULL; + + void* head; + + head = q->d[1]; + q->d[1] = q->d[--q->size]; + percolate_down(q, 1); + + return head; +} + +void pqueue_empty_into(pqueue_t** dest, pqueue_t** src) { + assert(src); + assert(dest); + assert(*src); + assert(*dest); + void* item; + if ((*dest)->size >= (*src)->size) { + while ((item = pqueue_pop(*src))) { + pqueue_insert(*dest, item); + } + } else { + while ((item = pqueue_pop(*dest))) { + pqueue_insert(*src, item); + } + + pqueue_t* tmp = *dest; + *dest = *src; + *src = tmp; + } +} + +void* pqueue_peek(pqueue_t *q) { + void *d; + if (!q || q->size == 1) + return NULL; + d = q->d[1]; + return d; +} + +void pqueue_dump(pqueue_t *q, pqueue_print_entry_f print) { + size_t i; + + LF_PRINT_DEBUG("posn\tleft\tright\tparent\tmaxchild\t..."); + for (i = 1; i < q->size ;i++) { + LF_PRINT_DEBUG("%zu\t%zu\t%zu\t%zu\t%ul\t", + i, + LF_LEFT(i), LF_RIGHT(i), LF_PARENT(i), + (unsigned int)maxchild(q, i)); + print(q->d[i]); + } +} + +void pqueue_print(pqueue_t *q, pqueue_print_entry_f print) { + pqueue_t *dup; + void *e; + + dup = pqueue_init(q->size, + q->cmppri, q->getpri, + q->getpos, q->setpos, q->eqelem, q->prt); + dup->size = q->size; + dup->avail = q->avail; + dup->step = q->step; + + memcpy(dup->d, q->d, (q->size * sizeof(void *))); + + while ((e = pqueue_pop(dup))) { + if (print == NULL) { + q->prt(e); + } else { + print(e); + } + } + pqueue_free(dup); +} + +static int subtree_is_valid(pqueue_t *q, int pos) { + if (pos < 0) { + lf_print_error_and_exit("subtree_is_valid() called with a negative pos index."); + } + + int left_pos = LF_LEFT(pos); + if (left_pos < 0) { + lf_print_error_and_exit("subtree_is_valid(): index overflow detected."); + } + + if ((size_t)left_pos < q->size) { + /* has a left child */ + if 
(q->cmppri(q->getpri(q->d[pos]), q->getpri(q->d[LF_LEFT(pos)]))) + return 0; + if (!subtree_is_valid(q, LF_LEFT(pos))) + return 0; + } + + int right_pos = LF_RIGHT(pos); + if (right_pos < 0) { + lf_print_error_and_exit("subtree_is_valid(): index overflow detected."); + } + if ((size_t)right_pos < q->size) { + /* has a right child */ + if (q->cmppri(q->getpri(q->d[pos]), q->getpri(q->d[LF_RIGHT(pos)]))) + return 0; + if (!subtree_is_valid(q, LF_RIGHT(pos))) + return 0; + } + return 1; +} + +int pqueue_is_valid(pqueue_t *q) { + return subtree_is_valid(q, 1); +} diff --git a/core/utils/pqueue_tag.c b/core/utils/pqueue_tag.c new file mode 100644 index 0000000000..2d05af7bcc --- /dev/null +++ b/core/utils/pqueue_tag.c @@ -0,0 +1,169 @@ +/** + * @file pqueue_tag.c + * @author Byeonggil Jun + * @author Edward A. Lee + * @copyright (c) 2023, The University of California at Berkeley + * License in [BSD 2-clause](https://github.com/lf-lang/reactor-c/blob/main/LICENSE.md) + * + * @brief Priority queue that uses tags for sorting. + */ + +#include + +#include "pqueue_tag.h" +#include "util.h" // For lf_print +#include "platform.h" // For PRINTF_TAG + +////////////////// +// Local functions, not intended for use outside this file. + +/** + * @brief Callback function to get the priority of an element. + * Return the pointer argument cast to pqueue_pri_t because the + * element is also the priority. This function is of type pqueue_get_pri_f. + * @param element A pointer to a pqueue_tag_element_t, cast to void*. + */ +static pqueue_pri_t pqueue_tag_get_priority(void *element) { + return (pqueue_pri_t) element; +} + +/** + * @brief Callback comparison function for the tag-based priority queue. + * Return 0 if the first argument is less than second and 1 otherwise. + * This function is of type pqueue_cmp_pri_f. + * @param priority1 A pointer to a pqueue_tag_element_t, cast to pqueue_pri_t. + * @param priority2 A pointer to a pqueue_tag_element_t, cast to pqueue_pri_t. 
+*/ +static int pqueue_tag_compare(pqueue_pri_t priority1, pqueue_pri_t priority2) { + return (lf_tag_compare(((pqueue_tag_element_t*) priority1)->tag, ((pqueue_tag_element_t*) priority2)->tag) > 0); +} + +/** + * @brief Callback function to determine whether two elements are equivalent. + * Return 1 if the tags contained by given elements are identical, 0 otherwise. + * This function is of type pqueue_eq_elem_f. + * @param element1 A pointer to a pqueue_tag_element_t, cast to void*. + * @param element2 A pointer to a pqueue_tag_element_t, cast to void*. + */ +static int pqueue_tag_matches(void* element1, void* element2) { + return lf_tag_compare(((pqueue_tag_element_t*) element1)->tag, ((pqueue_tag_element_t*) element2)->tag) == 0; +} + +/** + * @brief Callback function to return the position of an element. + * This function is of type pqueue_get_pos_f. + * @param element A pointer to a pqueue_tag_element_t, cast to void*. + */ +static size_t pqueue_tag_get_position(void *element) { + return ((pqueue_tag_element_t*)element)->pos; +} + +/** + * @brief Callback function to set the position of an element. + * This function is of type pqueue_set_pos_f. + * @param element A pointer to a pqueue_tag_element_t, cast to void*. + * @param pos The position. + */ +static void pqueue_tag_set_position(void *element, size_t pos) { + ((pqueue_tag_element_t*)element)->pos = pos; +} + +/** + * @brief Callback function to print information about an element. + * This is a function of type pqueue_print_entry_f. + * @param element A pointer to a pqueue_tag_element_t, cast to void*. + */ +static void pqueue_tag_print_element(void *element) { + tag_t tag = ((pqueue_tag_element_t*) element)->tag; + lf_print("Element with tag " PRINTF_TAG ".", tag.time, tag.microstep); +} + +////////////////// +// Functions defined in pqueue_tag.h. 
+ +pqueue_tag_t* pqueue_tag_init(size_t initial_size) { + return (pqueue_tag_t*) pqueue_init( + initial_size, + pqueue_tag_compare, + pqueue_tag_get_priority, + pqueue_tag_get_position, + pqueue_tag_set_position, + pqueue_tag_matches, + pqueue_tag_print_element); +} + +void pqueue_tag_free(pqueue_tag_t *q) { + for (int i = 1; i < q->size ;i++) { + if (q->d[i] != NULL && ((pqueue_tag_element_t*)q->d[i])->is_dynamic) { + free(q->d[i]); + } + } + pqueue_free((pqueue_t*)q); +} + +size_t pqueue_tag_size(pqueue_tag_t *q) { + return pqueue_size((pqueue_t*)q); +} + +int pqueue_tag_insert(pqueue_tag_t* q, pqueue_tag_element_t* d) { + return pqueue_insert((pqueue_t*)q, (void*)d); +} + +int pqueue_tag_insert_tag(pqueue_tag_t* q, tag_t t) { + pqueue_tag_element_t* d = (pqueue_tag_element_t*) malloc(sizeof(pqueue_tag_element_t)); + d->is_dynamic = 1; + d->tag = t; + return pqueue_tag_insert(q, d); +} + +pqueue_tag_element_t* pqueue_tag_find_with_tag(pqueue_tag_t *q, tag_t t) { + // Create elements on the stack. These elements are only needed during + // the duration of this function call, so putting them on the stack is OK. 
+ pqueue_tag_element_t element = {.tag = t, .pos = 0, .is_dynamic = false}; + pqueue_tag_element_t forever = {.tag = FOREVER_TAG, .pos = 0, .is_dynamic = false}; + return pqueue_find_equal((pqueue_t*)q, (void*)&element, (pqueue_pri_t)&forever); +} + +int pqueue_tag_insert_if_no_match(pqueue_tag_t* q, tag_t t) { + if (pqueue_tag_find_with_tag(q, t) == NULL) { + return pqueue_tag_insert_tag(q, t); + } else { + return 1; + } +} + +pqueue_tag_element_t* pqueue_tag_peek(pqueue_tag_t* q) { + return (pqueue_tag_element_t*) pqueue_peek((pqueue_t*)q); +} + +tag_t pqueue_tag_peek_tag(pqueue_tag_t* q) { + pqueue_tag_element_t* element = (pqueue_tag_element_t*)pqueue_tag_peek(q); + if (element == NULL) return FOREVER_TAG; + else return element->tag; +} + +pqueue_tag_element_t* pqueue_tag_pop(pqueue_tag_t* q) { + return (pqueue_tag_element_t*)pqueue_pop((pqueue_t*)q); +} + +tag_t pqueue_tag_pop_tag(pqueue_tag_t* q) { + pqueue_tag_element_t* element = (pqueue_tag_element_t*)pqueue_tag_pop(q); + if (element == NULL) return FOREVER_TAG; + else { + tag_t result = element->tag; + if (element->is_dynamic) free(element); + return result; + } +} + +void pqueue_tag_remove(pqueue_tag_t* q, pqueue_tag_element_t* e) { + pqueue_remove((pqueue_t*) q, (void*) e); +} + +void pqueue_tag_remove_up_to(pqueue_tag_t* q, tag_t t){ + tag_t head = pqueue_tag_peek_tag(q); + while (lf_tag_compare(head, FOREVER_TAG) < 0 && lf_tag_compare(head, t) <= 0) { + pqueue_tag_pop(q); + head = pqueue_tag_peek_tag(q); + } +} \ No newline at end of file diff --git a/core/utils/util.c b/core/utils/util.c index d6bde95a7d..f03403eaf5 100644 --- a/core/utils/util.c +++ b/core/utils/util.c @@ -32,6 +32,11 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "util.h" + +#ifndef STANDALONE_RTI +#include "environment.h" +#endif + #include #include #include @@ -49,10 +54,8 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SOCKET_READ_RETRY_INTERVAL 1000000 /** - * The ID of this federate. For a non-federated execution, this will - * be -1. For a federated execution, it will be assigned when the generated function - * _lf_initialize_trigger_objects() is called. - * @see xtext/org.icyphy.linguafranca/src/org/icyphy/generator/CGenerator.xtend. + * The ID of this federate. For a non-federated execution, this will be -1. + * For a federated execution, it will be assigned in the generated code. */ int _lf_my_fed_id = -1; @@ -65,9 +68,6 @@ print_message_function_t* print_message_function = NULL; /** The level of messages to redirect to print_message_function. */ int print_message_level = -1; -/** - * Return the federate ID or -1 if this program is not part of a federation. - */ int lf_fed_id() { return _lf_my_fed_id; } @@ -79,6 +79,13 @@ void _lf_message_print( int is_error, const char* prefix, const char* format, va_list args, int log_level ) ATTRIBUTE_FORMAT_PRINTF(3, 0); +/** + * Print a fatal error message. Internal function. + */ +static void lf_vprint_fatal_error(const char* format, va_list args) { + _lf_message_print(1, "FATAL ERROR: ", format, args, LOG_LEVEL_ERROR); +} + /** * Internal implementation of the next few reporting functions. */ @@ -107,21 +114,35 @@ void _lf_message_print( // If we make multiple calls to printf(), then the results could be // interleaved between threads. // vprintf() is a version that takes an arg list rather than multiple args. 
- size_t length = strlen(prefix) + strlen(format) + 32; - char* message = (char*) malloc(length + 1); + char* message; if (_lf_my_fed_id < 0) { + size_t length = strlen(prefix) + strlen(format) + 32; + message = (char*) malloc(length + 1); snprintf(message, length, "%s%s\n", prefix, format); } else { - snprintf(message, length, "Federate %d: %s%s\n", - _lf_my_fed_id, prefix, format); +#if defined STANDALONE_RTI + size_t length = strlen(prefix) + strlen(format) + 37; + message = (char*) malloc(length + 1); + snprintf(message, length, "RTI: %s%s\n", + prefix, format); +#else + // Get the federate name from the top-level environment, which by convention is the first. + environment_t *envs; + _lf_get_environments(&envs); + char* name = envs->name; + size_t length = strlen(prefix) + strlen(format) + +strlen(name) + 32; + message = (char*) malloc(length + 1); + // If the name has prefix "federate__", strip that out. + if (strncmp(name, "federate__", 10) == 0) name += 10; + + snprintf(message, length, "Fed %d (%s): %s%s\n", + _lf_my_fed_id, name, prefix, format); +#endif // STANDALONE_RTI } if (print_message_function == NULL) { - if (is_error) { - vfprintf(stderr, message, args); - } else { - vfprintf(stdout, message, args); - } + // NOTE: Send all messages to stdout, not to stderr, so that ordering makes sense. + vfprintf(stdout, message, args); } else { (*print_message_function)(message, args); } @@ -129,14 +150,6 @@ void _lf_message_print( } } -/** - * Report an informational message on stdout with - * a newline appended at the end. - * If this execution is federated, then - * the message will be prefaced by "Federate n: ", - * where n is the federate ID. - * The arguments are just like printf(). - */ void lf_print(const char* format, ...) { va_list args; va_start (args, format); @@ -144,21 +157,10 @@ void lf_print(const char* format, ...) 
{ va_end (args); } -/** - * varargs alternative of "lf_print" - */ void lf_vprint(const char* format, va_list args) { _lf_message_print(0, "", format, args, LOG_LEVEL_INFO); } -/** - * Report an log message on stdout with the prefix - * "LOG: " and a newline appended - * at the end. If this execution is federated, then - * the message will be prefaced by "Federate n: ", - * where n is the federate ID. - * The arguments are just like printf(). - */ void lf_print_log(const char* format, ...) { va_list args; va_start (args, format); @@ -166,22 +168,10 @@ void lf_print_log(const char* format, ...) { va_end (args); } -/** - * varargs alternative of "lf_print_log" - */ void lf_vprint_log(const char* format, va_list args) { _lf_message_print(0, "LOG: ", format, args, LOG_LEVEL_LOG); } - -/** - * Report an debug message on stdout with the prefix - * "DEBUG: " and a newline appended - * at the end. If this execution is federated, then - * the message will be prefaced by "Federate n: ", - * where n is the federate ID. - * The arguments are just like printf(). - */ void lf_print_debug(const char* format, ...) { va_list args; va_start (args, format); @@ -189,17 +179,10 @@ void lf_print_debug(const char* format, ...) { va_end (args); } -/** - * varargs alternative of "lf_print_debug" - */ void lf_vprint_debug(const char* format, va_list args) { _lf_message_print(0, "DEBUG: ", format, args, LOG_LEVEL_DEBUG); } -/** - * Report an error with the prefix "ERROR: " and a newline appended - * at the end. The arguments are just like printf(). - */ void lf_print_error(const char* format, ...) { va_list args; va_start (args, format); @@ -207,17 +190,10 @@ void lf_print_error(const char* format, ...) { va_end (args); } -/** - * varargs alternative of "lf_print_error" - */ void lf_vprint_error(const char* format, va_list args) { _lf_message_print(1, "ERROR: ", format, args, LOG_LEVEL_ERROR); } -/** - * Report a warning with the prefix "WARNING: " and a newline appended - * at the end. 
The arguments are just like printf(). - */ void lf_print_warning(const char* format, ...) { va_list args; va_start (args, format); @@ -225,60 +201,28 @@ void lf_print_warning(const char* format, ...) { va_end (args); } -/** - * varargs alternative of "lf_print_warning" - */ void lf_vprint_warning(const char* format, va_list args) { _lf_message_print(1, "WARNING: ", format, args, LOG_LEVEL_WARNING); } -/** - * Report an error with the prefix "ERROR: " and a newline appended - * at the end, then exit with the failure code EXIT_FAILURE. - * The arguments are just like printf(). - */ void lf_print_error_and_exit(const char* format, ...) { va_list args; va_start (args, format); - lf_vprint_error_and_exit(format, args); + lf_vprint_fatal_error(format, args); va_end (args); + fflush(stdout); exit(EXIT_FAILURE); } -/** - * Report an error with the prefix "ERROR: " and a newline appended - * at the end, then exit with the failure code EXIT_FAILURE. - * The arguments are just like printf(). - */ -void lf_assert(bool condition, const char* format, ...) { - if (!condition) { - va_list args; - va_start (args, format); - lf_vprint_error_and_exit(format, args); - va_end (args); - exit(EXIT_FAILURE); - } -} - -/** - * varargs alternative of "lf_print_error_and_exit" - */ -void lf_vprint_error_and_exit(const char* format, va_list args) { - _lf_message_print(1, "FATAL ERROR: ", format, args, LOG_LEVEL_ERROR); +void lf_print_error_system_failure(const char* format, ...) { + va_list args; + va_start (args, format); + lf_vprint_error(format, args); + va_end (args); + lf_print_error_and_exit("Error %d: %s", errno, strerror(errno)); + exit(EXIT_FAILURE); } -/** - * Register a function to display messages. After calling this, - * all messages passed to the above print functions will be - * printed using the specified function rather than printf - * if their log level is greater than the specified level. 
- * The level should be one of LOG_LEVEL_ERROR, LOG_LEVEL_WARNING, - * LOG_LEVEL_INFO, LOG_LEVEL_LOG, or LOG_LEVEL_DEBUG. - * - * @param function The print message function or NULL to revert - * to using printf. - * @param log_level The level of messages to redirect. - */ void lf_register_print_function(print_message_function_t* function, int log_level) { print_message_function = function; print_message_level = log_level; diff --git a/docs/README.md b/docs/README.md index 3ece865a8f..2174ca6644 100644 --- a/docs/README.md +++ b/docs/README.md @@ -17,11 +17,9 @@ To build the doc files locally in your clone of the reactor-c repo, we use sphin - Install `python3`, `pip3` and `doxygen` - Install the required Python modules: - - `pip3 install sphinx` - - `pip3 install sphinx_sitemap` - - `pip3 install sphinx-rtd-theme` - - `pip3 install breathe` - - `pip3 install exhale` +``` + pip3 install sphinx sphinx_sitemap sphinx-rtd-theme breathe exhale +``` ### Build Documentation Files diff --git a/include/core/environment.h b/include/core/environment.h index db97d4f052..8670b82135 100644 --- a/include/core/environment.h +++ b/include/core/environment.h @@ -38,11 +38,12 @@ #include "lf_types.h" #include "platform.h" +#include "trace.h" // Forward declarations so that a pointers can appear in the environment struct. typedef struct lf_scheduler_t lf_scheduler_t; -typedef struct trace_t trace_t; typedef struct mode_environment_t mode_environment_t; +typedef struct enclave_info_t enclave_info_t; /** * @brief The global environment. @@ -66,6 +67,8 @@ typedef struct mode_environment_t mode_environment_t; */ typedef struct environment_t { bool initialized; + bool execution_started; // Events at the start tag have been pulled from the event queue. 
+ char *name; int id; tag_t current_tag; tag_t stop_tag; @@ -88,6 +91,7 @@ typedef struct environment_t { int reset_reactions_size; mode_environment_t* modes; trace_t* trace; + int worker_thread_count; #if defined(LF_SINGLE_THREADED) pqueue_t *reaction_q; #else @@ -103,6 +107,9 @@ typedef struct environment_t { tag_t** _lf_intended_tag_fields; int _lf_intended_tag_fields_size; #endif // FEDERATED +#ifdef LF_ENCLAVES // TODO: Consider dropping #ifdef + enclave_info_t *enclave_info; +#endif } environment_t; #if defined(MODAL_REACTORS) @@ -120,6 +127,7 @@ struct mode_environment_t { */ int environment_init( environment_t* env, + const char * name, int id, int num_workers, int num_timers, @@ -145,4 +153,12 @@ void environment_init_tags( environment_t *env, instant_t start_time, interval_t duration ); +/** + * @brief Will update the argument to point to the beginning of the array of environments in this program + * @note Is code-generated by the compiler + * @param envs A double pointer which will be dereferenced and modified + * @return int The number of environments in the array + */ +int _lf_get_environments(environment_t **envs); + #endif diff --git a/include/core/federated/clock-sync.h b/include/core/federated/clock-sync.h index eb3e4c3418..0106afc54e 100644 --- a/include/core/federated/clock-sync.h +++ b/include/core/federated/clock-sync.h @@ -149,9 +149,9 @@ uint16_t setup_clock_synchronization_with_rti(void); * Failing to complete this protocol is treated as a catastrophic * error that causes the federate to exit. * - * @param rti_socket_TCP The rti's socket + * @param rti_socket_TCP Pointer to the RTI's socket */ -void synchronize_initial_physical_clock_with_rti(int rti_socket_TCP); +void synchronize_initial_physical_clock_with_rti(int* rti_socket_TCP); /** * Handle a clock synchroninzation message T1 coming from the RTI. 
diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index aca041e5b0..e035d94c01 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -1,32 +1,12 @@ /** * @file - * @author Edward A. Lee (eal@berkeley.edu) - * - * @section LICENSE -Copyright (c) 2020, The University of California at Berkeley. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - * @section DESCRIPTION - * Data structures and functions used and defined in federate.c. + * @author Soroush Bateni + * @author Peter Donovan + * @author Edward A. Lee + * @author Anirudh Rengarajsm + * @copyright (c) 2020-2023, The University of California at Berkeley. + * License: BSD 2-clause + * @brief Data structures and functions used and defined in federate.c. 
*/ #ifndef FEDERATE_H @@ -43,13 +23,16 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ADVANCE_MESSAGE_INTERVAL MSEC(10) #endif +////////////////////////////////////////////////////////////////////////////////// +// Data types + /** * Structure that a federate instance uses to keep track of its own state. */ typedef struct federate_instance_t { /** * The TCP socket descriptor for this federate to communicate with the RTI. - * This is set by connect_to_rti(), which must be called before other + * This is set by lf_connect_to_rti(), which must be called before other * functions that communicate with the rti are called. */ int socket_TCP_RTI; @@ -59,14 +42,6 @@ typedef struct federate_instance_t { */ lf_thread_t RTI_socket_listener; - /** - * Thread responsible for setting ports to absent by an STAA offset if they - * aren't already known. - */ - #ifdef FEDERATED_DECENTRALIZED - lf_thread_t staaSetter; - #endif - /** * Number of inbound physical connections to the federate. * This can be either physical connections, or logical connections @@ -92,7 +67,7 @@ typedef struct federate_instance_t { * An array that holds the socket descriptors for inbound * connections from each federate. The index will be the federate * ID of the remote sending federate. This is initialized at startup - * to -1 and is set to a socket ID by handle_p2p_connections_from_federates() + * to -1 and is set to a socket ID by lf_handle_p2p_connections_from_federates() * when the socket is opened. * * @note There will not be an inbound socket unless a physical connection @@ -107,7 +82,7 @@ typedef struct federate_instance_t { * An array that holds the socket descriptors for outbound direct * connections to each remote federate. The index will be the federate * ID of the remote receiving federate. 
This is initialized at startup - * to -1 and is set to a socket ID by connect_to_federate() + * to -1 and is set to a socket ID by lf_connect_to_federate() * when the socket is opened. * * @note This federate will not open an outbound socket unless a physical @@ -126,7 +101,7 @@ typedef struct federate_instance_t { /** * A socket descriptor for the socket server of the federate. - * This is assigned in create_server(). + * This is assigned in lf_create_server(). * This socket is used to listen to incoming physical connections from * remote federates. Once an incoming connection is accepted, the * opened socket will be stored in @@ -135,26 +110,21 @@ typedef struct federate_instance_t { int server_socket; /** - * The port used for the server socket - * to listen for messages from other federates. - * The federate informs the RTI of this port once - * it has created its socket server by sending - * an ADDRESS_AD message (@see rti.h). + * The port used for the server socket to listen for messages from other federates. + * The federate informs the RTI of this port once it has created its socket server by + * sending an ADDRESS_AD message (@see rti.h). */ int server_port; /** - * Most recent TIME_ADVANCE_GRANT received from the RTI, or NEVER if none - * has been received. - * This is used to communicate between the listen_to_rti_TCP thread and the - * main federate thread. - * This variable should only be accessed while holding the mutex lock. + * Most recent tag advance grant (TAG) received from the RTI, or NEVER if none + * has been received. This variable should only be accessed while holding the + * mutex lock on the top-level environment. */ tag_t last_TAG; /** - * Indicates whether the last TAG received is provisional or an ordinary - * TAG. + * Indicates whether the last TAG received is provisional or an ordinary TAG. * If the last TAG has been provisional, network port absent reactions must be inserted. 
* This variable should only be accessed while holding the mutex lock. */ @@ -180,13 +150,12 @@ typedef struct federate_instance_t { bool received_stop_request_from_rti; /** - * A record of the most recently sent LTC (logical tag complete) message. + * A record of the most recently sent LTC (latest tag complete) message. * In some situations, federates can send logical_tag_complete for * the same tag twice or more in-a-row to the RTI. For example, when * _lf_next() returns without advancing tag. To prevent overwhelming * the RTI with extra messages, record the last sent logical tag - * complete message and check against it in - * _lf_logical_tag_complete(). + * complete message and check against it in lf_latest_tag_complete(). * * @note Here, the underlying assumption is that the TCP stack will * deliver the Logical TAG Complete message to the RTI eventually @@ -207,15 +176,25 @@ typedef struct federate_instance_t { */ instant_t min_delay_from_physical_action_to_federate_output; - // Trace object + /** + * Trace object for this federate, used if tracing is enabled. + */ trace_t* trace; + + #ifdef FEDERATED_DECENTRALIZED + /** + * Thread responsible for setting ports to absent by an STAA offset if they + * aren't already known. 
+ */ + lf_thread_t staaSetter; + #endif } federate_instance_t; #ifdef FEDERATED_DECENTRALIZED -typedef struct staa { +typedef struct staa_t { lf_action_base_t** actions; size_t STAA; - size_t numActions; + size_t num_actions; } staa_t; #endif @@ -226,31 +205,39 @@ typedef struct federation_metadata_t { char* rti_user; } federation_metadata_t; -extern lf_mutex_t outbound_socket_mutex; -extern lf_cond_t port_status_changed; -extern lf_cond_t logical_time_changed; +typedef enum parse_rti_code_t { + SUCCESS, + INVALID_PORT, + INVALID_HOST, + INVALID_USER, + FAILED_TO_PARSE +} parse_rti_code_t; + +////////////////////////////////////////////////////////////////////////////////// +// Global variables /** -* Generated function that sends information about connections between this federate and -* other federates where messages are routed through the RTI. Currently, this -* only includes logical connections when the coordination is centralized. This -* information is needed for the RTI to perform the centralized coordination. -* @see MSG_TYPE_NEIGHBOR_STRUCTURE in net_common.h -*/ -void send_neighbor_structure_to_RTI(int); + * Mutex lock held while performing socket write and close operations. + */ +extern lf_mutex_t lf_outbound_socket_mutex; /** - * @brief Spawns a thread to iterate through STAA structs, setting its associated ports absent - * at an offset if the port is not present with a value by a certain physical time. - * + * Condition variable for blocking on unknown federate input ports. */ -#ifdef FEDERATED_DECENTRALIZED -void spawn_staa_thread(void); -#endif +extern lf_cond_t lf_port_status_changed; + +/** + * Condition variable for blocking on tag advance. +*/ +extern lf_cond_t lf_current_tag_changed; + +////////////////////////////////////////////////////////////////////////////////// +// Public functions (in alphabetical order) /** - * Connect to the federate with the specified id.
This established - * connection will then be used in functions such as send_timed_message() + * @brief Connect to the federate with the specified id. + * + * The established connection will then be used in functions such as lf_send_tagged_message() * to send messages directly to the specified federate. * This function first sends an MSG_TYPE_ADDRESS_QUERY message to the RTI to obtain * the IP address and port number of the specified federate. It then attempts @@ -260,145 +247,109 @@ void spawn_staa_thread(void); * refer to the socket for communicating directly with the federate. * @param remote_federate_id The ID of the remote federate. */ -void connect_to_federate(uint16_t); - -/** - * Send a logical tag complete (LTC) message to the RTI - * unless an equal or later LTC has previously been sent. - * This function assumes the caller holds the mutex lock. - * - * @param tag_to_send The tag to send. - */ -void _lf_logical_tag_complete(tag_t); +void lf_connect_to_federate(uint16_t); /** - * Connect to the RTI at the specified host and port and return - * the socket descriptor for the connection. If this fails, the - * program exits. If it succeeds, it sets the _fed.socket_TCP_RTI global - * variable to refer to the socket for communicating with the RTI. + * @brief Connect to the RTI at the specified host and port. + * + * This will return the socket descriptor for the connection. + * If port_number is 0, then start at DEFAULT_PORT and increment + * the port number on each attempt. If an attempt fails, wait CONNECT_RETRY_INTERVAL + * and try again. If it fails after CONNECT_MAX_RETRIES, the program exits. + * If it succeeds, it sets the _fed.socket_TCP_RTI global variable to refer to + * the socket for communicating with the RTI. * @param hostname A hostname, such as "localhost". - * @param port_number A port number. + * @param port_number A port number or 0 to start with the default. 
*/ -void connect_to_rti(const char*, int); +void lf_connect_to_rti(const char* hostname, int port_number); /** - * Thread that listens for inputs from other federates. - * This thread listens for messages of type MSG_TYPE_P2P_MESSAGE, - * MSG_TYPE_P2P_TAGGED_MESSAGE, or MSG_TYPE_PORT_ABSENT (@see net_common.h) from the specified - * peer federate and calls the appropriate handling function for - * each message type. If an error occurs or an EOF is received - * from the peer, then this procedure sets the corresponding - * socket in _fed.sockets_for_inbound_p2p_connections - * to -1 and returns, terminating the thread. - * @param fed_id_ptr A pointer to a uint16_t containing federate ID being listened to. - * This procedure frees the memory pointed to before returning. + * @brief Create a server to listen to incoming P2P connections. + * + * Such connections are used for physical connections or any connection if using + * decentralized coordination. This function only handles the creation of the server socket. + * The bound port for the server socket is then sent to the RTI by sending an + * MSG_TYPE_ADDRESS_ADVERTISEMENT message (@see net_common.h). + * This function expects no response from the RTI. + * + * If a port is specified by the user, that will be used. + * Otherwise, a random port will be assigned. If the bind fails, + * it will retry after PORT_BIND_RETRY_INTERVAL until it has tried + * PORT_BIND_RETRY_LIMIT times. Then it will fail. + * + * @param specified_port The port specified by the user or 0 to use a random port. */ -void* listen_to_federates(void*); +void lf_create_server(int specified_port); /** - * Create a server to listen to incoming physical - * connections from remote federates. This function - * only handles the creation of the server socket. - * The reserved port for the server socket is then - * sent to the RTI by sending an MSG_TYPE_ADDRESS_ADVERTISEMENT message - * (@see net_common.h). This function expects no response - * from the RTI. 
- * - * If a port is specified by the user, that will be used - * as the only possibility for the server. This function - * will fail if that port is not available. If a port is not - * specified, the STARTING_PORT (@see net_common.h) will be used. - * The function will keep incrementing the port in this case - * until the number of tries reaches PORT_RANGE_LIMIT. - * - * @note This function is similar to create_server(...) in rti.c. - * However, it contains specific log messages for the peer to - * peer connections between federates. It also additionally - * sends an address advertisement (MSG_TYPE_ADDRESS_ADVERTISEMENT) message to the - * RTI informing it of the port. - * - * @param specified_port The specified port by the user. + * @brief Enqueue port absent reactions. + * + * These reactions will send a MSG_TYPE_PORT_ABSENT + * message to downstream federates if a given network output port is not present. + * @param env The environment of the federate */ -void create_server(int specified_port); +void lf_enqueue_port_absent_reactions(environment_t* env); /** - * Thread to accept connections from other federates that send this federate - * messages directly (not through the RTI). This thread starts a thread for - * each accepted socket connection and, once it has opened all expected + * @brief Thread to accept connections from other federates. + * + * This thread accepts connections from federates that send messages directly + * to this one (not through the RTI). This thread starts a thread for + * each accepted socket connection to read messages and, once it has opened all expected * sockets, exits. * @param ignored No argument needed for this thread. */ -void* handle_p2p_connections_from_federates(void*); +void* lf_handle_p2p_connections_from_federates(void*); /** - * Send a port absent message to federate with fed_ID, informing the - * remote federate that the current federate will not produce an event - * on this network port at the current logical time. 
+ * @brief Send a latest tag complete (LTC) signal to the RTI. + * + * This avoids the send if an equal or later LTC has previously been sent. + * + * This function assumes the caller holds the mutex lock + * on the top-level environment. * - * @param env The environment in which we are executing - * @param additional_delay The offset applied to the timestamp - * using after. The additional delay will be greater or equal to zero - * if an after is used on the connection. If no after is given in the - * program, -1 is passed. - * @param port_ID The ID of the receiving port. - * @param fed_ID The fed ID of the receiving federate. - */ -void send_port_absent_to_federate(environment_t* env, interval_t, unsigned short, unsigned short); - -/** - * Enqueue port absent reactions that will send a PORT_ABSENT - * message to downstream federates if a given network output port is not present. + * @param tag_to_send The tag to send. */ -void enqueue_port_absent_reactions(environment_t* env); +void lf_latest_tag_complete(tag_t); /** - * @brief Wait until inputs statuses are known up to and including the specified level. - * Specifically, wait until the specified level is less that the max level allowed to - * advance (MLAA). - * @param env The environment (which should always be the top-level environment). - * @param level The level to which we would like to advance. + * @brief Parse the address of the RTI and store them into the global federation_metadata struct. + * @return a parse_rti_code_t indicating the result of the parse. */ -void stall_advance_level_federation(environment_t* env, size_t level); +parse_rti_code_t lf_parse_rti_addr(const char* rti_addr); /** - * @brief Update the max level allowed to advance (MLAA). - * If the specified tag is greater than the current_tag of the top-level environment - * (or equal an is_provisional is false), then set the MLAA to MAX_INT and return. - * This removes any barriers on execution at the current tag due to network inputs. 
- * Otherwise, set the MLAA to the minimum level over all (non-physical) network input ports - * where the status of the input port is not known at that current_tag. + * @brief Reset the status fields on network input ports to unknown or absent. * - * This function assumes that the caller holds the mutex. - * - * @param tag The latest TAG or PTAG received by this federate. - * @param is_provisional Whether the tag was provisional. - * @return True if the MLAA changed. + * This will reset to absent if the last_known_status_tag field of the port + * is greater than or equal to the current tag of the top-level environment. + * This should be overridden to present if an event gets scheduled. + * Otherwise, set the status to unknown. + * @note This function must be called at the beginning of each + * logical time. */ -bool update_max_level(tag_t tag, bool is_provisional); +void lf_reset_status_fields_on_input_port_triggers(); /** - * Send a message to another federate directly or via the RTI. - * This method assumes that the caller does not hold the outbound_socket_mutex lock, + * @brief Send a message to another federate. + * + * This function is used for physical connections + * between federates. If the socket connection to the remote federate or the RTI has been broken, + * then this returns -1 without sending. Otherwise, it returns 0. + * + * This method assumes that the caller does not hold the lf_outbound_socket_mutex lock, * which it acquires to perform the send. * - * If the socket connection to the remote federate or the RTI has been broken, - * then this returns 0 without sending. Otherwise, it returns 1. - * - * @note This function is similar to send_timed_message() except that it - * does not deal with time and timed_messages. - * - * @param message_type The type of the message being sent. - * Currently can be MSG_TYPE_TAGGED_MESSAGE for messages sent via - * RTI or MSG_TYPE_P2P_TAGGED_MESSAGE for messages sent between - * federates.
+ * @param message_type The type of the message being sent (currently only MSG_TYPE_P2P_MESSAGE). * @param port The ID of the destination port. * @param federate The ID of the destination federate. - * @param next_destination_str The name of the next destination in string format + * @param next_destination_str The name of the next destination in string format (for reporting). * @param length The message length. * @param message The message. - * @return 1 if the message has been sent, 0 otherwise. + * @return 0 if the message has been sent, -1 otherwise. */ -int send_message(int message_type, +int lf_send_message(int message_type, unsigned short port, unsigned short federate, const char* next_destination_str, @@ -406,70 +357,201 @@ int send_message(int message_type, unsigned char* message); /** - * Send the specified timestamped message to the specified port in the - * specified federate via the RTI or directly to a federate depending on - * the given socket. The timestamp is calculated as current_logical_time + - * additional delay which is greater than or equal to zero. - * The port should be an input port of a reactor in - * the destination federate. This version does include the timestamp - * in the message. The caller can reuse or free the memory after this returns. + * @brief Send information about connections to the RTI. + * + * This is a generated function that sends information about connections between this federate + * and other federates where messages are routed through the RTI. Currently, this + * only includes logical connections when the coordination is centralized. This + * information is needed for the RTI to perform the centralized coordination. + * @see MSG_TYPE_NEIGHBOR_STRUCTURE in net_common.h + */ +void lf_send_neighbor_structure_to_RTI(int); + +/** + * @brief Send a next event tag (NET) signal. 
+ * + * If this federate depends on upstream federates or sends data to downstream + * federates, then send to the RTI a NET, which will give the tag of the + * earliest event on the event queue, or, if the queue is empty, the timeout + * time, or, if there is no timeout, FOREVER. * - * If the socket connection to the remote federate or the RTI has been broken, - * then this returns 0 without sending. Otherwise, it returns 1. + * If there are network outputs that + * depend on physical actions, then insert a dummy event to ensure this federate + * advances its tag so that downstream federates can make progress. * - * This method assumes that the caller does not hold the outbound_socket_mutex lock, - * which it acquires to perform the send. + * A NET is a promise saying that, absent network inputs, this federate will + * not produce an output message with tag earlier than the NET value. + * + * If there are upstream federates, then after sending a NET, this will block + * until either the RTI grants the advance to the requested time or the wait + * for the response from the RTI is interrupted by a change in the event queue + * (e.g., a physical action triggered or a network message arrived). + * If there are no upstream federates, then it will not wait for a TAG + * (which won't be forthcoming anyway) and returns the earliest tag on the event queue. + * + * If the federate has neither upstream nor downstream federates, then this + * returns the specified tag immediately without sending anything to the RTI. + * + * If there is at least one physical action somewhere in the federate that can + * trigger an output to a downstream federate, then the NET is required to be + * less than the current physical time. If physical time is less than the + * earliest event in the event queue (or the event queue is empty), then this + * function will insert a dummy event with a tag equal to the current physical + * time (and a microstep of 0). 
This will enforce advancement of tag for this + * federate and causes a NET message to be sent repeatedly as physical time + * advances with the time interval between messages controlled by the target + * parameter coordination-options: {advance-message-interval timevalue}. It will + * stop creating dummy events if and when its event queue has an event with a + * timestamp less than physical time. + * + * If wait_for_reply is false, then this function will simply send the + * specified tag and return that tag immediately. This is useful when a + * federate is shutting down and will not be sending any more messages at all. + * + * In all cases, this returns either the specified tag or + * another tag when it is safe to advance logical time to the returned tag. + * The returned tag may be less than the specified tag if there are upstream + * federates and either the RTI responds with a lesser tag or + * the wait for a response from the RTI is interrupted by a + * change in the event queue. + * + * This function is used in centralized coordination only. + * + * This function assumes the caller holds the mutex lock. + * + * @param env The environment of the federate + * @param tag The tag. + * @param wait_for_reply If true, wait for a reply. + */ +tag_t lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply); + +/** + * @brief Send a port absent message. + * + * This informs the remote federate that it will not receive a message with tag less than the + * current tag of the specified environment delayed by the additional_delay. + * + * @param env The environment from which to get the current tag. + * @param additional_delay The after delay of the connection or NEVER if none. + * @param port_ID The ID of the receiving port. + * @param fed_ID The fed ID of the receiving federate. 
+ */ +void lf_send_port_absent_to_federate( + environment_t* env, + interval_t additional_delay, + unsigned short port_ID, + unsigned short fed_ID); + +/** + * @brief Send a MSG_TYPE_STOP_REQUEST message to the RTI. + * + * The payload is the specified tag plus one microstep. If this federate has previously + * received a stop request from the RTI, then do not send the message and + * return 1. Return -1 if the socket is disconnected. Otherwise, return 0. + * @return 0 if the message is sent. + */ +int lf_send_stop_request_to_rti(tag_t stop_tag); + +/** + * @brief Send a tagged message to the specified port of the specified federate. + * + * The tag will be the current tag of the specified environment delayed by the specified additional_delay. + * If the delayed tag falls after the timeout time, then the message is not sent and -1 is returned. + * The caller can reuse or free the memory storing the message after this returns. + * + * If the message fails to send (e.g. the socket connection is broken), then the + * response depends on the message_type. For MSG_TYPE_TAGGED_MESSAGE, the message is + * supposed to go via the RTI, and failure to communicate with the RTI is a critical failure. + * In this case, the program will exit with an error message. If the message type is + * MSG_TYPE_P2P_TAGGED_MESSAGE, then the failure is not critical. It may be due to the + * remote federate having exited, for example, because its safe-to-process offset led it + * to believe that there were no messages forthcoming. In this case, on failure to send + * the message, this function returns -1. * - * @note This function is similar to send_message() except that it - * sends timed messages and also contains logics related to time. + * This method assumes that the caller does not hold the lf_outbound_socket_mutex lock, + * which it acquires to perform the send.
* - * @param env The environment in which we are executing - * @param additional_delay The offset applied to the timestamp - * using after. The additional delay will be greater or equal to zero - * if an after is used on the connection. If no after is given in the - * program, -1 is passed. - * @param message_type The type of the message being sent. - * Currently can be MSG_TYPE_TAGGED_MESSAGE for messages sent via - * RTI or MSG_TYPE_P2P_TAGGED_MESSAGE for messages sent between - * federates. + * @param env The environment from which to get the current tag. + * @param additional_delay The after delay on the connection or NEVER if there is none. + * @param message_type The type of the message being sent. Currently can be + * MSG_TYPE_TAGGED_MESSAGE for messages sent via the RTI or MSG_TYPE_P2P_TAGGED_MESSAGE + * for messages sent directly between federates. * @param port The ID of the destination port. * @param federate The ID of the destination federate. * @param next_destination_str The next destination in string format (RTI or federate) * (used for reporting errors). * @param length The message length. * @param message The message. - * @return 1 if the message has been sent, 0 otherwise. + * @return 0 if the message has been sent, -1 otherwise. + */ +int lf_send_tagged_message( + environment_t* env, + interval_t additional_delay, + int message_type, + unsigned short port, + unsigned short federate, + const char* next_destination_str, + size_t length, + unsigned char* message); + +/** + * @brief Set the federation_id of this federate. + * @param fid The federation ID. + */ +void lf_set_federation_id(const char* fid); + +/** + * @brief Set the trace object for this federate (used when tracing is enabled). + * + * @param trace The trace object. + */ +void lf_set_federation_trace_object(trace_t * trace); + +#ifdef FEDERATED_DECENTRALIZED +/** + * @brief Spawn a thread to iterate through STAA structs.
+ * + * This will set their associated ports absent + * at an offset if the port is not present with a value by a certain physical time. + */ +void lf_spawn_staa_thread(void); +#endif + +/** + * @brief Wait until input statuses are known up to and including the specified level. + * + * Specifically, wait until the specified level is less than the max level allowed to + * advance (MLAA). + * @param env The environment (which should always be the top-level environment). + * @param level The level to which we would like to advance. */ -int send_timed_message(environment_t*, - interval_t, - int, - unsigned short, - unsigned short, - const char*, - size_t, - unsigned char*); +void lf_stall_advance_level_federation(environment_t* env, size_t level); /** - * Synchronize the start with other federates via the RTI. + * @brief Synchronize the start with other federates via the RTI. + * * This assumes that a connection to the RTI is already made * and _lf_rti_socket_TCP is valid. It then sends the current logical * time to the RTI and waits for the RTI to respond with a specified * time. It starts a thread to listen for messages from the RTI. */ -void synchronize_with_other_federates(); +void lf_synchronize_with_other_federates(); /** - * Wait until the status of network port "port_ID" is known. - * - * In decentralized coordination mode, the wait time is capped by STAA + STA, - * after which the status of the port is presumed to be absent. + * @brief Update the max level allowed to advance (MLAA). + * + * If the specified tag is greater than the current_tag of the top-level environment + * (or equal and is_provisional is false), then set the MLAA to INT_MAX and return. + * This removes any barriers on execution at the current tag due to network inputs. + * Otherwise, set the MLAA to the minimum level over all (non-physical) network input ports + * where the status of the input port is not known at that current_tag.
* - * This function assumes the holder does not hold a mutex. + * This function assumes that the caller holds the mutex. * - * @param env The environment in which we are executing - * @param port_ID The ID of the network port - * @param STAA The safe-to-assume-absent threshold for the port + * @param tag The latest TAG or PTAG received by this federate. + * @param is_provisional Whether the tag was provisional. + * @return True if the MLAA changed. */ -void wait_until_port_status_known(environment_t* env, int portID, interval_t STAA); +bool lf_update_max_level(tag_t tag, bool is_provisional); #endif // FEDERATE_H diff --git a/include/core/federated/net_common.h b/include/core/federated/network/net_common.h similarity index 86% rename from include/core/federated/net_common.h rename to include/core/federated/network/net_common.h index 38001cc0ba..9ea720fd77 100644 --- a/include/core/federated/net_common.h +++ b/include/core/federated/network/net_common.h @@ -37,12 +37,9 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Each federate attempts to connect with an RTI at the IP address * put into its code by the code generator (i.e., it attempts to - * open a TCP connection). It starts by trying the - * port number given by STARTING_PORT and increments the port number - * from there until it successfully connects. The maximum port number - * it will try before giving up is STARTING_PORT + PORT_RANGE_LIMIT. - * - * FIXME: What if a port is specified in the "at" of the federated statement? + * open a TCP connection). If an explicit port is given in the `at` clause + * on the `federated reactor` statement, it will use that port. Otherwise, it will + * use DEFAULT_PORT. * * When it has successfully opened a TCP connection, the first message it sends * to the RTI is a MSG_TYPE_FED_IDS message, which contains the ID of this federate @@ -137,9 +134,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* parameter of the target is "decentralized" and the federate has * inbound connections from other federates, then it starts a socket * server to listen for incoming connections from those federates. - * It attempts to create the server at the port given by STARTING_PORT, - * and if this fails, increments the port number from there until a - * port is available. It then sends to the RTI an MSG_TYPE_ADDRESS_ADVERTISEMENT message + * It then sends to the RTI an MSG_TYPE_ADDRESS_ADVERTISEMENT message * with the port number as a payload. The federate then creates a thread * to listen for incoming socket connections and messages. * @@ -156,18 +151,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Physical connections also use the above P2P sockets between * federates even if the coordination is centralized. * - * Note: Peer-to-peer sockets can be closed by the downstream federate. - * For example, when a downstream federate reaches its stop time, then - * it will stop accepting physical messages. To achieve an orderly shutdown, - * the downstream federate sends a MSG_TYPE_CLOSE_REQUEST message to the upstream - * one and the upstream federate handles closing the socket. This way, any - * messages that are in the middle of being sent while the downstream - * federate shuts down will successfully traverse the socket, even if - * only to be ignored by the downstream federate. It is valid to ignore - * such messages if the connection is physical or if the coordination is - * decentralized and the messages arrive after the STP offset of the - * downstream federate (i.e., they are "tardy"). - * * Afterward, the federates and the RTI decide on a common start time by having * each federate report a reading of its physical clock to the RTI on a * `MSG_TYPE_TIMESTAMP`. The RTI broadcasts the maximum of these readings plus @@ -180,7 +163,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* each federate has a valid event at the start tag (start time, 0) and it will * inform the RTI of this event. * Subsequently, at the conclusion of each tag, each federate will send a - * `MSG_TYPE_LOGICAL_TAG_COMPLETE` followed by a `MSG_TYPE_NEXT_EVENT_TAG` (see + * `MSG_TYPE_LATEST_TAG_COMPLETE` followed by a `MSG_TYPE_NEXT_EVENT_TAG` (see * the comment for each message for further explanation). Each federate would * have to wait for a `MSG_TYPE_TAG_ADVANCE_GRANT` or a * `MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT` before it can advance to a @@ -208,7 +191,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define UDP_TIMEOUT_TIME SEC(1) - /** * Size of the buffer used for messages sent between federates. * This is used by both the federates and the rti, so message lengths @@ -217,63 +199,61 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define FED_COM_BUFFER_SIZE 256u /** - * Number of nanoseconds that elapse between a federate's attempts - * to connect to the RTI. + * Time between a federate's attempts to connect to the RTI. */ -#define CONNECT_RETRY_INTERVAL 2000000000LL +#define CONNECT_RETRY_INTERVAL MSEC(500) /** * Bound on the number of retries to connect to the RTI. * A federate will retry every CONNECT_RETRY_INTERVAL seconds - * this many times before giving up. E.g., 500 retries every - * 2 seconds results in retrying for about 16 minutes. + * this many times before giving up. + */ +#define CONNECT_MAX_RETRIES 100 + +/** + * Maximum number of port addresses that a federate will try to connect to the RTI on. + * If you are using automatic ports beginning at DEFAULT_PORT, this puts an upper bound + * on the number of RTIs that can be running on the same host.
*/ -#define CONNECT_NUM_RETRIES 500 +#define MAX_NUM_PORT_ADDRESSES 16 /** - * Number of nanoseconds that a federate waits before asking + * Time that a federate waits before asking * the RTI again for the port and IP address of a federate * (an MSG_TYPE_ADDRESS_QUERY message) after the RTI responds that it - * does not know. + * does not know. This allows time for federates to start separately. */ -#define ADDRESS_QUERY_RETRY_INTERVAL 100000000LL +#define ADDRESS_QUERY_RETRY_INTERVAL MSEC(250) /** - * Number of nanoseconds that a federate waits before trying - * another port for the RTI. This is to avoid overwhelming - * the OS and the socket with too many calls. - * FIXME: Is this too small? + * Time to wait before re-attempting to bind to a port. + * When a process closes, the network stack typically waits between 30 and 120 + * seconds before releasing the port. This is to allow for delayed packets so + * that a new process does not receive packets from a previous process. + * Here, we limit the retries to 60 seconds. */ -#define PORT_KNOCKING_RETRY_INTERVAL 10000LL +#define PORT_BIND_RETRY_INTERVAL SEC(1) /** - * Default starting port number for the RTI and federates' socket server. - * Unless a specific port has been specified by the LF program in the "at" - * for the RTI, when the federates start up, they will attempt - * to open a socket server - * on this port, and, if this fails, increment the port number and - * try again. The number of increments is limited by PORT_RANGE_LIMIT. - * FIXME: Clarify what happens if a specific port has been given in "at". + * Number of attempts to bind to a port before giving up. */ -#define STARTING_PORT 15045u +#define PORT_BIND_RETRY_LIMIT 60 /** - * Number of ports to try to connect to. Unless the LF program specifies - * a specific port number to use, the RTI or federates will attempt to start - * a socket server on port STARTING_PORT. 
If that port is not available (e.g., - * another RTI is running or has recently exited), then it will try the - * next port, STARTING_PORT+1, and keep incrementing the port number up to this - * limit. If no port between STARTING_PORT and STARTING_PORT + PORT_RANGE_LIMIT - * is available, then the RTI or the federate will fail to start. This number, therefore, - * limits the number of RTIs and federates that can be simultaneously - * running on any given machine without assigning specific port numbers. + * Default port number for the RTI. + * Unless a specific port has been specified by the LF program in the "at" + * for the RTI or on the command line, when the RTI starts up, it will attempt + * to open a socket server on this port. */ -#define PORT_RANGE_LIMIT 1024 +#define DEFAULT_PORT 15045u /** * Delay the start of all federates by this amount. - * FIXME: More. - * FIXME: Should use the latency estimates that were + * This helps ensure that the federates do not start at the same time. + * Each federate has provided its current physical time to the RTI, and + * the RTI has picked the largest of these. It will add this quantity + * and declare that to be the start time. + * FIXME: This could use the latency estimates that were * acquired during initial clock synchronization. */ #define DELAY_START SEC(1) @@ -323,7 +303,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * to the RTI. This is its first message to the RTI. * The RTI will respond with either MSG_TYPE_REJECT, MSG_TYPE_ACK, or MSG_TYPE_UDP_PORT. * If the federate is a C target LF program, the generated federate - * code does this by calling synchronize_with_other_federates(), + * code does this by calling lf_synchronize_with_other_federates(), * passing to it its federate ID. */ #define MSG_TYPE_FED_IDS 1 @@ -397,20 +377,23 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #define MSG_TYPE_MESSAGE 3 -/** Byte identifying that the federate is ending its execution. */ +/** + * Byte identifying that the federate or the RTI is ending its execution. + */ #define MSG_TYPE_RESIGN 4 -/** Byte identifying a timestamped message to forward to another federate. - * The next two bytes will be the ID of the destination reactor port. - * The next two bytes are the destination federate ID. - * The four bytes after that will be the length of the message. - * The next eight bytes will be the timestamp of the message. - * The next four bytes will be the microstep of the message. - * The remaining bytes are the message. +/** + * Byte identifying a timestamped message to forward to another federate. + * The next two bytes will be the ID of the destination reactor port. + * The next two bytes are the destination federate ID. + * The four bytes after that will be the length of the message. + * The next eight bytes will be the timestamp of the message. + * The next four bytes will be the microstep of the message. + * The remaining bytes are the message. * - * With centralized coordination, all such messages flow through the RTI. - * With decentralized coordination, tagged messages are sent peer-to-peer - * between federates and are marked with MSG_TYPE_P2P_TAGGED_MESSAGE. + * With centralized coordination, all such messages flow through the RTI. + * With decentralized coordination, tagged messages are sent peer-to-peer + * between federates and are marked with MSG_TYPE_P2P_TAGGED_MESSAGE. */ #define MSG_TYPE_TAGGED_MESSAGE 5 @@ -451,12 +434,12 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT 8 /** - * Byte identifying a logical tag complete (LTC) message sent by a federate + * Byte identifying a latest tag complete (LTC) message sent by a federate * to the RTI. * The next eight bytes will be the timestep of the completed tag. 
* The next four bytes will be the microsteps of the completed tag. */ -#define MSG_TYPE_LOGICAL_TAG_COMPLETE 9 +#define MSG_TYPE_LATEST_TAG_COMPLETE 9 /////////// Messages used in lf_request_stop() /////////////// //// Overview of the algorithm: @@ -598,14 +581,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define MSG_TYPE_P2P_TAGGED_MESSAGE 17 -/** - * Byte identifying a message that a downstream federate sends to its - * upstream counterpart to request that the socket connection be closed. - * This is the only message that should flow upstream on such socket - * connections. - */ -#define MSG_TYPE_CLOSE_REQUEST 18 - //////////////////////////////////////////////// /** * Physical clock synchronization messages according to PTP. @@ -685,6 +660,11 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define MSG_TYPE_NEIGHBOR_STRUCTURE 24 #define MSG_TYPE_NEIGHBOR_STRUCTURE_HEADER_SIZE 9 +/** + * Byte identifying that the federate or the RTI has failed. + */ +#define MSG_TYPE_FAILED 25 + ///////////////////////////////////////////// //// Rejection codes diff --git a/include/core/federated/net_util.h b/include/core/federated/network/net_util.h similarity index 74% rename from include/core/federated/net_util.h rename to include/core/federated/network/net_util.h index cc621115d7..6346e21d38 100644 --- a/include/core/federated/net_util.h +++ b/include/core/federated/network/net_util.h @@ -48,8 +48,11 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include "../platform.h" -#include "../tag.h" +#include "../../platform.h" +#include "../../tag.h" + +#define NUM_SOCKET_RETRIES 10 +#define DELAY_BETWEEN_SOCKET_RETRIES MSEC(100) #define HOST_LITTLE_ENDIAN 1 #define HOST_BIG_ENDIAN 2 @@ -62,24 +65,55 @@ int host_is_big_endian(void); #ifdef FEDERATED +/** + * Mutex protecting socket close operations. 
+ */ +extern lf_mutex_t socket_mutex; /** * @brief Create an IPv4 TCP socket with Nagle's algorithm disabled * (TCP_NODELAY) and Delayed ACKs disabled (TCP_QUICKACK). Exits application * on any error. * - * @return int + * @return The socket ID (a file descriptor). */ int create_real_time_tcp_socket_errexit(); +/** + * Read the specified number of bytes from the specified socket into the specified buffer. + * If an error occurs during this reading, return -1 and set errno to indicate + * the cause of the error. If the read succeeds in reading the specified number of bytes, + * return 0. If an EOF occurs before reading the specified number of bytes, return 1. + * This function repeats the read attempt until the specified number of bytes + * have been read, an EOF is read, or an error occurs. Specifically, errors EAGAIN, + * EWOULDBLOCK, and EINTR are not considered errors and instead trigger + * another attempt. A delay between attempts is given by DELAY_BETWEEN_SOCKET_RETRIES. + * @param socket The socket ID. + * @param num_bytes The number of bytes to read. + * @param buffer The buffer into which to put the bytes. + * @return 0 for success, 1 for EOF, and -1 for an error. + */ +int read_from_socket(int socket, size_t num_bytes, unsigned char* buffer); + +/** + * Read the specified number of bytes from the specified socket using read_from_socket + * and close the socket if an error occurs. If an error occurs, this will change the + * socket ID pointed to by the first argument to -1 and will return -1. + * @param socket Pointer to the socket ID. + * @param num_bytes The number of bytes to read. + * @param buffer The buffer into which to put the bytes. + * @return 0 for success, -1 for failure. + */ +int read_from_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* buffer); + /** * Read the specified number of bytes from the specified socket into the * specified buffer.
If a disconnect or an EOF occurs during this * reading, then if format is non-null, report an error and exit. + * If the mutex argument is non-NULL, release the mutex before exiting. * If format is null, then report the error, but do not exit. - * This function takes a formatted - * string and additional optional arguments similar to printf(format, ...) - * that is appended to the error messages. + * This function takes a formatted string and additional optional arguments + * similar to printf(format, ...) that is appended to the error messages. * @param socket The socket ID. * @param num_bytes The number of bytes to read. * @param buffer The buffer into which to put the bytes. @@ -88,88 +122,72 @@ int create_real_time_tcp_socket_errexit(); * @return The number of bytes read, or 0 if an EOF is received, or * a negative number for an error. */ -ssize_t read_from_socket_errexit( - int socket, +void read_from_socket_fail_on_error( + int* socket, size_t num_bytes, unsigned char* buffer, + lf_mutex_t* mutex, char* format, ...); -ssize_t write_to_socket(int socket, size_t num_bytes, unsigned char* buffer); - /** - * Read the specified number of bytes from the specified socket into the - * specified buffer. If a disconnect occurs during this - * reading, return a negative number. If an EOF occurs during this - * reading, return 0. Otherwise, return the number of bytes read. - * This is a version of read_from_socket_errexit() that does not error out. + * Without blocking, peek at the specified socket and, if there is + * anything on the queue, put its first byte at the specified address and return 1. + * If there is nothing on the queue, return 0, and if an error occurs, + * return -1. * @param socket The socket ID. - * @param num_bytes The number of bytes to read. - * @param buffer The buffer into which to put the bytes. - * @return The number of bytes read or 0 when EOF is received or negative for an error. 
+ * @param result Pointer to where to put the first byte available on the socket. */ -ssize_t read_from_socket(int socket, size_t num_bytes, unsigned char* buffer); +ssize_t peek_from_socket(int socket, unsigned char* result); /** * Write the specified number of bytes to the specified socket from the - * specified buffer. If a disconnect or an EOF occurs during this - * reading, report an error and exit, unless the format string is NULL, - * in which case, report an error and return. This function takes a formatted - * string and additional optional arguments similar to printf(format, ...) - * that is appended to the error messages. + * specified buffer. If an error occurs, return -1 and set errno to indicate + * the cause of the error. If the write succeeds, return 0. + * This function repeats the attempt until the specified number of bytes + * have been written or an error occurs. Specifically, errors EAGAIN, + * EWOULDBLOCK, and EINTR are not considered errors and instead trigger + * another attempt. A delay between attempts is given by + * DELAY_BETWEEN_SOCKET_RETRIES. * @param socket The socket ID. * @param num_bytes The number of bytes to write. * @param buffer The buffer from which to get the bytes. - * @param mutex If non-NULL, the mutex to unlock before exiting. - * @param format A format string for error messages, followed by any number of - * fields that will be used to fill the format string as in printf, or NULL - * to prevent exit on error. - * @return The number of bytes written, or 0 if an EOF was received, or a negative - * number if an error occurred. + * @return 0 for success, -1 for failure. */ -ssize_t write_to_socket_with_mutex( - int socket, - size_t num_bytes, - unsigned char* buffer, - lf_mutex_t* mutex, - char* format, ...); +int write_to_socket(int socket, size_t num_bytes, unsigned char* buffer); /** - * Write the specified number of bytes to the specified socket from the - * specified buffer. 
If a disconnect or an EOF occurs during this - * reading, report an error and exit, unless the format string is NULL, - * in which case, report an error and return. This function takes a formatted - * string and additional optional arguments similar to printf(format, ...) - * that is appended to the error messages. - * @param socket The socket ID. + * Write the specified number of bytes to the specified socket using write_to_socket + * and close the socket if an error occurs. If an error occurs, this will change the + * socket ID pointed to by the first argument to -1 and will return -1. + * @param socket Pointer to the socket ID. + * @param num_bytes The number of bytes to write. + * @param buffer The buffer from which to get the bytes. + * @return 0 for success, -1 for failure. + */ +int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* buffer); + +/** + * Write the specified number of bytes to the specified socket using + * write_to_socket_close_on_error and exit with an error code if an error occurs. + * If the mutex argument is non-NULL, release the mutex before exiting. If the + * format argument is non-null, then use it and any additional arguments to form + * the error message using printf conventions. Otherwise, print a generic error + * message. + * @param socket Pointer to the socket ID. * @param num_bytes The number of bytes to write. * @param buffer The buffer from which to get the bytes. * @param mutex If non-NULL, the mutex to unlock before exiting. * @param format A format string for error messages, followed by any number of * fields that will be used to fill the format string as in printf, or NULL - * to prevent exit on error. - * @return The number of bytes written, or 0 if an EOF was received, or a negative - * number if an error occurred. + * to print a generic error message.
*/ -ssize_t write_to_socket_errexit( - int socket, +void write_to_socket_fail_on_error( + int* socket, size_t num_bytes, unsigned char* buffer, + lf_mutex_t* mutex, char* format, ...); -/** - * Write the specified number of bytes to the specified socket from the - * specified buffer. If a disconnect or an EOF occurs during this - * reading, return a negative number or 0 respectively. Otherwise, - * return the number of bytes written. - * This is a version of write_to_socket() that does not error out. - * @param socket The socket ID. - * @param num_bytes The number of bytes to write. - * @param buffer The buffer from which to get the bytes. - * @return The number of bytes written, or 0 if an EOF was received, or a negative - * number if an error occurred. - */ -int write_to_socket2(int socket, int num_bytes, unsigned char* buffer); - #endif // FEDERATED /** @@ -332,7 +350,7 @@ void encode_tag( ); /** - * A helper struct for passing rti_addr information between parse_rti_addr and extract_rti_addr_info + * A helper struct for passing rti_addr information between lf_parse_rti_addr and extract_rti_addr_info */ typedef struct rti_addr_info_t { char rti_host_str[256]; diff --git a/include/core/lf_types.h b/include/core/lf_types.h index a48d445fc7..eb626658ed 100644 --- a/include/core/lf_types.h +++ b/include/core/lf_types.h @@ -239,7 +239,8 @@ struct trigger_t { interval_t offset; // Minimum delay of an action. For a timer, this is also the maximum delay. interval_t period; // Minimum interarrival time of an action. For a timer, this is also the maximal interarrival time. bool is_physical; // Indicator that this denotes a physical action. - event_t* last; // Pointer to the last event that was scheduled for this action. + tag_t last_tag; // Tag of the last event that was scheduled for this action. + // This is only used for actions and will otherwise be NEVER. lf_spacing_policy_t policy; // Indicates which policy to use when an event is scheduled too early. 
port_status_t status; // Determines the status of the port at the current logical time. Therefore, this // value needs to be reset at the beginning of each logical time. diff --git a/include/core/reactor.h b/include/core/reactor.h index 38f090d7c6..9d36c66270 100644 --- a/include/core/reactor.h +++ b/include/core/reactor.h @@ -60,17 +60,21 @@ #define CONSTRUCTOR(classname) (new_ ## classname) #define SELF_STRUCT_T(classname) (classname ## _self_t) -//////////////////////////////////////////////////////////// -//// Macros for producing outputs. - -// NOTE: According to the "Swallowing the Semicolon" section on this page: -// https://gcc.gnu.org/onlinedocs/gcc-3.0.1/cpp_3.html -// the following macros should use an odd do-while construct to avoid -// problems with if ... else statements that do not use braces around the -// two branches. - -// Declarations for functions used by the macros. +/** + * Unless the "fast" option is given, an LF program will wait until + * physical time matches logical time before handling an event with + * a given logical time. If the amount of time is less than this given + * threshold, then no wait will occur. The purpose of this is + * to prevent unnecessary delays caused by simply setting up and + * performing the wait. + */ +#define MIN_SLEEP_DURATION USEC(10) +/** + * Print an event from the event queue. + * This is a function of type pqueue_print_entry_f. + */ +void _lf_print_event(void* event); /** * Mark the given port's is_present field as true. This is_present field * will later be cleaned up by _lf_start_time_step. @@ -418,6 +422,39 @@ trigger_handle_t _lf_schedule(environment_t* env, trigger_t* trigger, interval_t void _lf_initialize_watchdog_mutexes(void); +/** + * @brief Get the array of ids of enclaves directly upstream of the specified enclave. + * This updates the specified result pointer to point to a statically allocated array of IDs + * and returns the length of the array. The implementation is code-generated.
+ * + * @param enclave_id The enclave for which to report upstream IDs. + * @param result The pointer to dereference and update to point to the resulting array. + * @return The number of direct upstream enclaves. + */ +int _lf_get_upstream_of(int enclave_id, int** result); + +/** + * @brief Get the array of ids of enclaves directly downstream of the specified enclave. + * This updates the specified result pointer to point to a statically allocated array of IDs + * and returns the length of the array. The implementation is code-generated. + * + * @param enclave_id The enclave for which to report downstream IDs. + * @param result The pointer to dereference and update to point to the resulting array. + * @return The number of direct downstream enclaves. + */ +int _lf_get_downstream_of(int enclave_id, int** result); + +/** + * @brief Retrieve the delays on the connections to direct upstream enclaves. + * This updates the result pointer to point to a statically allocated array of delays. + * The implementation is code-generated. + * + * @param enclave_id The enclave for which to search for upstream delays. + * @param result The pointer to dereference and update to point to the resulting array. + * @return The number of direct upstream enclaves. + */ +int _lf_get_upstream_delay_of(int enclave_id, interval_t** result); + /** * Function (to be code generated) to terminate execution. * This will be invoked after all shutdown actions have completed. @@ -427,7 +464,6 @@ void terminate_execution(environment_t* env); void termination(); - /** * Schedule the specified action with an integer value at a later logical * time that depends on whether the action is logical or physical and @@ -523,18 +559,6 @@ trigger_handle_t _lf_schedule_value(lf_action_base_t* action, interval_t extra_d */ trigger_handle_t _lf_schedule_copy(lf_action_base_t* action, interval_t offset, void* value, size_t length); -// See reactor.h for doc.
-int _lf_fd_send_stop_request_to_rti(tag_t stop_tag); - -/** - * @brief Will update the argument to point to the beginning of the array of environments in this program - * @note Is code-generated by the compiler - * @param envs A double pointer which will be dereferenced and modified - * @return int The number of environments in the array - */ -int _lf_get_environments(environment_t **envs); - - /** * @brief Will create and initialize the required number of environments for the program * @note Will be code generated by the compiler diff --git a/include/core/reactor_common.h b/include/core/reactor_common.h index be74165b7b..29fb73c58b 100644 --- a/include/core/reactor_common.h +++ b/include/core/reactor_common.h @@ -15,28 +15,15 @@ extern unsigned int _lf_number_of_workers; extern bool fast; extern instant_t duration; -extern bool _lf_execution_started; extern bool keepalive_specified; extern interval_t _lf_fed_STA_offset; +/** Flag used to disable cleanup operations on normal termination. 
*/ +extern bool _lf_normal_termination; + extern int default_argc; extern const char** default_argv; -#ifdef FEDERATED -void reset_status_fields_on_input_port_triggers(); -port_status_t determine_port_status_if_possible(int portID); -typedef enum parse_rti_code_t { - SUCCESS, - INVALID_PORT, - INVALID_HOST, - INVALID_USER, - FAILED_TO_PARSE -} parse_rti_code_t; -parse_rti_code_t parse_rti_addr(const char* rti_addr); -void set_federation_id(const char* fid); -void set_federation_trace_object(trace_t * trace); -#endif - extern struct allocation_record_t* _lf_reactors_to_free; void* _lf_new_reactor(size_t size); void _lf_free(struct allocation_record_t** head); @@ -61,7 +48,7 @@ event_t* _lf_create_dummy_events( event_t* next, microstep_t offset ); -int _lf_schedule_at_tag(environment_t* env, trigger_t* trigger, tag_t tag, lf_token_t* token); +trigger_handle_t _lf_schedule_at_tag(environment_t* env, trigger_t* trigger, tag_t tag, lf_token_t* token); trigger_handle_t _lf_schedule(environment_t* env, trigger_t* trigger, interval_t extra_delay, lf_token_t* token); trigger_handle_t _lf_insert_reactions_for_trigger(environment_t* env, trigger_t* trigger, lf_token_t* token); diff --git a/include/core/tag.h b/include/core/tag.h index 14c2dc5104..e38ea7de52 100644 --- a/include/core/tag.h +++ b/include/core/tag.h @@ -31,13 +31,16 @@ #define WEEKS(t) (t * 604800000000000LL) #define NEVER LLONG_MIN +#define NEVER_MICROSTEP 0u #define FOREVER LLONG_MAX -#define NEVER_TAG (tag_t) { .time = LLONG_MIN, .microstep = 0u } +#define FOREVER_MICROSTEP UINT_MAX +#define NEVER_TAG (tag_t) { .time = NEVER, .microstep = NEVER_MICROSTEP } // Need a separate initializer expression to comply with some C compilers -#define NEVER_TAG_INITIALIZER { LLONG_MIN, 0u } -#define FOREVER_TAG (tag_t) { .time = LLONG_MAX, .microstep = UINT_MAX } +#define NEVER_TAG_INITIALIZER { NEVER, NEVER_MICROSTEP } +#define FOREVER_TAG (tag_t) { .time = FOREVER, .microstep = FOREVER_MICROSTEP } // Need a separate 
initializer expression to comply with some C compilers -#define FOREVER_TAG_INITIALIZER { LLONG_MAX, UINT_MAX } +#define FOREVER_TAG_INITIALIZER { FOREVER, FOREVER_MICROSTEP } +#define ZERO_TAG (tag_t) { .time = 0LL, .microstep = 0u } // Convenience for converting times #define BILLION 1000000000LL @@ -80,6 +83,19 @@ typedef struct { */ tag_t lf_tag(void* env); +/** + * Add two tags. If either tag has NEVER or FOREVER in its time field, then + * return NEVER_TAG or FOREVER_TAG, respectively. Also return NEVER_TAG or FOREVER_TAG + * if the result underflows or overflows when adding the times. + * If the microstep overflows, also return FOREVER_TAG. + * If the time field of the second tag is greater than 0, then the microstep of the first tag + * is reset to 0 before adding. This models the delay semantics in LF and makes this + * addition operation non-commutative. + * @param a The first tag. + * @param b The second tag. + */ +tag_t lf_tag_add(tag_t a, tag_t b); + /** * Compare two tags. Return -1 if the first is less than * the second, 0 if they are equal, and +1 if the first is diff --git a/include/core/threaded/reactor_threaded.h b/include/core/threaded/reactor_threaded.h index 0053112d06..f0f3d424b7 100644 --- a/include/core/threaded/reactor_threaded.h +++ b/include/core/threaded/reactor_threaded.h @@ -17,7 +17,7 @@ void try_advance_level(environment_t* env, volatile size_t* next_reaction_level) * message to downstream federates if a given network output port is not present.
* @param env The environment in which we are executing */ -void enqueue_port_absent_reactions(environment_t* env); +void lf_enqueue_port_absent_reactions(environment_t* env); /** * Raise a barrier to prevent the current tag for the specified environment from advancing @@ -79,7 +79,7 @@ void _lf_increment_tag_barrier_locked(environment_t *env, tag_t future_tag); void _lf_decrement_tag_barrier_locked(environment_t* env); int _lf_wait_on_tag_barrier(environment_t* env, tag_t proposed_tag); -void synchronize_with_other_federates(void); +void lf_synchronize_with_other_federates(void); bool wait_until(environment_t* env, instant_t logical_time_ns, lf_cond_t* condition); tag_t get_next_event_tag(environment_t* env); tag_t send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply); diff --git a/include/core/threaded/scheduler_instance.h b/include/core/threaded/scheduler_instance.h index 9761785097..8a8a40905a 100644 --- a/include/core/threaded/scheduler_instance.h +++ b/include/core/threaded/scheduler_instance.h @@ -42,7 +42,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define NUMBER_OF_WORKERS 1 #endif // NUMBER_OF_WORKERS -#include "semaphore.h" +#include "lf_semaphore.h" #include #define DEFAULT_MAX_REACTION_LEVEL 100 diff --git a/include/core/trace.h b/include/core/trace.h index 357f22b118..d88abc291a 100644 --- a/include/core/trace.h +++ b/include/core/trace.h @@ -76,9 +76,10 @@ typedef enum worker_wait_ends, scheduler_advancing_time_starts, scheduler_advancing_time_ends, - federated, // Everything above this is tracing federated interactions. + federated, // Everything below this is for tracing federated interactions. 
// Sending messages send_ACK, + send_FAILED, send_TIMESTAMP, send_NET, send_LTC, @@ -100,6 +101,7 @@ typedef enum send_ADR_QR, // Receiving messages receive_ACK, + receive_FAILED, receive_TIMESTAMP, receive_NET, receive_LTC, @@ -142,6 +144,7 @@ static const char *trace_event_names[] = { "Federated marker", // Sending messages "Sending ACK", + "Sending FAILED", "Sending TIMESTAMP", "Sending NET", "Sending LTC", @@ -163,6 +166,7 @@ static const char *trace_event_names[] = { "Sending ADR_QR", // Receiving messages "Receiving ACK", + "Receiving FAILED", "Receiving TIMESTAMP", "Receiving NET", "Receiving LTC", @@ -436,10 +440,15 @@ void tracepoint_reaction_deadline_missed(trace_t* trace, reaction_t *reaction, i */ void stop_trace(trace_t* trace); +/** + * Version of stop_trace() that does not lock the trace mutex. + */ +void stop_trace_locked(trace_t* trace); + //////////////////////////////////////////////////////////// //// For federated execution -#ifdef FEDERATED +#if defined(FEDERATED) || defined(LF_ENCLAVES) /** * Trace federate sending a message to the RTI. @@ -479,6 +488,11 @@ void tracepoint_federate_to_federate(trace_t* trace, trace_event_t event_type, i */ void tracepoint_federate_from_federate(trace_t* trace, trace_event_t event_type, int fed_id, int partner_id, tag_t *tag); +#else +#define tracepoint_federate_to_rti(...); +#define tracepoint_federate_from_rti(...); +#define tracepoint_federate_to_federate(...); +#define tracepoint_federate_from_federate(...); #endif // FEDERATED //////////////////////////////////////////////////////////// @@ -504,9 +518,13 @@ void tracepoint_rti_to_federate(trace_t* trace, trace_event_t event_type, int fe */ void tracepoint_rti_from_federate(trace_t* trace, trace_event_t event_type, int fed_id, tag_t* tag); +#else +#define tracepoint_rti_to_federate(...); +#define tracepoint_rti_from_federate(...) 
; #endif // RTI_TRACE #else +typedef struct trace_t trace_t; // empty definition in case we compile without tracing #define _lf_register_trace_event(...) @@ -531,6 +549,7 @@ void tracepoint_rti_from_federate(trace_t* trace, trace_event_t event_type, int #define start_trace(...) #define stop_trace(...) +#define stop_trace_locked(...) #define trace_new(...) NULL #define trace_free(...) diff --git a/include/core/utils/semaphore.h b/include/core/utils/lf_semaphore.h similarity index 100% rename from include/core/utils/semaphore.h rename to include/core/utils/lf_semaphore.h diff --git a/include/core/utils/pqueue.h b/include/core/utils/pqueue.h index 5c3e7fe2b4..edfd4968c9 100644 --- a/include/core/utils/pqueue.h +++ b/include/core/utils/pqueue.h @@ -1,230 +1,99 @@ -/* - * Copyright (c) 2014, Volkan Yazıcı - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * Modified by Marten Lohstroh (May, 2019). - * Changes: - * - Require implementation of a pqueue_eq_elem_f function to determine - * whether two elements are equal or not; and - * - The provided pqueue_eq_elem_f implementation is used to test and - * search for equal elements present in the queue; and - * - Removed capability to reassign priorities. - */ - /** - * @file pqueue.h - * @brief Priority Queue function declarations - * - * @{ + * @file pqueue.h + * @author Marten Lohstroh + * @author Edward A. Lee + * @copyright (c) 2020-2023, The University of California at Berkeley. + * License: BSD 2-clause + * + * @brief Priority queue declarations for the event queue and reaction queue. 
*/ - #ifndef PQUEUE_H #define PQUEUE_H -#include - -/** priority data type */ -typedef unsigned long long pqueue_pri_t; - -/** callback functions to get/set/compare the priority of an element */ -typedef pqueue_pri_t (*pqueue_get_pri_f)(void *a); -typedef void (*pqueue_set_pri_f)(void *a, pqueue_pri_t pri); -typedef int (*pqueue_cmp_pri_f)(pqueue_pri_t next, pqueue_pri_t curr); -typedef int (*pqueue_eq_elem_f)(void* next, void* curr); - -/** callback functions to get/set the position of an element */ -typedef size_t (*pqueue_get_pos_f)(void *a); -typedef void (*pqueue_set_pos_f)(void *a, size_t pos); - -/** debug callback function to print a entry */ -typedef void (*pqueue_print_entry_f)(void *a); - -/** the priority queue handle */ -typedef struct pqueue_t -{ - size_t size; /**< number of elements in this queue plus 1 */ - size_t avail; /**< slots available in this queue */ - size_t step; /**< growth stepping setting */ - pqueue_cmp_pri_f cmppri; /**< callback to compare priorities */ - pqueue_get_pri_f getpri; /**< callback to get priority of a node */ - pqueue_get_pos_f getpos; /**< callback to get position of a node */ - pqueue_set_pos_f setpos; /**< callback to set position of a node */ - pqueue_eq_elem_f eqelem; /**< callback to compare elements */ - pqueue_print_entry_f prt; /**< callback to print elements */ - void **d; /**< The actual queue in binary heap form */ -} pqueue_t; - -/** - * initialize the queue - * - * @param n the initial estimate of the number of queue items for which memory - * should be preallocated - * @param cmppri The callback function to run to compare two elements - * This callback should return 0 for 'lower' and non-zero - * for 'higher', or vice versa if reverse priority is desired - * @param getpri the callback function to run to set a score to an element - * @param getpos the callback function to get the current element's position - * @param setpos the callback function to set the current element's position - * - * @return the 
handle or NULL for insufficent memory - */ -pqueue_t * -pqueue_init(size_t n, - pqueue_cmp_pri_f cmppri, - pqueue_get_pri_f getpri, - pqueue_get_pos_f getpos, - pqueue_set_pos_f setpos, - pqueue_eq_elem_f eqelem, - pqueue_print_entry_f prt); - +#include "pqueue_base.h" /** - * free all memory used by the queue - * @param q the queue + * Return 1 if the first argument is greater than the second and zero otherwise. + * @param thiz First argument. + * @param that Second argument. */ -void pqueue_free(pqueue_t *q); - - -/** - * return the size of the queue. - * @param q the queue - */ -size_t pqueue_size(pqueue_t *q); +int in_reverse_order(pqueue_pri_t thiz, pqueue_pri_t that); /** - * Insert an element into the queue. - * @param q the queue - * @param e the element - * @return 0 on success + * Return 0 regardless of argument order. + * @param thiz First argument. + * @param that Second argument. */ -int pqueue_insert(pqueue_t *q, void *d); +int in_no_particular_order(pqueue_pri_t thiz, pqueue_pri_t that); /** - * Move an existing entry to a different priority. - * @param q the queue - * @param new_pri the new priority - * @param d the entry + * Return 1 if the two events have the same trigger. + * @param event1 A pointer to an event_t. + * @param event2 A pointer to an event_t. */ -void -pqueue_change_priority(pqueue_t *q, - pqueue_pri_t new_pri, - void *d); - +int event_matches(void* event1, void* event2); /** - * Pop the highest-ranking item from the queue. - * @param q the queue - * @return NULL on error, otherwise the entry + * Return 1 if the two arguments are identical pointers. + * @param a First argument. + * @param b Second argument. */ -void *pqueue_pop(pqueue_t *q); +int reaction_matches(void* a, void* b); /** - * @brief Empty 'src' into 'dest'. - * - * As an optimization, this function might swap 'src' and 'dest'. - * - * @param dest The queue to fill up - * @param src The queue to empty + * Report a priority equal to the time of the given event. 
+ * This is used for sorting pointers to event_t structs in the event queue. + * @param a A pointer to an event_t. */ -void pqueue_empty_into(pqueue_t** dest, pqueue_t** src); +pqueue_pri_t get_event_time(void *event); /** - * Find the highest-ranking item with the same priority that matches the - * supplied entry. - * @param q the queue - * @param e the entry to compare against - * @return NULL if no matching event has been found, otherwise the entry + * Report a priority equal to the index of the given reaction. + * Used for sorting pointers to reaction_t structs in the + * blocked and executing queues. + * @param reaction A pointer to a reaction_t. */ -void* pqueue_find_equal_same_priority(pqueue_t *q, void *e); +pqueue_pri_t get_reaction_index(void *reaction_t); /** - * Find the highest-ranking item with priority up to and including the given - * maximum priority that matches the supplied entry. - * @param q the queue - * @param e the entry to compare against - * @param max_priority the maximum priority to consider - * @return NULL if no matching event has been found, otherwise the entry + * Return the given event's position in the queue. + * @param event A pointer to an event_t. */ -void* pqueue_find_equal(pqueue_t *q, void *e, pqueue_pri_t max_priority); +size_t get_event_position(void *event); /** - * Remove an item from the queue. - * @param q the queue - * @param e the entry - * @return 0 on success + * Return the given reaction's position in the queue. + * @param reaction A pointer to a reaction_t. */ -int pqueue_remove(pqueue_t *q, void *e); +size_t get_reaction_position(void *reaction); /** - * Access highest-ranking item without removing it. - * @param q the queue - * @return NULL on error, otherwise the entry + * Set the given event's position in the queue. + * @param event A pointer to an event_t + * @param pos The position. */ -void *pqueue_peek(pqueue_t *q); +void set_event_position(void *event, size_t pos); /** - * Print the queue. 
- * @internal - * DEBUG function only - * @param q the queue - * @param the callback function to print the entry + * Set the given reaction's position in the queue. + * @param event A pointer to a reaction_t. + * @param pos The position. */ -void -pqueue_print(pqueue_t *q, - pqueue_print_entry_f print); +void set_reaction_position(void *reaction, size_t pos); /** - * Dump the queue and it's internal structure. - * @internal - * debug function only - * @param q the queue - * @param the callback function to print the entry + * Print some information about the given reaction. + * This only prints something if logging is set to DEBUG. + * @param reaction A pointer to a reaction_t. */ -void -pqueue_dump(pqueue_t *q, - pqueue_print_entry_f print); +void print_reaction(void *reaction); /** - * Check that the all entries are in the right order, etc. - * @internal - * debug function only - * @param q the queue + * Print some information about the given event. + * This only prints something if logging is set to DEBUG. + * @param event A pointer to an event_t. */ -int pqueue_is_valid(pqueue_t *q); - -// ********** Priority Queue Support Start -int in_reverse_order(pqueue_pri_t thiz, pqueue_pri_t that); -int in_no_particular_order(pqueue_pri_t thiz, pqueue_pri_t that); -int event_matches(void* next, void* curr); -int reaction_matches(void* next, void* curr); -pqueue_pri_t get_event_time(void *a); -pqueue_pri_t get_reaction_index(void *a); -size_t get_event_position(void *a); -size_t get_reaction_position(void *a); -void set_event_position(void *a, size_t pos); -void set_reaction_position(void *a, size_t pos); -void print_reaction(void *reaction); void print_event(void *event); #endif /* PQUEUE_H */ -/** @} */ diff --git a/include/core/utils/pqueue_base.h b/include/core/utils/pqueue_base.h new file mode 100644 index 0000000000..210cc0eece --- /dev/null +++ b/include/core/utils/pqueue_base.h @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2014, Volkan Yazıcı + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Modified by Marten Lohstroh (May, 2019). + * Changes: + * - Require implementation of a pqueue_eq_elem_f function to determine + * whether two elements are equal or not; and + * - The provided pqueue_eq_elem_f implementation is used to test and + * search for equal elements present in the queue; and + * - Removed capability to reassign priorities. + * + * @brief Priority Queue function declarations used as a base for Lingua Franca priority queues. + * + * @{ + */ + + +#ifndef PQUEUE_BASE_H +#define PQUEUE_BASE_H + +#include + +/** Priority data type. */ +typedef unsigned long long pqueue_pri_t; + +/** Callback to get the priority of an element. 
*/ +typedef pqueue_pri_t (*pqueue_get_pri_f)(void *a); + +/** Callback to compare two priorities. */ +typedef int (*pqueue_cmp_pri_f)(pqueue_pri_t next, pqueue_pri_t curr); + +/** Callback to determine whether two elements are equivalent. */ +typedef int (*pqueue_eq_elem_f)(void* next, void* curr); + +/** Callback functions to get the position of an element. */ +typedef size_t (*pqueue_get_pos_f)(void *a); + +/** Callback functions to set the position of an element. */ +typedef void (*pqueue_set_pos_f)(void *a, size_t pos); + +/** Debug callback function to print a entry. */ +typedef void (*pqueue_print_entry_f)(void *a); + +/** The priority queue handle. */ +typedef struct pqueue_t +{ + size_t size; /**< number of elements in this queue plus 1 */ + size_t avail; /**< slots available in this queue */ + size_t step; /**< growth stepping setting */ + pqueue_cmp_pri_f cmppri; /**< callback to compare priorities */ + pqueue_get_pri_f getpri; /**< callback to get priority of a node */ + pqueue_get_pos_f getpos; /**< callback to get position of a node */ + pqueue_set_pos_f setpos; /**< callback to set position of a node */ + pqueue_eq_elem_f eqelem; /**< callback to compare elements */ + pqueue_print_entry_f prt; /**< callback to print elements */ + void **d; /**< The actual queue in binary heap form */ +} pqueue_t; + +/** + * @brief Allocate and initialize a priority queue. 
+ * + * @param n the initial estimate of the number of queue items for which memory + * should be preallocated + * @param cmppri The callback function to run to compare two elements + * This callback should return 0 for 'lower' and non-zero + * for 'higher', or vice versa if reverse priority is desired + * @param getpri the callback function to run to get the priority of an element + * @param getpos the callback function to get the current element's position + * @param setpos the callback function to set the current element's position + * @param eqelem the callback function to check equivalence of entries + * @param prt the callback function to print an element + * + * @return The handle or NULL for insufficient memory. + */ +pqueue_t * +pqueue_init(size_t n, + pqueue_cmp_pri_f cmppri, + pqueue_get_pri_f getpri, + pqueue_get_pos_f getpos, + pqueue_set_pos_f setpos, + pqueue_eq_elem_f eqelem, + pqueue_print_entry_f prt); + +/** + * free all memory used by the queue + * @param q the queue + */ +void pqueue_free(pqueue_t *q); + +/** + * return the size of the queue. + * @param q the queue + */ +size_t pqueue_size(pqueue_t *q); + +/** + * Insert an element into the queue. + * @param q the queue + * @param d the element + * @return 0 on success + */ +int pqueue_insert(pqueue_t *q, void *d); + +/** + * Move an existing entry to a different priority. + * @param q the queue + * @param new_pri the new priority + * @param d the entry + */ +void +pqueue_change_priority(pqueue_t *q, + pqueue_pri_t new_pri, + void *d); + +/** + * Pop the highest-ranking item from the queue. + * @param q the queue + * @return NULL on error, otherwise the entry + */ +void *pqueue_pop(pqueue_t *q); + +/** + * @brief Empty 'src' into 'dest'. + * + * As an optimization, this function might swap 'src' and 'dest'.
+ * + * @param dest The queue to fill up + * @param src The queue to empty + */ +void pqueue_empty_into(pqueue_t** dest, pqueue_t** src); + +/** + * Find the highest-ranking item with the same priority that matches the + * supplied entry. + * @param q the queue + * @param e the entry to compare against + * @return NULL if no matching event has been found, otherwise the entry + */ +void* pqueue_find_equal_same_priority(pqueue_t *q, void *e); + +/** + * Find the highest-ranking item with priority up to and including the given + * maximum priority that matches the supplied entry. + * @param q the queue + * @param e the entry to compare against + * @param max_priority the maximum priority to consider + * @return NULL if no matching event has been found, otherwise the entry + */ +void* pqueue_find_equal(pqueue_t *q, void *e, pqueue_pri_t max_priority); + +/** + * Remove an item from the queue. + * @param q the queue + * @param e the entry + * @return 0 on success + */ +int pqueue_remove(pqueue_t *q, void *e); + +/** + * Access highest-ranking item without removing it. + * @param q the queue + * @return NULL on error, otherwise the entry + */ +void *pqueue_peek(pqueue_t *q); + + +/** + * Print the contents of the queue. + * @param q The queue. + * @param print The callback function to print the entry or NULL to use the default. + */ +void pqueue_print(pqueue_t *q, pqueue_print_entry_f print); + +/** + * Dump the queue and its internal structure. + * @internal + * debug function only + * @param q the queue + * @param print the callback function to print the entry + */ +void +pqueue_dump(pqueue_t *q, + pqueue_print_entry_f print); + +/** + * Check that all the entries are in the right order, etc.
+ * @internal + * debug function only + * @param q the queue + */ +int pqueue_is_valid(pqueue_t *q); + +#endif /* PQUEUE_BASE_H */ +/** @} */ diff --git a/include/core/utils/pqueue_tag.h b/include/core/utils/pqueue_tag.h new file mode 100644 index 0000000000..ad4ac84d13 --- /dev/null +++ b/include/core/utils/pqueue_tag.h @@ -0,0 +1,178 @@ +/** + * @file pqueue_tag.h + * @author Byeonggil Jun + * @author Edward A. Lee + * @copyright (c) 2023, The University of California at Berkeley + * License in [BSD 2-clause](https://github.com/lf-lang/reactor-c/blob/main/LICENSE.md) + * @brief Priority queue that uses tags for sorting. + * + * This file extends the pqueue infrastructure with support for queues that are sorted + * by tag instead of by an unsigned long long. Elements in this queue are structs of type + * pqueue_tag_element_t or a derived struct, as explained below. What you put onto the + * queue is a pointer to a pqueue_tag_element_t struct. That pointer, when cast to pqueue_pri_t, + * an alias for unsigned long long, also serves as the "priority" for the queue. + */ + +#ifndef PQUEUE_TAG_H +#define PQUEUE_TAG_H + +#include "pqueue_base.h" +#include "tag.h" + +/** + * @brief The type for an element in a priority queue that is sorted by tag. + * + * In this design, a pointer to this struct is also a "priority" (it can be + * cast to pqueue_pri_t). The actual priority is the tag field of the struct, + * in that the queue is sorted from least tag to largest. + * + * If your struct is dynamically allocated using malloc or calloc, and you + * would like the memory freed when the queue is freed, then set the is_dynamic + * field of the element to a non-zero value. + * + * For a priority queue that contains only tags with no payload, you can + * avoid creating the element struct by using the functions + * pqueue_tag_insert_tag, pqueue_tag_insert_if_no_match, and pqueue_tag_pop_tag.
+ * + * To customize the element you put onto the queue, for example to carry + * a payload, you can create your own element struct type by simply declaring + * the first field to be a pqueue_tag_element_t. For example, if you want an + * element of the queue to include a pointer to your own payload, you can + * declare the following struct type: + *
+ *     typedef struct {
+ *         pqueue_tag_element_t base;
+ *         my_type* my_payload;
+ *     } my_element_type_t;
+ * 
+ * When inserting your struct into the queue, simply cast your pointer + * to (pqueue_tag_element_t*). When accessing your struct from the queue, + * simply cast the result to (my_element_type_t*); + */ +typedef struct { + tag_t tag; + size_t pos; // Needed by any pqueue element. + int is_dynamic; // Non-zero to free this struct when the queue is freed. +} pqueue_tag_element_t; + +/** + * @brief Type of a priority queue sorted by tags. + */ +typedef pqueue_t pqueue_tag_t; + +/** + * @brief Create a priority queue sorted by tags. + * + * The elements of the priority queue will be of type pqueue_tag_element_t. + * The caller should call pqueue_tag_free() when finished with the queue. + * @return A dynamically allocated priority queue or NULL if memory allocation fails. + */ +pqueue_tag_t* pqueue_tag_init(size_t initial_size); + +/** + * @brief Free all memory used by the queue including elements that are marked dynamic. + * + * @param q The queue. + */ +void pqueue_tag_free(pqueue_tag_t *q); + +/** + * @brief Return the size of the queue. + * + * @param q The queue. + */ +size_t pqueue_tag_size(pqueue_tag_t *q); + +/** + * @brief Insert an element into the queue. + * + * @param q The queue. + * @param e The element to insert. + * @return 0 on success + */ +int pqueue_tag_insert(pqueue_tag_t* q, pqueue_tag_element_t* d); + +/** + * @brief Insert a tag into the queue. + * + * This automatically creates a dynamically allocated element in the queue + * and ensures that if the element is still on the queue when pqueue_tag_free + * is called, then that memory will be freed. + * @param q The queue. + * @param t The tag to insert. + * @return 0 on success + */ +int pqueue_tag_insert_tag(pqueue_tag_t* q, tag_t t); + +/** + * @brief Insert a tag into the queue if the tag is not already in the queue. 
+ * + * This automatically creates a dynamically allocated element in the queue + * and ensures that if the element is still on the queue when pqueue_tag_free + * is called, then that memory will be freed. + * @param q The queue. + * @param t The tag to insert. + * @return 0 on success, 1 otherwise. + */ +int pqueue_tag_insert_if_no_match(pqueue_tag_t* q, tag_t t); + +/** + * @brief Return the first item with the specified tag or NULL if there is none. + * @param q The queue. + * @param t The tag. + * @return An entry with the specified tag or NULL if there isn't one. + */ +pqueue_tag_element_t* pqueue_tag_find_with_tag(pqueue_tag_t *q, tag_t t); + +/** + * @brief Return highest-ranking item (the one with the least tag) without removing it. + * @param q The queue. + * @return NULL if the queue is empty, otherwise the entry. + */ +pqueue_tag_element_t* pqueue_tag_peek(pqueue_tag_t* q); + +/** + * @brief Return the least tag in the queue or FOREVER if the queue is empty. + * @param q The queue. + * @return The least tag in the queue or FOREVER if the queue is empty. + */ +tag_t pqueue_tag_peek_tag(pqueue_tag_t* q); + +/** + * @brief Pop the least-tag element from the queue. + * + * If the entry was dynamically allocated, then it is now up to the caller + * to ensure that it is freed. It will not be freed by pqueue_tag_free. + * @param q The queue. + * @return NULL on error, otherwise the entry + */ +pqueue_tag_element_t* pqueue_tag_pop(pqueue_tag_t* q); + +/** + * @brief Pop the least-tag element from the queue and return its tag. + * + * If the queue is empty, return FOREVER_TAG. This function handles freeing + * the element struct if it was dynamically allocated. + * @param q The queue. + * @return The tag of the popped element, or FOREVER_TAG if the queue is empty. + */ +tag_t pqueue_tag_pop_tag(pqueue_tag_t* q); + +/** + * @brief Remove an item from the queue. + * + * @param q The queue. + * @param e The entry to remove.
+ */ +void pqueue_tag_remove(pqueue_tag_t* q, pqueue_tag_element_t* e); + +/** + * @brief Remove items from the queue with tags up to and including the specified tag. + * + * If the specified tag is FOREVER_TAG, then all items will be removed. + * @param q The queue. + * @param t The specified tag. + */ +void pqueue_tag_remove_up_to(pqueue_tag_t* q, tag_t t); + +#endif // PQUEUE_TAG_H diff --git a/include/core/utils/util.h b/include/core/utils/util.h index debab132f4..728880e0f0 100644 --- a/include/core/utils/util.h +++ b/include/core/utils/util.h @@ -35,6 +35,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include // Defines va_list #include +#include // Defines int64_t // To silence warnings about a function being a candidate for format checking // with gcc, add an attribute. @@ -50,10 +51,10 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Holds generic statistical data */ typedef struct lf_stat_ll { - long long average; - long long standard_deviation; - long long variance; - long long max; + int64_t average; + int64_t standard_deviation; + int64_t variance; + int64_t max; } lf_stat_ll; /** @@ -124,12 +125,9 @@ extern int _lf_my_fed_id; int lf_fed_id(void); /** - * Report an informational message on stdout with - * a newline appended at the end. - * If this execution is federated, then - * the message will be prefaced by "Federate n: ", - * where n is the federate ID. - * The arguments are just like printf(). + * Report an informational message on stdout with a newline appended at the end. + * If this execution is federated, then the message will be prefaced by identifying + * information for the federate. The arguments are just like printf(). */ void lf_print(const char* format, ...) ATTRIBUTE_FORMAT_PRINTF(1, 2); @@ -139,12 +137,9 @@ void lf_print(const char* format, ...) 
ATTRIBUTE_FORMAT_PRINTF(1, 2); void lf_vprint(const char* format, va_list args) ATTRIBUTE_FORMAT_PRINTF(1, 0); /** - * Report an log message on stdout with the prefix - * "LOG: " and a newline appended - * at the end. If this execution is federated, then - * the message will be prefaced by "Federate n: ", - * where n is the federate ID. - * The arguments are just like printf(). + * Report an log message on stdout with the prefix "LOG: " and a newline appended + * at the end. If this execution is federated, then the message will be prefaced by + * identifying information for the federate. The arguments are just like printf(). */ void lf_print_log(const char* format, ...) ATTRIBUTE_FORMAT_PRINTF(1, 2); @@ -176,12 +171,9 @@ void lf_vprint_log(const char* format, va_list args) ATTRIBUTE_FORMAT_PRINTF(1, } } while (0) /** - * Report an debug message on stdout with the prefix - * "DEBUG: " and a newline appended - * at the end. If this execution is federated, then - * the message will be prefaced by "Federate n: ", - * where n is the federate ID. - * The arguments are just like printf(). + * Report an debug message on stdout with the prefix "DEBUG: " and a newline appended + * at the end. If this execution is federated, then the message will be prefaced by + * identifying information for the federate. The arguments are just like printf(). */ void lf_print_debug(const char* format, ...) ATTRIBUTE_FORMAT_PRINTF(1, 2); @@ -248,9 +240,10 @@ void lf_vprint_warning(const char* format, va_list args) ATTRIBUTE_FORMAT_PRINTF void lf_print_error_and_exit(const char* format, ...) ATTRIBUTE_FORMAT_PRINTF(1, 2); /** - * A shorthand for checking if a condition is true and if not, print an error and exit. + * Report an error and exit just like lf_print_error_and_exit(), but + * also print the system error message associated with the error. */ -void lf_assert(bool condition, const char* format, ...) 
ATTRIBUTE_FORMAT_PRINTF(2, 3); +void lf_print_error_system_failure(const char* format, ...); /** * varargs alternative of "lf_print_error_and_exit" @@ -280,4 +273,42 @@ typedef void(print_message_function_t)(const char*, va_list); */ void lf_register_print_function(print_message_function_t* function, int log_level); +/** + * Assertion handling. LF_ASSERT can be used as a shorthand for verifying + * a condition and calling `lf_print_error_and_exit` if it is not true. + * The LF_ASSERT version requires that the condition evaluate to true + * (non-zero), whereas the LF_ASSERTN version requires that the condition + * evaluate to false (zero). + * These are optimized away if the NDEBUG flag is defined. + */ +#if defined(NDEBUG) +#define LF_ASSERT(condition, format, ...) (void)(condition) +#define LF_ASSERTN(condition, format, ...) (void)(condition) +#else +#define LF_ASSERT(condition, format, ...) \ + do { \ + if (!(condition)) { \ + lf_print_error_and_exit(format, ##__VA_ARGS__); \ + } \ + } while(0) +#define LF_ASSERTN(condition, format, ...) \ + do { \ + if (condition) { \ + lf_print_error_and_exit(format, ##__VA_ARGS__); \ + } \ + } while(0) +#endif // NDEBUG + +/** + * Checking mutex locking and unlocking. + * This is optimized away if the NDEBUG flag is defined. 
+ */ +#define LF_MUTEX_INIT(mutex) LF_ASSERTN(lf_mutex_init(&mutex), "Mutex init failed.") + +#define LF_MUTEX_LOCK(mutex) LF_ASSERTN(lf_mutex_lock(&mutex), "Mutex lock failed.") + +#define LF_MUTEX_UNLOCK(mutex) LF_ASSERTN(lf_mutex_unlock(&mutex), "Mutex unlock failed.") + +#define LF_COND_INIT(cond, mutex) LF_ASSERTN(lf_cond_init(&cond, &mutex), "Condition variable init failed.") + #endif /* UTIL_H */ diff --git a/lingua-franca-ref.txt b/lingua-franca-ref.txt index 31439c5114..ab711f93f5 100644 --- a/lingua-franca-ref.txt +++ b/lingua-franca-ref.txt @@ -1,2 +1 @@ -stm32 - +stm32 \ No newline at end of file diff --git a/python/lib/pythontarget.c b/python/lib/pythontarget.c index 47baa685a1..551f9a71e8 100644 --- a/python/lib/pythontarget.c +++ b/python/lib/pythontarget.c @@ -280,7 +280,7 @@ PyObject* py_main(PyObject* self, PyObject* py_args) { // Store a reference to the top-level environment int num_environments = _lf_get_environments(&top_level_environment); - lf_assert(num_environments == 1, "Python target only supports programs with a single environment/enclave"); + LF_ASSERT(num_environments == 1, "Python target only supports programs with a single environment/enclave"); Py_BEGIN_ALLOW_THREADS lf_reactor_c_main(argc, argv); diff --git a/test/general/utils/pqueue_test.c b/test/general/utils/pqueue_test.c new file mode 100644 index 0000000000..e0f252c7d1 --- /dev/null +++ b/test/general/utils/pqueue_test.c @@ -0,0 +1,104 @@ +#include +#include +#include +#include +#include "pqueue_tag.h" +#include "tag.h" + +static void trivial(void) { + // Create an event queue. 
+ pqueue_tag_t* q = pqueue_tag_init(1); + assert(q != NULL); + assert(pqueue_is_valid((pqueue_t*)q)); + pqueue_print((pqueue_t*)q, NULL); + pqueue_tag_free(q); +} + +static void insert_on_queue(pqueue_tag_t* q) { + tag_t t1 = {.time = USEC(3), .microstep = 0}; + tag_t t2 = {.time = USEC(2), .microstep = 1}; + tag_t t3 = {.time = USEC(2), .microstep = 0}; + tag_t t4 = {.time = USEC(1), .microstep = 2}; + assert(!pqueue_tag_insert_tag(q, t1)); + assert(!pqueue_tag_insert_tag(q, t2)); + assert(!pqueue_tag_insert_tag(q, t3)); + + assert(!pqueue_tag_insert_if_no_match(q, t4)); + assert(pqueue_tag_insert_if_no_match(q, t1)); + assert(pqueue_tag_insert_if_no_match(q, t4)); + printf("======== Contents of the queue:\n"); + pqueue_print((pqueue_t*)q, NULL); + assert(pqueue_tag_size(q) == 4); +} + +static void find_from_queue(pqueue_tag_t* q) { + tag_t t1 = {.time = USEC(3), .microstep = 0}; + tag_t t2 = {.time = USEC(2), .microstep = 1}; + tag_t t3 = {.time = USEC(2), .microstep = 0}; + tag_t t4 = {.time = USEC(1), .microstep = 2}; + tag_t t5 = {.time = USEC(0), .microstep = 0}; + tag_t t6 = {.time = USEC(3), .microstep = 2}; + assert(pqueue_tag_find_with_tag(q, t1) != NULL); + assert(pqueue_tag_find_with_tag(q, t2) != NULL); + assert(pqueue_tag_find_with_tag(q, t3) != NULL); + assert(pqueue_tag_find_with_tag(q, t4) != NULL); + assert(pqueue_tag_find_with_tag(q, t5) == NULL); + assert(pqueue_tag_find_with_tag(q, t6) == NULL); +} + +static void insert_if_no_match(pqueue_tag_t* q) { + int size = pqueue_tag_size(q); + tag_t t1 = {.time = USEC(3), .microstep = 0}; + tag_t t4 = {.time = USEC(1), .microstep = 2}; + // Return value is non-zero on failure to insert: + assert(pqueue_tag_insert_if_no_match(q, t1)); + assert(pqueue_tag_insert_if_no_match(q, t4)); + assert(size == pqueue_tag_size(q)); +} + +static void pop_from_queue(pqueue_tag_t* q) { + tag_t t1_back = pqueue_tag_pop_tag(q); + assert(t1_back.time == USEC(1)); + assert(t1_back.microstep == 2); + tag_t t2_back = 
pqueue_tag_pop_tag(q); + assert(t2_back.time == USEC(2)); + assert(t2_back.microstep == 0); + tag_t t3_back = pqueue_tag_pop_tag(q); + assert(t3_back.time == USEC(2)); + assert(t3_back.microstep == 1); + tag_t t4_back = pqueue_tag_pop_tag(q); + assert(t4_back.time == USEC(3)); + assert(t4_back.microstep == 0); +} + +static void pop_empty(pqueue_tag_t* q) { + assert(pqueue_tag_size(q) == 0); + assert(pqueue_tag_pop(q) == NULL); +} + +static void remove_from_queue(pqueue_tag_t* q, pqueue_tag_element_t* e1, pqueue_tag_element_t* e2) { + assert(pqueue_tag_insert(q, e1) == 0); + assert(pqueue_tag_insert(q, e2) == 0); + pqueue_tag_remove(q, e1); + assert(pqueue_tag_peek(q) == e2); + assert(pqueue_tag_size(q) == 1); +} + +int main(int argc, char *argv[]) { + trivial(); + // Create an event queue. + pqueue_tag_t* q = pqueue_tag_init(2); + + insert_on_queue(q); + find_from_queue(q); + insert_if_no_match(q); + pop_from_queue(q); + pop_empty(q); + + pqueue_tag_element_t e1 = {.tag = {.time = USEC(3), .microstep = 0}, .pos = 0, .is_dynamic = 0}; + pqueue_tag_element_t e2 = {.tag = {.time = USEC(2), .microstep = 0}, .pos = 0, .is_dynamic = 0}; + + remove_from_queue(q, &e1, &e2); + + pqueue_tag_free(q); +} diff --git a/test/src_gen_stub.c b/test/src_gen_stub.c index 6ac14c84ed..46f4510966 100644 --- a/test/src_gen_stub.c +++ b/test/src_gen_stub.c @@ -1,8 +1,22 @@ #include #include "tag.h" +#include "environment.h" + +/** + * This file enables unit tests to run without there having been an actual code generation + * from a Lingua Franca program. It defines (mostly empty) functions that would normally be + * code generated. Of course, this strategy will only work for tests that do not actually + * need functional versions of these functions. 
+ */ + +environment_t _env; void _lf_initialize_trigger_objects() {} void terminate_execution() {} void _lf_set_default_command_line_options() {} void _lf_initialize_watchdog_mutexes() {} void logical_tag_complete(tag_t tag_to_send) {} +int _lf_get_environments(environment_t ** envs) { + *envs = &_env; + return 1; +} \ No newline at end of file diff --git a/util/tracing/Makefile b/util/tracing/Makefile index b20292d00a..15fd0c13e1 100644 --- a/util/tracing/Makefile +++ b/util/tracing/Makefile @@ -37,4 +37,4 @@ install: trace_to_csv trace_to_chrome trace_to_influxdb chmod +x $(BIN_INSTALL_PATH)/fedsd clean: - rm -f *.o + rm -f *.o trace_to_chrome trace_to_influxdb trace_to_csv diff --git a/util/tracing/trace_to_csv.c b/util/tracing/trace_to_csv.c index 9f237adb4c..d1002269b9 100644 --- a/util/tracing/trace_to_csv.c +++ b/util/tracing/trace_to_csv.c @@ -55,13 +55,13 @@ int table_size; * Print a usage message. */ void usage() { - printf("\nUsage: trace_to_csv [options] trace_file_root (without .lft extension)\n\n"); - /* No options yet: + printf("\nUsage: trace_to_csv [options] trace_file_root (with .lft extension)\n\n"); printf("\nOptions: \n\n"); - printf(" -f, --fast [true | false]\n"); - printf(" Whether to wait for physical time to match logical time.\n\n"); + printf(" -s, --start [time_spec] [units]\n"); + printf(" The target time to begin tracing.\n\n"); + printf(" -e, --end [time_spec] [units]\n"); + printf(" The target time to stop tracing.\n\n"); printf("\n\n"); - */ } /** @@ -100,7 +100,7 @@ instant_t latest_time = 0LL; * Read a trace in the trace_file and write it to the output_file as CSV. * @return The number of records read or 0 upon seeing an EOF. */ -size_t read_and_write_trace() { +size_t read_and_write_trace(instant_t trace_start_time, instant_t trace_end_time) { int trace_length = read_trace(); if (trace_length == 0) return 0; // Write each line. 
@@ -116,156 +116,161 @@ size_t read_and_write_trace() { if (trigger_name == NULL) { trigger_name = "NO TRIGGER"; } - fprintf(output_file, "%s, %s, %d, %d, %lld, %d, %lld, %s, %lld\n", - trace_event_names[trace[i].event_type], - reactor_name, - trace[i].src_id, - trace[i].dst_id, - trace[i].logical_time - start_time, - trace[i].microstep, - trace[i].physical_time - start_time, - trigger_name, - trace[i].extra_delay - ); - // Update summary statistics. - if (trace[i].physical_time > latest_time) { - latest_time = trace[i].physical_time; - } - if (object_instance >= 0 && summary_stats[NUM_EVENT_TYPES + object_instance] == NULL) { - summary_stats[NUM_EVENT_TYPES + object_instance] = (summary_stats_t*)calloc(1, sizeof(summary_stats_t)); - } - if (trigger_instance >= 0 && summary_stats[NUM_EVENT_TYPES + trigger_instance] == NULL) { - summary_stats[NUM_EVENT_TYPES + trigger_instance] = (summary_stats_t*)calloc(1, sizeof(summary_stats_t)); - } + if ((trace[i].logical_time - start_time) >= trace_start_time + && (trace[i].logical_time - start_time) < trace_end_time) { + fprintf(output_file, "%s, %s, %d, %d, %lld, %d, %lld, %s, %lld\n", + trace_event_names[trace[i].event_type], + reactor_name, + trace[i].src_id, + trace[i].dst_id, + trace[i].logical_time - start_time, + trace[i].microstep, + trace[i].physical_time - start_time, + trigger_name, + trace[i].extra_delay + ); + // Update summary statistics. 
+ if (trace[i].physical_time > latest_time) { + latest_time = trace[i].physical_time; + } + if (object_instance >= 0 && summary_stats[NUM_EVENT_TYPES + object_instance] == NULL) { + summary_stats[NUM_EVENT_TYPES + object_instance] = (summary_stats_t*)calloc(1, sizeof(summary_stats_t)); + } + if (trigger_instance >= 0 && summary_stats[NUM_EVENT_TYPES + trigger_instance] == NULL) { + summary_stats[NUM_EVENT_TYPES + trigger_instance] = (summary_stats_t*)calloc(1, sizeof(summary_stats_t)); + } - summary_stats_t* stats = NULL; - interval_t exec_time; - reaction_stats_t* rstats; - int index; + summary_stats_t* stats = NULL; + interval_t exec_time; + reaction_stats_t* rstats; + int index; - // Count of event type. - if (summary_stats[trace[i].event_type] == NULL) { - summary_stats[trace[i].event_type] = (summary_stats_t*)calloc(1, sizeof(summary_stats_t)); - } - summary_stats[trace[i].event_type]->event_type = trace[i].event_type; - summary_stats[trace[i].event_type]->description = trace_event_names[trace[i].event_type]; - summary_stats[trace[i].event_type]->occurrences++; - - switch(trace[i].event_type) { - case reaction_starts: - case reaction_ends: - // This code relies on the mutual exclusion of reactions in a reactor - // and the ordering of reaction_starts and reaction_ends events. - if (trace[i].dst_id >= MAX_NUM_REACTIONS) { - fprintf(stderr, "WARNING: Too many reactions. 
Not all will be shown in summary file.\n"); - continue; - } - stats = summary_stats[NUM_EVENT_TYPES + object_instance]; - stats->description = reactor_name; - if (trace[i].dst_id >= stats->num_reactions_seen) { - stats->num_reactions_seen = trace[i].dst_id + 1; - } - rstats = &stats->reactions[trace[i].dst_id]; - if (trace[i].event_type == reaction_starts) { - rstats->latest_start_time = trace[i].physical_time; - } else { - rstats->occurrences++; - exec_time = trace[i].physical_time - rstats->latest_start_time; - rstats->latest_start_time = 0LL; - rstats->total_exec_time += exec_time; - if (exec_time > rstats->max_exec_time) { - rstats->max_exec_time = exec_time; + // Count of event type. + if (summary_stats[trace[i].event_type] == NULL) { + summary_stats[trace[i].event_type] = (summary_stats_t*)calloc(1, sizeof(summary_stats_t)); + } + summary_stats[trace[i].event_type]->event_type = trace[i].event_type; + summary_stats[trace[i].event_type]->description = trace_event_names[trace[i].event_type]; + summary_stats[trace[i].event_type]->occurrences++; + + switch(trace[i].event_type) { + case reaction_starts: + case reaction_ends: + // This code relies on the mutual exclusion of reactions in a reactor + // and the ordering of reaction_starts and reaction_ends events. + if (trace[i].dst_id >= MAX_NUM_REACTIONS) { + fprintf(stderr, "WARNING: Too many reactions. Not all will be shown in summary file.\n"); + continue; } - if (exec_time < rstats->min_exec_time || rstats->min_exec_time == 0LL) { - rstats->min_exec_time = exec_time; + stats = summary_stats[NUM_EVENT_TYPES + object_instance]; + stats->description = reactor_name; + if (trace[i].dst_id >= stats->num_reactions_seen) { + stats->num_reactions_seen = trace[i].dst_id + 1; } - } - break; - case schedule_called: - if (trigger_instance < 0) { - // No trigger. Do not report. 
- continue; - } - stats = summary_stats[NUM_EVENT_TYPES + trigger_instance]; - stats->description = trigger_name; - break; - case user_event: - // Although these are not exec times and not reactions, - // commandeer the first entry in the reactions array to track values. - stats = summary_stats[NUM_EVENT_TYPES + object_instance]; - stats->description = reactor_name; - break; - case user_value: - // Although these are not exec times and not reactions, - // commandeer the first entry in the reactions array to track values. - stats = summary_stats[NUM_EVENT_TYPES + object_instance]; - stats->description = reactor_name; - rstats = &stats->reactions[0]; - rstats->occurrences++; - // User values are stored in the "extra_delay" field, which is an interval_t. - interval_t value = trace[i].extra_delay; - rstats->total_exec_time += value; - if (value > rstats->max_exec_time) { - rstats->max_exec_time = value; - } - if (value < rstats->min_exec_time || rstats->min_exec_time == 0LL) { - rstats->min_exec_time = value; - } - break; - case worker_wait_starts: - case worker_wait_ends: - case scheduler_advancing_time_starts: - case scheduler_advancing_time_ends: - // Use the reactions array to store data. - // There will be two entries per worker, one for waits on the - // reaction queue and one for waits while advancing time. - index = trace[i].src_id * 2; - // Even numbered indices are used for waits on reaction queue. - // Odd numbered indices for waits for time advancement. - if (trace[i].event_type == scheduler_advancing_time_starts - || trace[i].event_type == scheduler_advancing_time_ends) { - index++; - } - if (object_table_size + index >= table_size) { - fprintf(stderr, "WARNING: Too many workers. 
Not all will be shown in summary file.\n"); - continue; - } - stats = summary_stats[NUM_EVENT_TYPES + object_table_size + index]; - if (stats == NULL) { - stats = (summary_stats_t*)calloc(1, sizeof(summary_stats_t)); - summary_stats[NUM_EVENT_TYPES + object_table_size + index] = stats; - } - // num_reactions_seen here will be used to store the number of - // entries in the reactions array, which is twice the number of workers. - if (index >= stats->num_reactions_seen) { - stats->num_reactions_seen = index; - } - rstats = &stats->reactions[index]; - if (trace[i].event_type == worker_wait_starts - || trace[i].event_type == scheduler_advancing_time_starts - ) { - rstats->latest_start_time = trace[i].physical_time; - } else { + rstats = &stats->reactions[trace[i].dst_id]; + if (trace[i].event_type == reaction_starts) { + rstats->latest_start_time = trace[i].physical_time; + } else { + rstats->occurrences++; + exec_time = trace[i].physical_time - rstats->latest_start_time; + rstats->latest_start_time = 0LL; + rstats->total_exec_time += exec_time; + if (exec_time > rstats->max_exec_time) { + rstats->max_exec_time = exec_time; + } + if (exec_time < rstats->min_exec_time || rstats->min_exec_time == 0LL) { + rstats->min_exec_time = exec_time; + } + } + break; + case schedule_called: + if (trigger_instance < 0) { + // No trigger. Do not report. + continue; + } + stats = summary_stats[NUM_EVENT_TYPES + trigger_instance]; + stats->description = trigger_name; + break; + case user_event: + // Although these are not exec times and not reactions, + // commandeer the first entry in the reactions array to track values. + stats = summary_stats[NUM_EVENT_TYPES + object_instance]; + stats->description = reactor_name; + break; + case user_value: + // Although these are not exec times and not reactions, + // commandeer the first entry in the reactions array to track values. 
+ stats = summary_stats[NUM_EVENT_TYPES + object_instance]; + stats->description = reactor_name; + rstats = &stats->reactions[0]; rstats->occurrences++; - exec_time = trace[i].physical_time - rstats->latest_start_time; - rstats->latest_start_time = 0LL; - rstats->total_exec_time += exec_time; - if (exec_time > rstats->max_exec_time) { - rstats->max_exec_time = exec_time; + // User values are stored in the "extra_delay" field, which is an interval_t. + interval_t value = trace[i].extra_delay; + rstats->total_exec_time += value; + if (value > rstats->max_exec_time) { + rstats->max_exec_time = value; } - if (exec_time < rstats->min_exec_time || rstats->min_exec_time == 0LL) { - rstats->min_exec_time = exec_time; + if (value < rstats->min_exec_time || rstats->min_exec_time == 0LL) { + rstats->min_exec_time = value; } - } - break; - default: - // No special summary statistics for the rest. - break; - } - // Common stats across event types. - if (stats != NULL) { - stats->occurrences++; - stats->event_type = trace[i].event_type; + break; + case worker_wait_starts: + case worker_wait_ends: + case scheduler_advancing_time_starts: + case scheduler_advancing_time_ends: + // Use the reactions array to store data. + // There will be two entries per worker, one for waits on the + // reaction queue and one for waits while advancing time. + index = trace[i].src_id * 2; + // Even numbered indices are used for waits on reaction queue. + // Odd numbered indices for waits for time advancement. + if (trace[i].event_type == scheduler_advancing_time_starts + || trace[i].event_type == scheduler_advancing_time_ends) { + index++; + } + if (object_table_size + index >= table_size) { + fprintf(stderr, "WARNING: Too many workers. 
Not all will be shown in summary file.\n"); + continue; + } + stats = summary_stats[NUM_EVENT_TYPES + object_table_size + index]; + if (stats == NULL) { + stats = (summary_stats_t*)calloc(1, sizeof(summary_stats_t)); + summary_stats[NUM_EVENT_TYPES + object_table_size + index] = stats; + } + // num_reactions_seen here will be used to store the number of + // entries in the reactions array, which is twice the number of workers. + if (index >= stats->num_reactions_seen) { + stats->num_reactions_seen = index; + } + rstats = &stats->reactions[index]; + if (trace[i].event_type == worker_wait_starts + || trace[i].event_type == scheduler_advancing_time_starts + ) { + rstats->latest_start_time = trace[i].physical_time; + } else { + rstats->occurrences++; + exec_time = trace[i].physical_time - rstats->latest_start_time; + rstats->latest_start_time = 0LL; + rstats->total_exec_time += exec_time; + if (exec_time > rstats->max_exec_time) { + rstats->max_exec_time = exec_time; + } + if (exec_time < rstats->min_exec_time || rstats->min_exec_time == 0LL) { + rstats->min_exec_time = exec_time; + } + } + break; + default: + // No special summary statistics for the rest. + break; + } + // Common stats across event types. + if (stats != NULL) { + stats->occurrences++; + stats->event_type = trace[i].event_type; + } + } else { + // Out of scope. } } return trace_length; @@ -394,17 +399,97 @@ void write_summary_file() { } } -int main(int argc, char* argv[]) { - if (argc != 2) { - usage(); - exit(0); +instant_t string_to_instant(const char* time_spec, const char* units) { + instant_t duration; + #if defined(PLATFORM_ARDUINO) + duration = atol(time_spec); + #else + duration = atoll(time_spec); + #endif + // A parse error returns 0LL, so check to see whether that is what is meant. + if (duration == 0LL && strncmp(time_spec, "0", 1) != 0) { + // Parse error. 
+ printf("Invalid time value: %s", time_spec); + return -1; + } + if (strncmp(units, "sec", 3) == 0) { + duration = SEC(duration); + } else if (strncmp(units, "msec", 4) == 0) { + duration = MSEC(duration); + } else if (strncmp(units, "usec", 4) == 0) { + duration = USEC(duration); + } else if (strncmp(units, "nsec", 4) == 0) { + duration = NSEC(duration); + } else if (strncmp(units, "min", 3) == 0) { + duration = MINUTE(duration); + } else if (strncmp(units, "hour", 4) == 0) { + duration = HOUR(duration); + } else if (strncmp(units, "day", 3) == 0) { + duration = DAY(duration); + } else if (strncmp(units, "week", 4) == 0) { + duration = WEEK(duration); + } else { + // Invalid units. + printf("Invalid time units: %s", units); + return -1; + } + return duration; +} + +int process_args(int argc, const char* argv[], char** root, instant_t* start_time, instant_t* end_time) { + int i = 1; + while (i < argc){ + const char* arg = argv[i++]; + if (strcmp(strrchr(arg, '\0') - 4, ".lft") == 0) { + // Open the trace file. 
+ trace_file = open_file(arg, "r"); + if (trace_file == NULL) exit(1); + *root = root_name(arg); + } else if (strcmp(arg, "-s") == 0) { + // sscanf(argv[++i], "%ld", start_time); + if (argc < i + 2) { + printf("-s needs time value and unit."); + usage(); + return -1; + } + const char* time_spec = argv[i++]; + const char* units = argv[i++]; + *start_time = string_to_instant(time_spec, units); + if (*start_time == -1) { + usage(); + return -1; + } + } else if (strcmp(arg, "-e") == 0) { + if (argc < i + 2) { + printf("-e needs time value and unit."); + usage(); + return -1; + } + const char* time_spec = argv[i++]; + const char* units = argv[i++]; + *end_time = string_to_instant(time_spec, units); + if (*end_time == -1) { + usage(); + return -1; + } + } else { + usage(); + exit(0); + } + } + return 0; +} + +int main(int argc, const char* argv[]) { + instant_t trace_start_time = NEVER; + instant_t trace_end_time = FOREVER; + char* root; + + if (process_args(argc, argv, &root, &trace_start_time, &trace_end_time) != 0) { + return -1; } - // Open the trace file. - trace_file = open_file(argv[1], "r"); - if (trace_file == NULL) exit(1); // Construct the name of the csv output file and open it. - char* root = root_name(argv[1]); char csv_filename[strlen(root) + 5]; strcpy(csv_filename, root); strcat(csv_filename, ".csv"); @@ -427,10 +512,10 @@ int main(int argc, char* argv[]) { // Write a header line into the CSV file. fprintf(output_file, "Event, Reactor, Source, Destination, Elapsed Logical Time, Microstep, Elapsed Physical Time, Trigger, Extra Delay\n"); - while (read_and_write_trace() != 0) {}; + while (read_and_write_trace(trace_start_time, trace_end_time) != 0) {}; write_summary_file(); // File closing is handled by termination function. 
} -} +} \ No newline at end of file diff --git a/util/tracing/visualization/README.md b/util/tracing/visualization/README.md index c3ca6ad1ab..61eee36adb 100644 --- a/util/tracing/visualization/README.md +++ b/util/tracing/visualization/README.md @@ -38,4 +38,10 @@ It is also possible to operate on specific files. In such a case, run `fedsd` wi fedsd -r -f ``` +If the trace is too long, the target time interval can be specified. Running `fedsd` with `-s ` will show the messages with the tag later than or equal to the start time and with `-e ` will show the messages with the tag strictly earlier than the end_time. + +```bash +fedsd -s -e +``` + The output is an html file named `trace_svg.html` (in the current directory) that contains the sequence of interactions between the federates and the RTI. diff --git a/util/tracing/visualization/fedsd.py b/util/tracing/visualization/fedsd.py index d9d44253ba..b35e96dd7b 100644 --- a/util/tracing/visualization/fedsd.py +++ b/util/tracing/visualization/fedsd.py @@ -41,6 +41,7 @@ # communication rendering prune_event_name = { "Sending ACK": "ACK", + "Sending FAILED": "FAILED", "Sending TIMESTAMP": "TIMESTAMP", "Sending NET": "NET", "Sending LTC": "LTC", @@ -61,6 +62,7 @@ "Sending ADR_AD": "ADR_AD", "Sending ADR_QR": "ADR_QR", "Receiving ACK": "ACK", + "Receiving FAILED": "FAILED", "Receiving TIMESTAMP": "TIMESTAMP", "Receiving NET": "NET", "Receiving LTC": "LTC", @@ -100,11 +102,15 @@ help='RTI\'s lft trace file.') parser.add_argument('-f','--federates', nargs='+', help='List of the federates\' lft trace files.') +parser.add_argument('-s', '--start', type=str, nargs=2, + help='Start time of visualization in elapsed logical time. [time_value time_unit]') +parser.add_argument('-e', '--end', type=str, nargs=2, + help='End time of visualization in elapsed logical time. 
[time_value time_unit]') # Events matching at the sender and receiver ends depend on whether they are tagged # (the elapsed logical time and microstep have to be the same) or not. # Set of tagged events (messages) -non_tagged_messages = {'FED_ID', 'ACK', 'REJECT', 'ADR_RQ', 'ADR_AD', 'MSG', 'P2P_MSG'} +non_tagged_messages = {'FED_ID', 'ACK', 'RESIGN', 'FAILED', 'REJECT', 'ADR_RQ', 'ADR_AD', 'MSG', 'P2P_MSG'} ################################################################################ @@ -372,7 +378,7 @@ def command_is_in_path(command): return True return False -def convert_lft_file_to_csv(lft_file): +def convert_lft_file_to_csv(lft_file, start_time, end_time): ''' Call trace_to_csv command to convert the given binary lft trace file to csv format. @@ -382,7 +388,14 @@ def convert_lft_file_to_csv(lft_file): * File: the converted csv file, if the conversion succeeds, and empty string otherwise. * String: the error message, in case the conversion did not succeed, and empty string otherwise. ''' - convert_process = subprocess.run(['trace_to_csv', lft_file], stdout=subprocess.DEVNULL) + + subprocess_args = ['trace_to_csv', lft_file] + if (start_time != None): + subprocess_args.extend(['-s', start_time[0], start_time[1]]) + if (end_time != None): + subprocess_args.extend(['-e', end_time[0], end_time[1]]) + + convert_process = subprocess.run(subprocess_args, stdout=subprocess.DEVNULL) if (convert_process.returncode == 0): csv_file = os.path.splitext(lft_file)[0] + '.csv' @@ -390,7 +403,7 @@ def convert_lft_file_to_csv(lft_file): else: return '', str(convert_process.stderr) -def get_and_convert_lft_files(rti_lft_file, federates_lft_files): +def get_and_convert_lft_files(rti_lft_file, federates_lft_files, start_time, end_time): ''' Check if the passed arguments are valid, in the sense that the files do exist. If not arguments were passed, then look up the local lft files. 
@@ -428,7 +441,7 @@ def get_and_convert_lft_files(rti_lft_file, federates_lft_files): # Now, convert lft files to csv if (rti_lft_file): - rti_csv_file, error = convert_lft_file_to_csv(rti_lft_file) + rti_csv_file, error = convert_lft_file_to_csv(rti_lft_file, start_time, end_time) if (not rti_csv_file): print('Fedsf: Error converting the RTI\'s lft file: ' + error) else: @@ -436,7 +449,7 @@ def get_and_convert_lft_files(rti_lft_file, federates_lft_files): federates_csv_files = [] for file in federates_lft_files: - fed_csv_file, error = convert_lft_file_to_csv(file) + fed_csv_file, error = convert_lft_file_to_csv(file, start_time, end_time) if (not fed_csv_file): print('Fedsf: Error converting the federate lft file ' + file + ': ' + error) else: @@ -459,7 +472,7 @@ def get_and_convert_lft_files(rti_lft_file, federates_lft_files): # Look up the lft files and transform them to csv files - rti_csv_file, federates_csv_files = get_and_convert_lft_files(args.rti, args.federates) + rti_csv_file, federates_csv_files = get_and_convert_lft_files(args.rti, args.federates, args.start, args.end) # The RTI and each of the federates have a fixed x coordinate. They will be # saved in a dict @@ -659,7 +672,7 @@ def get_and_convert_lft_files(rti_lft_file, federates_lft_files): # FIXME: Using microseconds is hardwired here. 
physical_time = f'{int(row["physical_time"]/1000):,}' - if (row['event'] in {'FED_ID', 'ACK', 'REJECT', 'ADR_RQ', 'ADR_AD', 'MSG', 'P2P_MSG'}): + if (row['event'] in {'FED_ID', 'ACK', 'FAILED', 'REJECT', 'ADR_RQ', 'ADR_AD', 'MSG', 'P2P_MSG'}): label = row['event'] else: label = row['event'] + '(' + f'{int(row["logical_time"]):,}' + ', ' + str(row['microstep']) + ')' @@ -696,4 +709,4 @@ def get_and_convert_lft_files(rti_lft_file, federates_lft_files): # Write to a csv file, just to double check trace_df.to_csv('all.csv', index=True) - print('Fedsd: Successfully generated the sequence diagram in trace_svg.html.') + print('Fedsd: Successfully generated the sequence diagram in trace_svg.html.') \ No newline at end of file