diff --git a/Dockerfile b/Dockerfile index 0e7550e89..73d6ca71d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,14 +7,15 @@ WORKDIR /home/ubuntu/software/jasminegraph COPY ./GraphSAGE ./GraphSAGE COPY ./build.sh ./build.sh -COPY ./run-docker.sh ./run-docker.sh COPY ./CMakeLists.txt ./CMakeLists.txt -COPY ./src_python ./src_python COPY ./main.h ./main.h COPY ./main.cpp ./main.cpp COPY ./src ./src RUN sh build.sh + +COPY ./run-docker.sh ./run-docker.sh +COPY ./src_python ./src_python COPY ./conf ./conf ENTRYPOINT ["/home/ubuntu/software/jasminegraph/run-docker.sh"] diff --git a/run-docker.sh b/run-docker.sh index 5703b7ebf..a44c03e1d 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -65,3 +65,7 @@ if [ $MODE -eq 1 ]; then else ./JasmineGraph "docker" $MODE $HOST_NAME $MASTERIP $SERVER_PORT $SERVER_DATA_PORT $ENABLE_NMON fi + +if [ "$TESTING" = "true" ]; then + chmod -R go+w /tmp/jasminegraph +fi diff --git a/src/server/JasmineGraphInstanceService.cpp b/src/server/JasmineGraphInstanceService.cpp index acae39c2c..73001a738 100644 --- a/src/server/JasmineGraphInstanceService.cpp +++ b/src/server/JasmineGraphInstanceService.cpp @@ -4482,15 +4482,17 @@ void JasmineGraphInstanceService::initServer(string trainData){ std::vector vc; std::transform(trainargs.begin(), trainargs.end(), std::back_inserter(vc), converter); + std::string log_file = "/tmp/jasminegraph/fl_server_" + partitionID + ".log"; std::string path = "cd " + utils.getJasmineGraphProperty("org.jasminegraph.fl.location") + " && "; std::string command = path + "python3.8 fl_server.py "+ utils.getJasmineGraphProperty("org.jasminegraph.fl.weights") + " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.dataDir") + " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.dataDir")+ " "+ graphID + " 0 " + utils.getJasmineGraphProperty("org.jasminegraph.fl_clients") - + " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.epochs") +" localhost 5000 > " - + "/home/ubuntu/software/jasminegraph/logs/server_logs-" + Utils::getCurrentTimestamp() + ".txt"; + + " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.epochs") +" localhost 5000" + + " >>" + log_file + " 2>&1"; instance_logger.log("Executing : " + command, "info"); int exit_status = system(command.c_str()); + chmod(log_file.c_str(), 0666); if (exit_status == -1) { instance_logger.error("Failed executing python server for query"); } @@ -4570,16 +4572,18 @@ void JasmineGraphInstanceService::initClient(string trainData){ std::vector vc; std::transform(trainargs.begin(), trainargs.end(), std::back_inserter(vc), converter); + std::string log_file = "/tmp/jasminegraph/fl_client_" + partitionID + ".log"; std::string path = "cd " + utils.getJasmineGraphProperty("org.jasminegraph.fl.location") + " && "; std::string command = path + "python3.8 fl_client.py "+ utils.getJasmineGraphProperty("org.jasminegraph.fl.weights") + " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.dataDir") + " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.dataDir")+ " "+ graphID + " " + partitionID + " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.epochs") + " localhost " + utils.getJasmineGraphProperty("org.jasminegraph.fl.org.port") - + " > /home/ubuntu/software/jasminegraph/logs/client_logs_" + partitionID + "-" + Utils::getCurrentTimestamp() + ".txt"; + + " >>" + log_file + " 2>&1"; instance_logger.log("Executing : " + command, "info"); int exit_status = system(command.c_str()); + chmod(log_file.c_str(), 0666); if (exit_status == -1) { instance_logger.error("Could not start python client"); } @@ -4593,14 +4597,16 @@ void JasmineGraphInstanceService::mergeFiles(string trainData){ string partitionID = trainargs[2]; int exit_status; + std::string log_file = "/tmp/jasminegraph/merge_" + partitionID + ".log"; std::string path = "cd " + utils.getJasmineGraphProperty("org.jasminegraph.fl.location") + " && "; std::string command = path + "python3.8 merge.py "+ utils.getJasmineGraphProperty("org.jasminegraph.server.instance.datafolder")+ " " + utils.getJasmineGraphProperty("org.jasminegraph.server.instance.trainedmodelfolder") + " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.dataDir") + " " + graphID + " " + partitionID - + " > /home/ubuntu/software/jasminegraph/logs/merge_logs" + partitionID + "-" + Utils::getCurrentTimestamp() + ".txt"; + + " >>" + log_file + " 2>&1"; instance_logger.log("Executing : " + command, "info"); exit_status = system(command.c_str()); + chmod(log_file.c_str(), 0666); if (exit_status == -1) { instance_logger.error("Merge Command Execution Failed for Graph ID - Patition ID: " + graphID + " - " + partitionID + "; Error : " + strerror(errno)); } diff --git a/src/server/JasmineGraphServer.cpp b/src/server/JasmineGraphServer.cpp index 047bf7b89..ec23c4169 100644 --- a/src/server/JasmineGraphServer.cpp +++ b/src/server/JasmineGraphServer.cpp @@ -16,6 +16,7 @@ limitations under the License. #include #include #include +#include #include "JasmineGraphServer.h" #include "JasmineGraphInstance.h" #include "../util/Utils.h" @@ -329,6 +330,14 @@ void JasmineGraphServer::startRemoteWorkers(std::vector workerPortsVector, char *env_testing = getenv("TESTING"); bool is_testing = (env_testing != NULL && strcasecmp(env_testing, "true") == 0); for (int i =0 ; i < workerPortsVector.size() ; i++) { + std::string worker_logdir = "/tmp/jasminegraph/worker_" + to_string(i); + if (access(worker_logdir.c_str(), F_OK) != 0) { + if (mkdir(worker_logdir.c_str(), 0777)) { + server_logger.error("Couldn't create worker log dir: " + worker_logdir); + } + } else { + chmod(worker_logdir.c_str(), 0777); + } if (masterHost == host || host == "localhost") { if (is_testing) { serverStartScript = "docker run -p " + @@ -336,12 +345,13 @@ void JasmineGraphServer::startRemoteWorkers(std::vector workerPortsVector, std::to_string(workerPortsVector.at(i)) + " -p " + std::to_string(workerDataPortsVector.at(i)) + ":" + std::to_string(workerDataPortsVector.at(i)) + + " -v " + worker_logdir + ":/tmp/jasminegraph" + " -e WORKER_ID=" + to_string(i) + " jasminegraph:test --MODE 2 --HOST_NAME " + host + " --MASTERIP " + masterHost + " --SERVER_PORT " + std::to_string(workerPortsVector.at(i)) + " --SERVER_DATA_PORT " + std::to_string(workerDataPortsVector.at(i)) + " --ENABLE_NMON " + enableNmon + - " >/tmp/worker_logs/worker_" + to_string(i) + ".log 2>&1"; + " >" + worker_logdir + "/worker.log 2>&1"; } else { serverStartScript = "docker run -v " + instanceDataFolder + ":" + instanceDataFolder + " -v " + aggregateDataFolder + ":" + aggregateDataFolder + @@ -370,7 +380,7 @@ void JasmineGraphServer::startRemoteWorkers(std::vector workerPortsVector, " --MASTERIP " + masterHost + " --SERVER_PORT " + std::to_string(workerPortsVector.at(i)) + " --SERVER_DATA_PORT " + std::to_string(workerDataPortsVector.at(i)) + " --ENABLE_NMON " + enableNmon + - " >/tmp/worker_logs/worker_" + to_string(i) + ".log 2>&1"; + " >" + worker_logdir + "/worker.log 2>&1"; } else { serverStartScript = "docker -H ssh://" + host + " run -v " + instanceDataFolder + ":" + instanceDataFolder + " -v " + aggregateDataFolder + ":" + aggregateDataFolder + diff --git a/test-docker.sh b/test-docker.sh index b9eaebd12..dc6278a39 100755 --- a/test-docker.sh +++ b/test-docker.sh @@ -18,13 +18,20 @@ mkdir "$LOG_DIR" BUILD_LOG="${LOG_DIR}/build.log" RUN_LOG="${LOG_DIR}/run_master.log" TEST_LOG="${LOG_DIR}/test.log" +WORKER_LOG_DIR="/tmp/jasminegraph" +rm -rf "${WORKER_LOG_DIR}" +mkdir -p "${WORKER_LOG_DIR}" stop_and_remove_containers() { - if [ "$(docker ps -q)" ]; then + if [ "$(docker ps -a -q)" ]; then docker ps -a -q | xargs docker rm -f &>/dev/null else echo "No containers to stop and remove." fi + docker run -v '/tmp/jasminegraph:/tmp/jasminegraph' --entrypoint /bin/bash jasminegraph:test -c 'rm -rf /tmp/jasminegraph/*' || echo 'Not removing existing tmp logs' + if [ "$(docker ps -a -q)" ]; then + docker ps -a -q | xargs docker rm -f &>/dev/null + fi } build_and_run_docker() { @@ -39,13 +46,12 @@ build_and_run_docker() { rm -rf "${TEST_ROOT}/env" exit "$build_status" fi - docker compose -f "${TEST_ROOT}/docker-compose.yml" up |& tee "$RUN_LOG" &>/dev/null & + docker compose -f "${TEST_ROOT}/docker-compose.yml" up >"$RUN_LOG" 2>&1 & } cd "$TEST_ROOT" rm -rf env cp -r env_init env -mkdir -p env/logs cd "$PROJECT_ROOT" build_and_run_docker @@ -77,23 +83,44 @@ if [ "$exit_code" == '124' ]; then fi cd "$TEST_ROOT" -for f in env/logs/*; do - fname="$(basename ${f})" - cp "$f" "${LOG_DIR}/run_${fname}" +for d in "${WORKER_LOG_DIR}"/worker_*; do + echo + worker_name="$(basename ${d})" + cp -r "$d" "${LOG_DIR}/${worker_name}" done + cd "$LOG_DIR" if [ "$exit_code" != '0' ]; then echo echo -e '\e[33;1mMaster log:\e[0m' cat "$RUN_LOG" - for f in run_worker_*; do + for d in worker_*; do + cd "${LOG_DIR}/${d}" echo - echo -e '\e[33;1m'"${f:4:-4}"' log:\e[0m' - cat "$f" + echo -e '\e[33;1m'"${d}"' log:\e[0m' + cat worker.log + + for f in merge_*.log; do + echo + echo -e '\e[33;1m'"${d} ${f::-4}"' log:\e[0m' + cat "$f" + done + + for f in fl_client_*.log; do + echo + echo -e '\e[33;1m'"${d} ${f::-4}"' log:\e[0m' + cat "$f" + done + + for f in fl_server_*.log; do + echo + echo -e '\e[33;1m'"${d} ${f::-4}"' log:\e[0m' + cat "$f" + done done fi -rm -rf "${TEST_ROOT}/env" stop_and_remove_containers +rm -rf "${TEST_ROOT}/env" "${WORKER_LOG_DIR}" exit "$exit_code" diff --git a/tests/integration/docker-compose.yml b/tests/integration/docker-compose.yml index 5f132eb42..7523ef618 100644 --- a/tests/integration/docker-compose.yml +++ b/tests/integration/docker-compose.yml @@ -10,7 +10,7 @@ services: - './env/databases/metadb:/home/ubuntu/software/jasminegraph/metadb' - './env/databases/performancedb:/home/ubuntu/software/jasminegraph/performancedb' - './env/data:/var/tmp/data' - - './env/logs:/tmp/worker_logs' + - '/tmp/jasminegraph:/tmp/jasminegraph' environment: - TESTING=true networks: