Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve merge, fl_client, and fl_server logs #190

Merged
merged 1 commit into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ WORKDIR /home/ubuntu/software/jasminegraph
COPY ./GraphSAGE ./GraphSAGE

COPY ./build.sh ./build.sh
COPY ./run-docker.sh ./run-docker.sh
COPY ./CMakeLists.txt ./CMakeLists.txt
COPY ./src_python ./src_python
COPY ./main.h ./main.h
COPY ./main.cpp ./main.cpp
COPY ./src ./src

RUN sh build.sh

COPY ./run-docker.sh ./run-docker.sh
COPY ./src_python ./src_python
COPY ./conf ./conf

ENTRYPOINT ["/home/ubuntu/software/jasminegraph/run-docker.sh"]
Expand Down
4 changes: 4 additions & 0 deletions run-docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,7 @@ if [ $MODE -eq 1 ]; then
else
./JasmineGraph "docker" $MODE $HOST_NAME $MASTERIP $SERVER_PORT $SERVER_DATA_PORT $ENABLE_NMON
fi

if [ "$TESTING" = "true" ]; then
chmod -R go+w /tmp/jasminegraph
fi
14 changes: 10 additions & 4 deletions src/server/JasmineGraphInstanceService.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4482,15 +4482,17 @@ void JasmineGraphInstanceService::initServer(string trainData){
std::vector<char *> vc;
std::transform(trainargs.begin(), trainargs.end(), std::back_inserter(vc), converter);

std::string log_file = "/tmp/jasminegraph/fl_server_" + partitionID + ".log";
std::string path = "cd " + utils.getJasmineGraphProperty("org.jasminegraph.fl.location") + " && ";
std::string command = path + "python3.8 fl_server.py "+ utils.getJasmineGraphProperty("org.jasminegraph.fl.weights") + " "
+ utils.getJasmineGraphProperty("org.jasminegraph.fl.dataDir")
+ " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.dataDir")+ " "+ graphID + " 0 "
+ utils.getJasmineGraphProperty("org.jasminegraph.fl_clients")
+ " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.epochs") +" localhost 5000 > "
+ "/home/ubuntu/software/jasminegraph/logs/server_logs-" + Utils::getCurrentTimestamp() + ".txt";
+ " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.epochs") +" localhost 5000"
+ " >>" + log_file + " 2>&1";
instance_logger.log("Executing : " + command, "info");
int exit_status = system(command.c_str());
chmod(log_file.c_str(), 0666);
if (exit_status == -1) {
instance_logger.error("Failed executing python server for query");
}
Expand Down Expand Up @@ -4570,16 +4572,18 @@ void JasmineGraphInstanceService::initClient(string trainData){
std::vector<char *> vc;
std::transform(trainargs.begin(), trainargs.end(), std::back_inserter(vc), converter);

std::string log_file = "/tmp/jasminegraph/fl_client_" + partitionID + ".log";
std::string path = "cd " + utils.getJasmineGraphProperty("org.jasminegraph.fl.location") + " && ";
std::string command = path + "python3.8 fl_client.py "+ utils.getJasmineGraphProperty("org.jasminegraph.fl.weights") + " "
+ utils.getJasmineGraphProperty("org.jasminegraph.fl.dataDir")
+ " " + utils.getJasmineGraphProperty("org.jasminegraph.fl.dataDir")+ " "+ graphID + " " + partitionID + " "
+ utils.getJasmineGraphProperty("org.jasminegraph.fl.epochs")
+ " localhost " + utils.getJasmineGraphProperty("org.jasminegraph.fl.org.port")
+ " > /home/ubuntu/software/jasminegraph/logs/client_logs_" + partitionID + "-" + Utils::getCurrentTimestamp() + ".txt";
+ " >>" + log_file + " 2>&1";

instance_logger.log("Executing : " + command, "info");
int exit_status = system(command.c_str());
chmod(log_file.c_str(), 0666);
if (exit_status == -1) {
instance_logger.error("Could not start python client");
}
Expand All @@ -4593,14 +4597,16 @@ void JasmineGraphInstanceService::mergeFiles(string trainData){
string partitionID = trainargs[2];
int exit_status;

std::string log_file = "/tmp/jasminegraph/merge_" + partitionID + ".log";
std::string path = "cd " + utils.getJasmineGraphProperty("org.jasminegraph.fl.location") + " && ";
std::string command = path + "python3.8 merge.py "+ utils.getJasmineGraphProperty("org.jasminegraph.server.instance.datafolder")+ " "
+ utils.getJasmineGraphProperty("org.jasminegraph.server.instance.trainedmodelfolder") + " "
+ utils.getJasmineGraphProperty("org.jasminegraph.fl.dataDir") + " " + graphID + " " + partitionID
+ " > /home/ubuntu/software/jasminegraph/logs/merge_logs" + partitionID + "-" + Utils::getCurrentTimestamp() + ".txt";
+ " >>" + log_file + " 2>&1";

instance_logger.log("Executing : " + command, "info");
exit_status = system(command.c_str());
chmod(log_file.c_str(), 0666);
if (exit_status == -1) {
instance_logger.error("Merge Command Execution Failed for Graph ID - Patition ID: " + graphID + " - " + partitionID + "; Error : " + strerror(errno));
}
Expand Down
14 changes: 12 additions & 2 deletions src/server/JasmineGraphServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ limitations under the License.
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <sys/stat.h>
#include "JasmineGraphServer.h"
#include "JasmineGraphInstance.h"
#include "../util/Utils.h"
Expand Down Expand Up @@ -329,19 +330,28 @@ void JasmineGraphServer::startRemoteWorkers(std::vector<int> workerPortsVector,
char *env_testing = getenv("TESTING");
bool is_testing = (env_testing != NULL && strcasecmp(env_testing, "true") == 0);
for (int i =0 ; i < workerPortsVector.size() ; i++) {
std::string worker_logdir = "/tmp/jasminegraph/worker_" + to_string(i);
if (access(worker_logdir.c_str(), F_OK) != 0) {
if (mkdir(worker_logdir.c_str(), 0777)) {
server_logger.error("Couldn't create worker log dir: " + worker_logdir);
}
} else {
chmod(worker_logdir.c_str(), 0777);
}
if (masterHost == host || host == "localhost") {
if (is_testing) {
serverStartScript = "docker run -p " +
std::to_string(workerPortsVector.at(i)) + ":" +
std::to_string(workerPortsVector.at(i)) + " -p " +
std::to_string(workerDataPortsVector.at(i)) + ":" +
std::to_string(workerDataPortsVector.at(i)) +
" -v " + worker_logdir + ":/tmp/jasminegraph" +
" -e WORKER_ID=" + to_string(i) +
" jasminegraph:test --MODE 2 --HOST_NAME " + host +
" --MASTERIP " + masterHost + " --SERVER_PORT " +
std::to_string(workerPortsVector.at(i)) + " --SERVER_DATA_PORT " +
std::to_string(workerDataPortsVector.at(i)) + " --ENABLE_NMON " + enableNmon +
" >/tmp/worker_logs/worker_" + to_string(i) + ".log 2>&1";
" >" + worker_logdir + "/worker.log 2>&1";
} else {
serverStartScript = "docker run -v " + instanceDataFolder + ":" + instanceDataFolder +
" -v " + aggregateDataFolder + ":" + aggregateDataFolder +
Expand Down Expand Up @@ -370,7 +380,7 @@ void JasmineGraphServer::startRemoteWorkers(std::vector<int> workerPortsVector,
" --MASTERIP " + masterHost + " --SERVER_PORT " +
std::to_string(workerPortsVector.at(i)) + " --SERVER_DATA_PORT " +
std::to_string(workerDataPortsVector.at(i)) + " --ENABLE_NMON " + enableNmon +
" >/tmp/worker_logs/worker_" + to_string(i) + ".log 2>&1";
" >" + worker_logdir + "/worker.log 2>&1";
} else {
serverStartScript = "docker -H ssh://" + host + " run -v " + instanceDataFolder + ":" + instanceDataFolder +
" -v " + aggregateDataFolder + ":" + aggregateDataFolder +
Expand Down
47 changes: 37 additions & 10 deletions test-docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,20 @@ mkdir "$LOG_DIR"
BUILD_LOG="${LOG_DIR}/build.log"
RUN_LOG="${LOG_DIR}/run_master.log"
TEST_LOG="${LOG_DIR}/test.log"
WORKER_LOG_DIR="/tmp/jasminegraph"
rm -rf "${WORKER_LOG_DIR}"
mkdir -p "${WORKER_LOG_DIR}"

stop_and_remove_containers() {
if [ "$(docker ps -q)" ]; then
if [ "$(docker ps -a -q)" ]; then
docker ps -a -q | xargs docker rm -f &>/dev/null
else
echo "No containers to stop and remove."
fi
docker run -v '/tmp/jasminegraph:/tmp/jasminegraph' --entrypoint /bin/bash jasminegraph:test -c 'rm -rf /tmp/jasminegraph/*' || echo 'Not removing existing tmp logs'
if [ "$(docker ps -a -q)" ]; then
docker ps -a -q | xargs docker rm -f &>/dev/null
fi
}

build_and_run_docker() {
Expand All @@ -39,13 +46,12 @@ build_and_run_docker() {
rm -rf "${TEST_ROOT}/env"
exit "$build_status"
fi
docker compose -f "${TEST_ROOT}/docker-compose.yml" up |& tee "$RUN_LOG" &>/dev/null &
docker compose -f "${TEST_ROOT}/docker-compose.yml" up >"$RUN_LOG" 2>&1 &
}

cd "$TEST_ROOT"
rm -rf env
cp -r env_init env
mkdir -p env/logs
cd "$PROJECT_ROOT"
build_and_run_docker

Expand Down Expand Up @@ -77,23 +83,44 @@ if [ "$exit_code" == '124' ]; then
fi

cd "$TEST_ROOT"
for f in env/logs/*; do
fname="$(basename ${f})"
cp "$f" "${LOG_DIR}/run_${fname}"
for d in "${WORKER_LOG_DIR}"/worker_*; do
echo
worker_name="$(basename ${d})"
cp -r "$d" "${LOG_DIR}/${worker_name}"
done

cd "$LOG_DIR"
if [ "$exit_code" != '0' ]; then
echo
echo -e '\e[33;1mMaster log:\e[0m'
cat "$RUN_LOG"

for f in run_worker_*; do
for d in worker_*; do
cd "${LOG_DIR}/${d}"
echo
echo -e '\e[33;1m'"${f:4:-4}"' log:\e[0m'
cat "$f"
echo -e '\e[33;1m'"${d}"' log:\e[0m'
cat worker.log

for f in merge_*.log; do
echo
echo -e '\e[33;1m'"${d} ${f::-4}"' log:\e[0m'
cat "$f"
done

for f in fl_client_*.log; do
echo
echo -e '\e[33;1m'"${d} ${f::-4}"' log:\e[0m'
cat "$f"
done

for f in fl_server_*.log; do
echo
echo -e '\e[33;1m'"${d} ${f::-4}"' log:\e[0m'
cat "$f"
done
done
fi

rm -rf "${TEST_ROOT}/env"
stop_and_remove_containers
rm -rf "${TEST_ROOT}/env" "${WORKER_LOG_DIR}"
exit "$exit_code"
2 changes: 1 addition & 1 deletion tests/integration/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
- './env/databases/metadb:/home/ubuntu/software/jasminegraph/metadb'
- './env/databases/performancedb:/home/ubuntu/software/jasminegraph/performancedb'
- './env/data:/var/tmp/data'
- './env/logs:/tmp/worker_logs'
- '/tmp/jasminegraph:/tmp/jasminegraph'
environment:
- TESTING=true
networks:
Expand Down