diff --git a/admin/templates/configuration/etc/log4cxx.index.properties b/admin/templates/configuration/etc/log4cxx.index.properties
new file mode 100644
index 0000000000..b34769a14e
--- /dev/null
+++ b/admin/templates/configuration/etc/log4cxx.index.properties
@@ -0,0 +1,18 @@
+#
+# Configuration file for log4cxx
+# used by the index worker and client containers.
+# Their startup scripts (appWorker.bash, appClientNum.bash) select it with:
+# export LSST_LOG_CONFIG=/home/qserv/dev/qserv/admin/templates/configuration/etc/log4cxx.index.properties
+#
+
+log4j.rootLogger=INFO, CONSOLE
+#log4j.rootLogger=DEBUG, CONSOLE
+#log4j.rootLogger=WARN, CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+#log4j.appender.CONSOLE.layout.ConversionPattern=[%d{yyyy-MM-ddTHH:mm:ss.SSSZ}] [%t] %-5p %c{2} (%F:%L) - %m%n
+log4j.appender.CONSOLE.layout.ConversionPattern=[%d{ddTHH:mm:ss.SSSZ}] [%t] %-5p %c{2} (%F:%L) - %m%n
+
+# Tune log at the module level
+#log4j.logger.lsst.qserv.util=DEBUG
diff --git a/admin/templates/configuration/etc/log4cxx.index_master.properties b/admin/templates/configuration/etc/log4cxx.index_master.properties
new file mode 100644
index 0000000000..0a3b0c6da5
--- /dev/null
+++ b/admin/templates/configuration/etc/log4cxx.index_master.properties
@@ -0,0 +1,18 @@
+#
+# Configuration file for log4cxx
+# used by the index master container.
+# Its startup script (appMaster.bash) selects it with:
+# export LSST_LOG_CONFIG=/home/qserv/dev/qserv/admin/templates/configuration/etc/log4cxx.index_master.properties
+#
+
+#log4j.rootLogger=INFO, CONSOLE
+log4j.rootLogger=DEBUG, CONSOLE
+#log4j.rootLogger=WARN, CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+#log4j.appender.CONSOLE.layout.ConversionPattern=[%d{yyyy-MM-ddTHH:mm:ss.SSSZ}] [%t] %-5p %c{2} (%F:%L) - %m%n
+log4j.appender.CONSOLE.layout.ConversionPattern=[%d{ddTHH:mm:ss.SSSZ}] [%t] %-5p %c{2} (%F:%L) - %m%n
+
+# Tune log at the module level
+#log4j.logger.lsst.qserv.util=DEBUG
diff --git 
a/admin/tools/docker/index/container/buildContainers.README b/admin/tools/docker/index/container/buildContainers.README new file mode 100644 index 0000000000..8c412da52c --- /dev/null +++ b/admin/tools/docker/index/container/buildContainers.README @@ -0,0 +1,22 @@ +Invoking the following command from the qserv directory should build and push the containers and is +useful for breaking the build into smaller commands when there are problems. + +It helps to do a "rm -rf bin share build" before running as docker copies everything in +the qserv directory and includes it in the base container. This saves a couple of GB +in both the initial copy and pushing the containers. + + +docker build -f admin/tools/docker/index/container/dev/Dockerfile -t qserv/indexbase:dev . && \ +cd admin/tools/docker/index/container/dev/worker/ && docker build -t qserv/indexworker:dev . && \ +cd ../master/ && docker build -t qserv/indexmaster:dev . && \ +cd ../clientNum/ && docker build -t qserv/indexclientnum:dev . && \ +cd ../../../../../../../../qserv +docker push qserv/indexmaster:dev && docker push qserv/indexworker:dev && docker push qserv/indexclientnum:dev + + +Useful kubernetes commands: +kubectl apply -f admin/tools/docker/index/index-k8-m.yaml +kubectl delete -f admin/tools/docker/index/index-k8-m.yaml +kubectl get pods +kubectl logs -f imaster-sts-0 | grep -i keycount +kubectl logs -f iclientnum2-sts-0 | egrep "DONE|INSERT|LOOK" diff --git a/admin/tools/docker/index/container/buildContainers.bash b/admin/tools/docker/index/container/buildContainers.bash new file mode 100644 index 0000000000..bc47c659b1 --- /dev/null +++ b/admin/tools/docker/index/container/buildContainers.bash @@ -0,0 +1,24 @@ +#! /bin/bash + +set -e + +# qserv/admin/tools/docker/loader/container/buildContainers.bash +# cd back to base qserv directory as the Dockerfile COPY needs the entire project +# in the docker context. 
+cd ../../../../../../qserv +docker build -f admin/tools/docker/index/container/dev/Dockerfile -t qserv/indexbase:dev . + +# go to individual directories to minimize the size of docker's context copy +# worker +cd admin/tools/docker/index/container/dev/worker/ && docker build -t qserv/indexworker:dev . +#docker build -f admin/tools/docker/index/container/dev/worker/Dockerfile -t qserv/indexworker:dev . + +# master +cd ../master/ && docker build -t qserv/indexmaster:dev . +#docker build -f admin/tools/docker/index/container/dev/master/Dockerfile -t qserv/indexmaster:dev . + +# clientNum +cd ../clientNum/ && docker build -t qserv/indexclientnum:dev . +#docker build -f admin/tools/docker/index/container/dev/clientNum/Dockerfile -t qserv/indexclientnum:dev . + + diff --git a/admin/tools/docker/index/container/dev/Dockerfile b/admin/tools/docker/index/container/dev/Dockerfile new file mode 100644 index 0000000000..a5fd8583d3 --- /dev/null +++ b/admin/tools/docker/index/container/dev/Dockerfile @@ -0,0 +1,26 @@ +# docker build -f admin/tools/docker/index/container/dev/Dockerfile -t qserv/indexbase:dev . +# +# Using the development toolchain + +FROM qserv/qserv:dev + +USER 0 + +#RUN mv /usr/bin/sh /usr/bin/sh.old && ln -s /usr/bin/bash /usr/bin/sh +RUN yum update --assumeyes && yum install --assumeyes bind-utils gdb screen + +USER 1000 + +RUN mkdir /home/qserv/dev/ && \ + chown -R qserv:qserv /home/qserv + +COPY --chown=qserv:qserv . /home/qserv/dev/qserv + +RUN bash -lc "rm -rf /home/qserv/dev/qserv/build /home/qserv/dev/qserv/share /home/qserv/dev/qserv/bin && \ + cd /qserv/stack/ && source ./loadLSST.bash && \ + cd /home/qserv/dev/qserv && setup -r . 
-t qserv-dev && \
+    printenv && \
+    scons -j10 install && \
+    mkdir -p /home/qserv/run && \
+    qserv-configure.py --all -R /home/qserv/run"
+
diff --git a/admin/tools/docker/index/container/dev/clientNum/Dockerfile b/admin/tools/docker/index/container/dev/clientNum/Dockerfile
new file mode 100644
index 0000000000..b4618d743e
--- /dev/null
+++ b/admin/tools/docker/index/container/dev/clientNum/Dockerfile
@@ -0,0 +1,13 @@
+#
+#
+# cd ~/work/qserv/admin/tools/docker/index/container/dev/clientNum
+# docker build -t qserv/indexclientnum:dev .
+FROM qserv/indexbase:dev
+
+USER 0
+
+RUN yum update --assumeyes && yum install --assumeyes bind-utils
+
+USER 1000
+
+ENTRYPOINT ["/home/qserv/dev/qserv/admin/tools/docker/index/container/dev/clientNum/appClientNum.bash"]
diff --git a/admin/tools/docker/index/container/dev/clientNum/appClientNum.bash b/admin/tools/docker/index/container/dev/clientNum/appClientNum.bash
new file mode 100755
index 0000000000..5553004e5f
--- /dev/null
+++ b/admin/tools/docker/index/container/dev/clientNum/appClientNum.bash
@@ -0,0 +1,23 @@
+#! /bin/bash -l
+# admin/tools/docker/index/container/dev/clientNum/appClientNum.bash <arg1> <arg2> <client config file name>
+
+_term() {
+    echo "Caught SIGTERM signal!"
+    kill -TERM "$child" 2>/dev/null
+}
+
+trap _term SIGTERM
+# SIGKILL cannot be trapped, so only SIGTERM is forwarded to the child.
+
+source /qserv/stack/loadLSST.bash
+cd /home/qserv/dev/qserv
+setup -r . -t qserv-dev
+
+export LSST_LOG_CONFIG=/home/qserv/dev/qserv/admin/templates/configuration/etc/log4cxx.index.properties
+
+echo appClientNum $1 $2 $3
+
+/home/qserv/dev/qserv/build/loader/appClientNum $1 $2 /home/qserv/dev/qserv/core/modules/loader/config/$3 &   # background: gives $! a pid and lets the trap run while waiting
+
+child=$!
+wait "$child"
diff --git a/admin/tools/docker/index/container/dev/clientNum/appClientNumScreen.bash b/admin/tools/docker/index/container/dev/clientNum/appClientNumScreen.bash
new file mode 100755
index 0000000000..734bb3e65d
--- /dev/null
+++ b/admin/tools/docker/index/container/dev/clientNum/appClientNumScreen.bash
@@ -0,0 +1,9 @@
+#! 
/bin/bash -l
+# admin/tools/docker/index/container/dev/clientNum/appClientNumScreen.bash
+
+echo appClientScreen $1 $2 $3
+
+screen -dm /home/qserv/dev/qserv/admin/tools/docker/index/container/dev/clientNum/appClientNum.bash $1 $2 $3
+
+
+tail -f /dev/null
diff --git a/admin/tools/docker/index/container/dev/master/Dockerfile b/admin/tools/docker/index/container/dev/master/Dockerfile
new file mode 100644
index 0000000000..f048cda80e
--- /dev/null
+++ b/admin/tools/docker/index/container/dev/master/Dockerfile
@@ -0,0 +1,13 @@
+# Run the following build command from the qserv base directory (could be ~/work/qserv or ~/development/qserv)
+# The COPY command can only access files below $PWD in the file tree.
+# cd ~/work/qserv/admin/tools/docker/index/container/dev/master
+# docker build -t qserv/indexmaster:dev .
+FROM qserv/indexbase:dev
+
+USER 0
+
+RUN yum update --assumeyes && yum install --assumeyes bind-utils
+
+USER 1000
+
+ENTRYPOINT ["/home/qserv/dev/qserv/admin/tools/docker/index/container/dev/master/appMaster.bash"]
diff --git a/admin/tools/docker/index/container/dev/master/appMaster.bash b/admin/tools/docker/index/container/dev/master/appMaster.bash
new file mode 100755
index 0000000000..3054ad04a0
--- /dev/null
+++ b/admin/tools/docker/index/container/dev/master/appMaster.bash
@@ -0,0 +1,22 @@
+#! /bin/bash -l
+# admin/tools/docker/index/container/dev/master/appMaster.bash
+
+_term() {
+    echo "Caught SIGTERM signal!"
+    kill -TERM "$child" 2>/dev/null
+}
+
+trap _term SIGTERM
+# SIGKILL cannot be trapped, so only SIGTERM is forwarded to the child.
+
+source /qserv/stack/loadLSST.bash
+cd /home/qserv/dev/qserv
+setup -r . -t qserv-dev
+
+export LSST_LOG_CONFIG=/home/qserv/dev/qserv/admin/templates/configuration/etc/log4cxx.index_master.properties
+
+/home/qserv/dev/qserv/build/loader/appMaster /home/qserv/dev/qserv/core/modules/loader/config/master.cnf &   # background: gives $! a pid and lets the trap run while waiting
+
+child=$! 
+echo "child ${child}" +wait "$child" diff --git a/admin/tools/docker/index/container/dev/worker/Dockerfile b/admin/tools/docker/index/container/dev/worker/Dockerfile new file mode 100644 index 0000000000..523361d001 --- /dev/null +++ b/admin/tools/docker/index/container/dev/worker/Dockerfile @@ -0,0 +1,13 @@ +# +# +# cd ~/work/qserv/admin/tools/docker/index/container/dev/worker +# docker build -t qserv/indexworker:dev . +FROM qserv/indexbase:dev + +USER 0 + +RUN yum update --assumeyes && yum install --assumeyes bind-utils + +USER 1000 + +ENTRYPOINT ["/home/qserv/dev/qserv/admin/tools/docker/index/container/dev/worker/appWorker.bash"] diff --git a/admin/tools/docker/index/container/dev/worker/appWorker.bash b/admin/tools/docker/index/container/dev/worker/appWorker.bash new file mode 100755 index 0000000000..324fec66ba --- /dev/null +++ b/admin/tools/docker/index/container/dev/worker/appWorker.bash @@ -0,0 +1,22 @@ +#! /bin/bash +# admin/tools/docker/loader/container/dev/worker/appWorker.bash + +_term() { + echo "Caught SIGTERM signal!" + kill -TERM "$child" 2>/dev/null +} + +trap _term SIGTERM +trap _term SIGKILL + +source /qserv/stack/loadLSST.bash +cd /home/qserv/dev/qserv +setup -r . -t qserv-dev + +export LSST_LOG_CONFIG=/home/qserv/dev/qserv/admin/templates/configuration/etc/log4cxx.index.properties + +/home/qserv/dev/qserv/build/loader/appWorker /home/qserv/dev/qserv/core/modules/loader/config/worker-k8s-a.cnf + +child=$! +echo "child ${child}" +wait "$child" diff --git a/admin/tools/docker/index/container/dev/worker/appWorkerScreen.bash b/admin/tools/docker/index/container/dev/worker/appWorkerScreen.bash new file mode 100755 index 0000000000..3893b5fe6a --- /dev/null +++ b/admin/tools/docker/index/container/dev/worker/appWorkerScreen.bash @@ -0,0 +1,7 @@ +#! 
/bin/bash +# admin/tools/docker/loader/container/dev/worker/appWorkerScreen.bash + + +screen -dm /home/qserv/dev/qserv/admin/tools/docker/index/container/dev/worker/appWorker.bash + +tail -f /dev/null diff --git a/admin/tools/docker/index/index-k8-100m.yaml b/admin/tools/docker/index/index-k8-100m.yaml new file mode 100644 index 0000000000..643c33978a --- /dev/null +++ b/admin/tools/docker/index/index-k8-100m.yaml @@ -0,0 +1,206 @@ +apiVersion: v1 +kind: Service +metadata: + name: imaster-svc + labels: + app: index +spec: + ports: + - port: 10042 + protocol: UDP + clusterIP: None + selector: + app: imaster-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: imaster-sts + labels: + app: index +spec: + serviceName: imaster-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: imaster-pod + template: + metadata: + labels: + app: imaster-pod + spec: + containers: + - name: imaster-ctr + image: qserv/indexmaster:dev + imagePullPolicy: Always + ports: + - containerPort: 10042 + protocol: UDP +--- +apiVersion: v1 +kind: Service +metadata: + name: iworker-svc + labels: + app: index +spec: + ports: + - port: 10043 + protocol: UDP + clusterIP: None + selector: + app: iworker-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iworker-sts + labels: + app: index +spec: + serviceName: iworker-svc + podManagementPolicy: Parallel + replicas: 3 + selector: + matchLabels: + app: iworker-pod + template: + metadata: + labels: + app: iworker-pod + spec: + containers: + - name: iworker-ctr + image: qserv/indexworker:dev + imagePullPolicy: Always + ports: + - containerPort: 10043 + protocol: UDP + - containerPort: 10143 + protocol: TCP +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum-sts + labels: + app: index 
+spec: + serviceName: iclientnum-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum-pod + template: + metadata: + labels: + app: iclientnum-pod + spec: + containers: + - name: iclientnum-ctr + image: qserv/indexclientnum:dev + imagePullPolicy: Always + args: ["100000000", "1", "client-k8s-a1.cnf"] + ports: + - containerPort: 10050 + protocol: UDP +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum2-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum2-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum2-sts + labels: + app: index +spec: + serviceName: iclientnum2-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum2-pod + template: + metadata: + labels: + app: iclientnum2-pod + spec: + containers: + - name: iclientnum2-ctr + image: qserv/indexclientnum:dev + imagePullPolicy: Always + args: ["200000001", "300000001", "client-k8s-a2.cnf"] + ports: + - containerPort: 10050 + protocol: UDP +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum3-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum3-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum3-sts + labels: + app: index +spec: + serviceName: iclientnum3-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum3-pod + template: + metadata: + labels: + app: iclientnum3-pod + spec: + containers: + - name: iclientnum3-ctr + image: qserv/indexclientnum:dev + imagePullPolicy: Always + args: ["100000001", "200000000", "client-k8s-a3.cnf"] + ports: + - containerPort: 10050 + protocol: UDP + + diff --git a/admin/tools/docker/index/index-k8-10m.yaml b/admin/tools/docker/index/index-k8-10m.yaml new file mode 100644 index 0000000000..5b989a088c --- /dev/null +++ 
b/admin/tools/docker/index/index-k8-10m.yaml @@ -0,0 +1,415 @@ +apiVersion: v1 +kind: Service +metadata: + name: imaster-svc + labels: + app: index +spec: + ports: + - port: 10042 + protocol: UDP + clusterIP: None + selector: + app: imaster-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: imaster-sts + labels: + app: index +spec: + serviceName: imaster-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: imaster-pod + template: + metadata: + labels: + app: imaster-pod + spec: + containers: + - name: imaster-ctr + image: qserv/indexmaster:dev + imagePullPolicy: Always + ports: + - containerPort: 10042 + protocol: UDP +--- +apiVersion: v1 +kind: Service +metadata: + name: iworker-svc + labels: + app: index +spec: + ports: + - port: 10043 + protocol: UDP + clusterIP: None + selector: + app: iworker-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iworker-sts + labels: + app: index +spec: + serviceName: iworker-svc + podManagementPolicy: Parallel + replicas: 14 + selector: + matchLabels: + app: iworker-pod + template: + metadata: + labels: + app: iworker-pod + spec: + containers: + - name: iworker-ctr + image: qserv/indexworker:dev + imagePullPolicy: Always + ports: + - containerPort: 10043 + protocol: UDP + - containerPort: 10143 + protocol: TCP +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum-sts + labels: + app: index +spec: + serviceName: iclientnum-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum-pod + template: + metadata: + labels: + app: iclientnum-pod + spec: + containers: + - name: iclientnum-ctr + image: qserv/indexclientnum:dev + imagePullPolicy: Always + args: ["10000000", "1", "client-k8s-a1.cnf"] + ports: + - containerPort: 
10050
+          protocol: UDP
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: iclientnum2-svc
+  labels:
+    app: index
+spec:
+  ports:
+  - port: 10050
+    protocol: UDP
+  clusterIP: None
+  selector:
+    app: iclientnum2-pod
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: iclientnum2-sts
+  labels:
+    app: index
+spec:
+  serviceName: iclientnum2-svc
+  podManagementPolicy: Parallel
+  replicas: 1
+  selector:
+    matchLabels:
+      app: iclientnum2-pod
+  template:
+    metadata:
+      labels:
+        app: iclientnum2-pod
+    spec:
+      containers:
+      - name: iclientnum2-ctr
+        image: qserv/indexclientnum:dev
+        imagePullPolicy: Always
+        args: ["20000001", "30000001", "client-k8s-a2.cnf"]
+        ports:
+        - containerPort: 10050
+          protocol: UDP
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: iclientnum3-svc
+  labels:
+    app: index
+spec:
+  ports:
+  - port: 10050
+    protocol: UDP
+  clusterIP: None
+  selector:
+    app: iclientnum3-pod
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: iclientnum3-sts
+  labels:
+    app: index
+spec:
+  serviceName: iclientnum3-svc
+  podManagementPolicy: Parallel
+  replicas: 1
+  selector:
+    matchLabels:
+      app: iclientnum3-pod
+  template:
+    metadata:
+      labels:
+        app: iclientnum3-pod
+    spec:
+      containers:
+      - name: iclientnum3-ctr
+        image: qserv/indexclientnum:dev
+        imagePullPolicy: Always
+        args: ["10000001", "20000000", "client-k8s-a3.cnf"]
+        ports:
+        - containerPort: 10050
+          protocol: UDP
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: iclientnum4-svc
+  labels:
+    app: index
+spec:
+  ports:
+  - port: 10050
+    protocol: UDP
+  clusterIP: None
+  selector:
+    app: iclientnum4-pod
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: iclientnum4-sts
+  labels:
+    app: index
+spec:
+  serviceName: iclientnum4-svc
+  podManagementPolicy: Parallel
+  replicas: 1
+  selector:
+    matchLabels:
+      app: iclientnum4-pod
+  template:
+    metadata:
+      labels:
+        app: iclientnum4-pod
+    spec:
+      containers:
+      - name: iclientnum4-ctr
+        image: qserv/indexclientnum:dev
+        
imagePullPolicy: Always + args: ["40000001", "50000000", "client-k8s-a1.cnf"] + ports: + - containerPort: 10050 + protocol: UDP + +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum5-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum5-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum5-sts + labels: + app: index +spec: + serviceName: iclientnum5-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum5-pod + template: + metadata: + labels: + app: iclientnum5-pod + spec: + containers: + - name: iclientnum5-ctr + image: qserv/indexclientnum:dev + imagePullPolicy: Always + args: ["50000001", "60000000", "client-k8s-a1.cnf"] + ports: + - containerPort: 10050 + protocol: UDP + +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum6-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum6-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum6-sts + labels: + app: index +spec: + serviceName: iclientnum6-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum6-pod + template: + metadata: + labels: + app: iclientnum6-pod + spec: + containers: + - name: iclientnum6-ctr + image: qserv/indexclientnum:dev + imagePullPolicy: Always + args: ["60000001", "70000000", "client-k8s-a1.cnf"] + ports: + - containerPort: 10050 + protocol: UDP + +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum7-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum7-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum7-sts + labels: + app: index +spec: + serviceName: iclientnum7-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum7-pod + template: + metadata: + 
labels: + app: iclientnum7-pod + spec: + containers: + - name: iclientnum7-ctr + image: qserv/indexclientnum:dev + imagePullPolicy: Always + args: ["70000001", "80000000", "client-k8s-a1.cnf"] + ports: + - containerPort: 10050 + protocol: UDP + +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum8-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum8-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum8-sts + labels: + app: index +spec: + serviceName: iclientnum8-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum8-pod + template: + metadata: + labels: + app: iclientnum8-pod + spec: + containers: + - name: iclientnum8-ctr + image: qserv/indexclientnum:dev + imagePullPolicy: Always + args: ["80000001", "90000000", "client-k8s-a1.cnf"] + ports: + - containerPort: 10050 + protocol: UDP + diff --git a/admin/tools/docker/index/index-k8-m.yaml b/admin/tools/docker/index/index-k8-m.yaml new file mode 100644 index 0000000000..ac8d5f337b --- /dev/null +++ b/admin/tools/docker/index/index-k8-m.yaml @@ -0,0 +1,206 @@ +apiVersion: v1 +kind: Service +metadata: + name: imaster-svc + labels: + app: index +spec: + ports: + - port: 10042 + protocol: UDP + clusterIP: None + selector: + app: imaster-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: imaster-sts + labels: + app: index +spec: + serviceName: imaster-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: imaster-pod + template: + metadata: + labels: + app: imaster-pod + spec: + containers: + - name: imaster-ctr + image: qserv/indexmaster:dev + imagePullPolicy: Always + ports: + - containerPort: 10042 + protocol: UDP +--- +apiVersion: v1 +kind: Service +metadata: + name: iworker-svc + labels: + app: index +spec: + ports: + - port: 10043 + protocol: UDP + clusterIP: None + selector: + app: iworker-pod +--- +apiVersion: apps/v1 
+kind: StatefulSet +metadata: + name: iworker-sts + labels: + app: index +spec: + serviceName: iworker-svc + podManagementPolicy: Parallel + replicas: 3 + selector: + matchLabels: + app: iworker-pod + template: + metadata: + labels: + app: iworker-pod + spec: + containers: + - name: iworker-ctr + image: qserv/indexworker:dev + imagePullPolicy: Always + ports: + - containerPort: 10043 + protocol: UDP + - containerPort: 10143 + protocol: TCP +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum-sts + labels: + app: index +spec: + serviceName: iclientnum-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum-pod + template: + metadata: + labels: + app: iclientnum-pod + spec: + containers: + - name: iclientnum-ctr + image: qserv/indexclientnum:dev + args: ["1", "1000000", "client-k8s-a1.cnf"] + imagePullPolicy: Always + ports: + - containerPort: 10050 + protocol: UDP +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum2-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum2-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum2-sts + labels: + app: index +spec: + serviceName: iclientnum2-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum2-pod + template: + metadata: + labels: + app: iclientnum2-pod + spec: + containers: + - name: iclientnum2-ctr + image: qserv/indexclientnum:dev + args: ["1000001", "2000000", "client-k8s-a2.cnf"] + imagePullPolicy: Always + ports: + - containerPort: 10050 + protocol: UDP +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum3-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + 
selector: + app: iclientnum3-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum3-sts + labels: + app: index +spec: + serviceName: iclientnum3-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum3-pod + template: + metadata: + labels: + app: iclientnum3-pod + spec: + containers: + - name: iclientnum3-ctr + image: qserv/indexclientnum:dev + args: ["2000001", "3000000", "client-k8s-a3.cnf"] + imagePullPolicy: Always + ports: + - containerPort: 10050 + protocol: UDP + + diff --git a/admin/tools/docker/index/index-k8.yaml b/admin/tools/docker/index/index-k8.yaml new file mode 100644 index 0000000000..2eeab49889 --- /dev/null +++ b/admin/tools/docker/index/index-k8.yaml @@ -0,0 +1,123 @@ +apiVersion: v1 +kind: Service +metadata: + name: imaster-svc + labels: + app: index +spec: + ports: + - port: 10042 + protocol: UDP + clusterIP: None + selector: + app: imaster-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: imaster-sts + labels: + app: index +spec: + serviceName: imaster-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: imaster-pod + template: + metadata: + labels: + app: imaster-pod + spec: + containers: + - name: imaster-ctr + image: qserv/indexmaster:dev + imagePullPolicy: Always + ports: + - containerPort: 10042 + protocol: UDP +--- +apiVersion: v1 +kind: Service +metadata: + name: iworker-svc + labels: + app: index +spec: + ports: + - port: 10043 + protocol: UDP + clusterIP: None + selector: + app: iworker-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iworker-sts + labels: + app: index +spec: + serviceName: iworker-svc + podManagementPolicy: Parallel + replicas: 3 + selector: + matchLabels: + app: iworker-pod + template: + metadata: + labels: + app: iworker-pod + spec: + containers: + - name: iworker-ctr + image: qserv/indexworker:dev + imagePullPolicy: Always + ports: + - containerPort: 10043 + protocol: UDP + - 
containerPort: 10143 + protocol: TCP +--- +apiVersion: v1 +kind: Service +metadata: + name: iclientnum-svc + labels: + app: index +spec: + ports: + - port: 10050 + protocol: UDP + clusterIP: None + selector: + app: iclientnum-pod +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: iclientnum-sts + labels: + app: index +spec: + serviceName: iclientnum-svc + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app: iclientnum-pod + template: + metadata: + labels: + app: iclientnum-pod + spec: + containers: + - name: iclientnum-ctr + image: qserv/indexclientnum:dev + args: ["1", "500000", "client-k8s-a1.cnf"] + imagePullPolicy: Always + ports: + - containerPort: 10050 + protocol: UDP + diff --git a/core/modules/SConscript b/core/modules/SConscript index 0959632683..2ae5be9a57 100644 --- a/core/modules/SConscript +++ b/core/modules/SConscript @@ -284,6 +284,11 @@ shlibs["qmetaLib"] = dict(mods="qmeta:python", libs="qserv_qmeta log", SHLIBPREFIX='', instDir="$python_prefix/lsst/qserv/qmeta") + +# library of tools for building binary applications of the loader subsystem +shlibs["loader"] = dict(mods="""loader""", + libs="""qserv_common util protobuf boost_filesystem boost_system + log log4cxx""") # get list of all modules all_modules = sorted(str(d) for d in Glob('*', source=True) if os.path.isdir(d.srcnode().abspath)) diff --git a/core/modules/loader/BufferUdp.cc b/core/modules/loader/BufferUdp.cc new file mode 100644 index 0000000000..02ec9c3698 --- /dev/null +++ b/core/modules/loader/BufferUdp.cc @@ -0,0 +1,202 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "BufferUdp.h" + +// qserv headers +#include "loader/LoaderMsg.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.BufferUdp"); +} + + +namespace lsst { +namespace qserv { +namespace loader { + + +MsgElement::Ptr BufferUdp::readFromSocket(boost::asio::ip::tcp::socket& socket, std::string const& note) { + // Repeatedly read a socket until a valid MsgElement is read, eof, or an error occurs. + for (;;) { + LOGS(_log, LOG_LVL_DEBUG, note << " readFromSocket"); + boost::system::error_code error; + + // If there's something in the buffer already, get it and return. + // This can happen when the previous read of socket read multiple elements. + MsgElement::Ptr msgElem = _safeRetrieve("1readFromSocket" + note); + if (msgElem != nullptr) { + return msgElem; + } + + size_t len = socket.read_some(boost::asio::buffer(_wCursor, getAvailableWriteLength()), error); + _wCursor += len; /// must advance the cursor. + + // EOF is only a problem if no MsgElement was retrieved. + // ??? This is definitely the case in UDP, EOF as nothing more will show up. + // ??? But TCP is another issue as EOF is returned when the connection is still live but + // ??? there's no data (len=0). 
Why does read_some set error to eof before the tcp connection is closed? + if (error == boost::asio::error::eof) { + LOGS(_log, LOG_LVL_WARN, "readFromSocket eof"); + break; // Connection closed cleanly by peer. + } else if (error && error != boost::asio::error::eof) { + throw LoaderMsgErr(ERR_LOC, "BufferUdp::readFromSocket note=" + note + " asio error=" + error.message()); + } + + /// Try to retrieve an element (there's no guarantee that an entire element got read in a single read. + // Store original cursor positions so they can be restored if the read fails. + msgElem = _safeRetrieve("2readFromSocket" + note); + if (msgElem != nullptr) { + return msgElem; + } + } + return nullptr; +} + + +bool BufferUdp::releaseOwnership() { + if (_ourBuffer) { + _ourBuffer = false; + return true; + } + return false; +} + + +bool BufferUdp::append(const void* in, size_t len) { + if (isAppendSafe(len)) { + memcpy(_wCursor, in, len); + _wCursor += len; + return true; + } + return false; +} + + +void BufferUdp::advanceWriteCursor(size_t len) { + _wCursor += len; + if (not isAppendSafe(0)) { + // The buffer has overflowed. + throw std::overflow_error("BufferUdp advanceWriteCursor beyond buffer len=" + + std::to_string(len)); + } +} + + +void BufferUdp::advanceReadCursor(size_t len) { + _rCursor += len; + if (_rCursor > _end) { + // Something read data outside of the buffer range. + throw std::overflow_error("BufferUdp advanceReadCursor beyond buffer len=" + + std::to_string(len)); + } +} + + +std::shared_ptr BufferUdp::_safeRetrieve(std::string const& note) { + auto wCursorOriginal = _wCursor; + auto rCursorOriginal = _rCursor; + // throwOnMissing=false since missing data is possible with TCP. 
+ MsgElement::Ptr msgElem = MsgElement::retrieve(*this, note + " _safeRetrieve", false); + if (msgElem != nullptr) { + return msgElem; + } else { + _wCursor = wCursorOriginal; + _rCursor = rCursorOriginal; + } + return nullptr; +} + + +bool BufferUdp::isRetrieveSafe(size_t len) const { + auto newLen = (_rCursor + len); + return (newLen <= _end && newLen <= _wCursor); +} + + +bool BufferUdp::retrieve(void* out, size_t len) { + if (isRetrieveSafe(len)) { + memcpy(out, _rCursor, len); + _rCursor += len; + return true; + } + LOGS(_log, LOG_LVL_DEBUG, "BufferUdp::retrieve not safe len=" << len); + return false; +} + + +bool BufferUdp::retrieveString(std::string& out, size_t len) { + if (isRetrieveSafe(len)) { + const char* strEnd = _rCursor + len; + std::string str(_rCursor, strEnd); + _rCursor = strEnd; + out = str; + return true; + } + return false; +} + + +std::string BufferUdp::dumpStr(bool hexDump, bool charDump) const { + std::stringstream os; + dump(os, hexDump, charDump); + return os.str(); +} + + +std::ostream& BufferUdp::dump(std::ostream &os, bool hexDump, bool charDump) const { + os << "maxLength=" << _length; + os << " buffer=" << (void*)_buffer; + os << " wCurLen=" << getAvailableWriteLength(); + os << " wCursor=" << (void*)_wCursor; + os << " rCurLen=" << getBytesLeftToRead(); + os << " rCursor=" << (void*)_rCursor; + os << " end=" << (void*)_end; + + // hex dump + if (hexDump) { + os << "("; + for (const char* j=_buffer; j < _wCursor; ++j) { + os << std::hex << (int)*j << " "; + } + os << ")"; + } + + // character dump + if (charDump) { + os << "(" << std::string(_buffer, _wCursor) << ")"; + } + + return os; +} + + +std::ostream& operator<<(std::ostream& os, BufferUdp const& buf) { + return buf.dump(os, false, false); +} + +}}} // namespace lsst:qserv:loader diff --git a/core/modules/loader/BufferUdp.h b/core/modules/loader/BufferUdp.h new file mode 100644 index 0000000000..b83d063614 --- /dev/null +++ b/core/modules/loader/BufferUdp.h @@ -0,0 +1,171 @@ 
+// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_BUFFERUDP_H +#define LSST_QSERV_LOADER_BUFFERUDP_H + +// system headers +#include +#include +#include +#include +#include +#include + +// third party headers +#include "boost/asio.hpp" + +namespace lsst { +namespace qserv { +namespace loader { + +class MsgElement; + + +/// A buffer for reading and writing. Nothing can be read from the buffer until +/// something has been written to it. +/// TODO: rename BufferUdp is not really accurate anymore. +class BufferUdp { +public: + using Ptr = std::shared_ptr; + + /// The absolute largest UDP message we would send. + /// Usually, they should be much smaller. + static size_t const MAX_MSG_SIZE_UDP = 6000; + + /// These are also being used for TCP messages, which can be much larger. + static size_t const MAX_MSG_SIZE_TCP = 10000000; + + /// Create the object with a new _buffer with 'length' bytes. 
+ explicit BufferUdp(size_t length = MAX_MSG_SIZE_UDP) + : _buffer(new char[length]), _length(length), _ourBuffer(true) { + _setupBuffer(); + } + + /// Create a BufferUdp object that uses 'buf' as its buffer, with 'length' + /// indicating the number of bytes in the buffer. If 'buf' already + /// contains valid data, 'validBytes' must be set to how many bytes of the buffer + /// are valid. + /// If BufferUdp should take ownership of 'buf', i.e. delete 'buf' when it is done, + /// call makeOwnerOfBuffer(). + BufferUdp(char* buf, size_t length, size_t validBytes) : _buffer(buf), _length(length) { + _setupBuffer(); + advanceWriteCursor(validBytes); + } + + BufferUdp(BufferUdp const&) = delete; + BufferUdp& operator=(BufferUdp const&) = delete; + + ~BufferUdp() { if (_ourBuffer) { delete[] _buffer; } } + + /// Resets the cursors in the buffer so it is effectively empty. + void reset() { _setupBuffer(); } + + /// Return true only if this object owns the buffer. + bool releaseOwnership(); + + /// Make this object is responsible for deleting _buffer. + void makeOwnerOfBuffer() { _ourBuffer = true; } + + /// Return true if there's at least 'len' room left in the buffer. + bool isAppendSafe(size_t len) const { return (_wCursor + len) <= _end; } + + /// Append 'len' bytes at 'in' to the end of _buffer, but only if it is safe to do so. + bool append(const void* in, size_t len); + + /// Advance the write cursor. This is usually needed after some other object has been + /// allowed to write directly to the buffer. (boost::asio) + void advanceWriteCursor(size_t len); + + /// Advance the read cursor, which usually needs to be done after another object + /// has been allowed to read directly from the buffer. (boost::asio) + void advanceReadCursor(size_t len); + + /// Repeatedly read a socket until a valid MsgElement is read, eof, or an error occurs. 
+ /// Errors throw LoaderMsgErr + std::shared_ptr readFromSocket(boost::asio::ip::tcp::socket& socket, std::string const& note); + + /// Return the total length of _buffer. + size_t getMaxLength() const { return _length; } + + /// Returns the number of bytes left to be read from the buffer. + int getBytesLeftToRead() const { return _wCursor - _rCursor; } + + /// Returns the amount of room left in the buffer after the write cursor. + size_t getAvailableWriteLength() const { return _end - _wCursor; } + + /// Returns a char* pointing to data to be read from the buffer. + const char* getReadCursor() const { return _rCursor; } + + /// Returns a char* pointing to where data should be written to the buffer. + char* getWriteCursor() const { return _wCursor; } + + /// Returns true if retrieving 'len' bytes from the buffer will not violate the buffer rules. + bool isRetrieveSafe(size_t len) const; + + /// Returns true if 'len' bytes could be copied to out without violating _buffer rules. + bool retrieve(void* out, size_t len); + + /// Returns true if 'len' bytes could be copied to 'out' without violating _buffer rules. + bool retrieveString(std::string& out, size_t len); + + /// Dumps basic data to a string. If 'hexDump' is true, include a complete dump of + /// _buffer in hex. + std::string dumpStr(bool hexDump=true) const { return dumpStr(hexDump, false); } + + /// Dumps basic data to a string. If 'hexDump' is true, include a complete dump of + /// _buffer in hex. If 'charDump' is true, include a complete dump of the buffer + /// in ascii. + std::string dumpStr(bool hexDump, bool charDump) const; + + std::ostream& dump(std::ostream &os, bool hexDump, bool charDump) const; + +private: + void _setupBuffer() { + _end = _buffer + _length; + _wCursor = _buffer; + _rCursor = _buffer; + } + + /// Parse the valid portion of _buffer (starting at _rCursor) and see if one + /// MsgElement is available. If so, return the element and advance _rCursor. + /// Otherwise return nullptr. 
+ /// If a message is not recovered, the buffer is left effectively unchanged. + std::shared_ptr _safeRetrieve(std::string const& note); + + + char* _buffer; + size_t _length; ///< Number of bytes in the array (total capacity of array). + char* _end; ///< Immediately after the last element in the array. + char* _wCursor; ///< Where new elements will be appended to the array. + const char* _rCursor; ///< Where data is read from the buffer. + + bool _ourBuffer{false}; ///< true if this class object is responsible for deleting the buffer. +}; + +/// Print basic buffer information. Use BufferUdp::dump() directly if the buffer contents are needed. +std::ostream& operator<<(std::ostream& os, BufferUdp const& buf); + +}}} // namespace lsst:qserv:loader + +#endif // LSST_QSERV_LOADER_BUFFERUDP_H diff --git a/core/modules/loader/Central.cc b/core/modules/loader/Central.cc new file mode 100644 index 0000000000..2490d2e275 --- /dev/null +++ b/core/modules/loader/Central.cc @@ -0,0 +1,93 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + + +// System headers +#include + +// Class header +#include "Central.h" + +// Third-party headers +#include "boost/asio.hpp" + +// qserv headers +#include "loader/LoaderMsg.h" +#include "proto/loader.pb.h" +#include "proto/ProtoImporter.h" + + +// LSST headers +#include "lsst/log/Log.h" + + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.Central"); +} + + +namespace lsst { +namespace qserv { +namespace loader { + +void Central::_initialize() { + // Order is important here. + _pool = util::ThreadPool::newThreadPool(_threadPoolSize, _queue); + + doList = std::make_shared(*this); + + std::thread t([this](){ _checkDoList(); }); + _checkDoListThread = std::move(t); +} + +Central::~Central() { + _loop = false; + _pool->shutdownPool(); + for (std::thread& thd : _ioServiceThreads) { + thd.join(); + } +} + + +void Central::run() { + std::thread thd([this]() { ioService.run(); }); + _ioServiceThreads.push_back(std::move(thd)); +} + + +void Central::_checkDoList() { + while(_loop) { + // Run and then sleep for a second. TODO A more advanced timer should be used + doList->checkList(); + usleep(_loopSleepTime); + } +} + + +std::ostream& operator<<(std::ostream& os, ChunkSubchunk const& csc) { + os << "chunk=" << csc.chunk << " subchunk=" << csc.subchunk; + return os; +} + + +}}} // namespace lsst::qserv::loader diff --git a/core/modules/loader/Central.h b/core/modules/loader/Central.h new file mode 100644 index 0000000000..94c2a21ac0 --- /dev/null +++ b/core/modules/loader/Central.h @@ -0,0 +1,174 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_CENTRAL_H +#define LSST_QSERV_LOADER_CENTRAL_H + +// system headers +#include +#include + +// third party headers +#include "boost/asio.hpp" + +// Qserv headers +#include "loader/ClientServer.h" +#include "loader/MasterServer.h" +#include "loader/MWorkerList.h" +#include "loader/WorkerServer.h" +#include "loader/WWorkerList.h" +#include "proto/loader.pb.h" +#include "util/ThreadPool.h" + + +namespace lsst { +namespace qserv { +namespace loader { + +/// TODO add fileId and row to this so it can be checked in _workerKeyInsertReq +struct ChunkSubchunk { + ChunkSubchunk(int chunk_, int subchunk_) : chunk(chunk_), subchunk(subchunk_) {} + int const chunk; + int const subchunk; + friend std::ostream& operator<<(std::ostream& os, ChunkSubchunk const& csc); +}; + + +/// This class is 'central' to the execution of the program, and must be around +/// until the bitter end. As such, it can be accessed by normal pointers. +/// This class is central to clients, workers and the master. +/// It provides a DoList and a means to contact the master. The master +/// needs to know its own address. 
+class Central { +public: + Central() = delete; + Central(Central const&) = delete; + Central& operator=(Central const&) = delete; + + virtual ~Central(); + + std::string getMasterHostName() const { return _masterAddr.ip; } + int getMasterPort() const { return _masterAddr.port; } + NetworkAddress getMasterAddr() const { return _masterAddr; } + + uint64_t getNextMsgId() { return _sequence++; } + + int getErrCount() const { return _server->getErrCount(); } + + /// Start the server on UDP and/or TCP ports. May throw boost::system::system_error + void start() { + startService(); + startMonitoring(); + } + + /// Override with function to define and start the server. + void virtual startService() = 0; + + /// Override with functions to add do list items. + void virtual startMonitoring() {}; + + /// Send the contents of 'sendBuf' to 'host:port'. This waits for the message to be + /// sent before returning. + /// @throw boost::system::system_error on failure. + void sendBufferTo(std::string const& host, int port, BufferUdp& sendBuf) { + _server->sendBufferTo(host, port, sendBuf); + } + + /// Only allow tracked commands on the queue. The DoList has to able to tell + /// if a Command completed. + void queueCmd(util::CommandTracked::Ptr const& cmd) { + _queue->queCmd(cmd); + } + + /// Add a DoListItem to the _doList which will run and + /// rerun the item until it is no longer needed. + bool addDoListItem(DoListItem::Ptr const& item) { + return doList->addItem(item); + } + + /// Run the item immediately before adding it to _doList. + bool runAndAddDoListItem(DoListItem::Ptr const& item) { + doList->runItemNow(item); + return doList->addItem(item); + } + + /// Run the server. + void runServer() { + for (; _runningIOThreads < _iOThreads; ++_runningIOThreads) { + run(); + } + } + + /// Provides a method for identifying different Central classes and + /// CentralWorkers in the log file. 
+ virtual std::string getOurLogId() const { return "Central baseclass"; } + +protected: + Central(boost::asio::io_service& ioService_, + std::string const& masterHostName, int masterPort, + int threadPoolSize, int loopSleepTime, + int iOThreads) + : ioService(ioService_), _masterAddr(masterHostName, masterPort), + _threadPoolSize(threadPoolSize), _loopSleepTime(loopSleepTime), + _iOThreads(iOThreads) { + _initialize(); + } + + void run(); ///< Run a single asio thread. + + boost::asio::io_service& ioService; + + DoList::Ptr doList; ///< List of items to be checked at regular intervals. + + ServerUdpBase::Ptr _server; + +private: + /// Repeatedly check the items on the _doList. + void _checkDoList(); + + void _initialize();///< Finish construction. + + NetworkAddress _masterAddr; ///< Network address of the master node. + + std::atomic _sequence{1}; + + util::CommandQueue::Ptr _queue{std::make_shared()}; // Must be defined before _pool + + int _threadPoolSize{10}; ///< Number of threads _pool. + util::ThreadPool::Ptr _pool; ///< Thread pool. + + bool _loop{true}; ///< continue looping through _checkDolList() while this is true. + int _loopSleepTime{100000}; ///< microseconds to sleep between each check of all list items. + + std::vector _ioServiceThreads; ///< List of asio io threads created by this + + std::thread _checkDoListThread; ///< Thread for running doList checks on DoListItems. + + int _iOThreads{5}; ///< Number of asio IO threads to run, set by config file. + int _runningIOThreads{0}; ///< Number of asio IO threads started. +}; + +}}} // namespace lsst::qserv::loader + + +#endif // LSST_QSERV_LOADER_CENTRAL_H diff --git a/core/modules/loader/CentralClient.cc b/core/modules/loader/CentralClient.cc new file mode 100644 index 0000000000..bb86b545f6 --- /dev/null +++ b/core/modules/loader/CentralClient.cc @@ -0,0 +1,398 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST. 
+ * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "CentralClient.h" +#include "ClientConfig.h" + +// system headers +#include + +// Third-party headers +#include "boost/asio.hpp" + +// qserv headers +#include "loader/LoaderMsg.h" +#include "loader/WWorkerList.h" +#include "proto/loader.pb.h" +#include "proto/ProtoImporter.h" + +// LSST headers +#include "lsst/log/Log.h" + + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.CentralClient"); +} + +namespace lsst { +namespace qserv { +namespace loader { + + +CentralClient::CentralClient(boost::asio::io_service& ioService_, + std::string const& hostName, ClientConfig const& cfg) + : CentralFollower(ioService_, hostName, cfg.getMasterHost(), cfg.getMasterPortUdp(), + cfg.getThreadPoolSize(),cfg.getLoopSleepTime(), cfg.getIOThreads(), cfg.getClientPortUdp()), + _defWorkerHost(cfg.getDefWorkerHost()), + _defWorkerPortUdp(cfg.getDefWorkerPortUdp()), + _doListMaxLookups(cfg.getMaxLookups()), + _doListMaxInserts(cfg.getMaxInserts()), + _maxRequestSleepTime(cfg.getMaxRequestSleepTime()) { +} + + +void CentralClient::startService() { + _server = std::make_shared(ioService, getHostName(), getUdpPort(), this); +} + + +CentralClient::~CentralClient() { +} + + +void 
CentralClient::handleKeyLookup(LoaderMsg const& inMsg, BufferUdp::Ptr const& data) { + LOGS(_log, LOG_LVL_DEBUG, "CentralClient::handleKeyLookup"); + + auto const sData = std::dynamic_pointer_cast(MsgElement::retrieve(*data, " CentralClient::handleKeyLookup ")); + if (sData == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralClient::handleKeyLookup Failed to parse list"); + return; + } + auto protoData = sData->protoParse(); + if (protoData == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralClient::handleKeyLookup Failed to parse list"); + return; + } + + // TODO put in separate thread + _handleKeyLookup(inMsg, protoData); +} + + +void CentralClient::_handleKeyLookup(LoaderMsg const& inMsg, std::unique_ptr& protoBuf) { + std::unique_ptr protoData(std::move(protoBuf)); + + CompositeKey key(protoData->keyint(), protoData->keystr()); + ChunkSubchunk chunkInfo(protoData->chunk(), protoData->subchunk()); + + LOGS(_log, LOG_LVL_DEBUG, "trying to remove oneShot for lookup key=" << key << " " << chunkInfo); + /// Locate the original one shot and mark it as done. 
+ CentralClient::KeyLookupReqOneShot::Ptr keyLookupOneShot; + { + std::lock_guard lck(_waitingKeyLookupMtx); + auto iter = _waitingKeyLookupMap.find(key); + if (iter == _waitingKeyLookupMap.end()) { + LOGS(_log, LOG_LVL_WARN, "_handleKeyLookup could not find key=" << key); + return; + } + keyLookupOneShot = iter->second; + _waitingKeyLookupMap.erase(iter); + } + keyLookupOneShot->keyInfoComplete(key, chunkInfo.chunk, chunkInfo.subchunk, protoData->success()); + LOGS(_log, LOG_LVL_INFO, "Successful KEY_LOOKUP key=" << key << " " << chunkInfo); +} + + +void CentralClient::handleKeyInsertComplete(LoaderMsg const& inMsg, BufferUdp::Ptr const& data) { + LOGS(_log, LOG_LVL_DEBUG, "CentralClient::handleKeyInsertComplete"); + + auto sData = std::dynamic_pointer_cast(MsgElement::retrieve(*data, " CentralClient::handleKeyInsertComplete ")); + if (sData == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralClient::handleKeyInsertComplete Failed to retrieve element"); + return; + } + auto protoData = sData->protoParse(); + if (protoData == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralClient::handleKeyInsertComplete Failed to parse list"); + return; + } + + // TODO put in separate thread + _handleKeyInsertComplete(inMsg, protoData); +} + + +void CentralClient::_handleKeyInsertComplete(LoaderMsg const& inMsg, std::unique_ptr& protoBuf) { + std::unique_ptr protoData(std::move(protoBuf)); + + CompositeKey key(protoData->keyint(), protoData->keystr()); + ChunkSubchunk chunkInfo(protoData->chunk(), protoData->subchunk()); + + LOGS(_log, LOG_LVL_DEBUG, "trying to remove oneShot for key=" << key << " " << chunkInfo); + /// Locate the original one shot and mark it as done. 
+ CentralClient::KeyInsertReqOneShot::Ptr keyInsertOneShot; + size_t mapSize; + { + std::lock_guard lck(_waitingKeyInsertMtx); + auto iter = _waitingKeyInsertMap.find(key); + if (iter == _waitingKeyInsertMap.end()) { + LOGS(_log, LOG_LVL_WARN, "handleKeyInsertComplete could not find key=" << key); + return; + } + keyInsertOneShot = iter->second; + _waitingKeyInsertMap.erase(iter); + mapSize = _waitingKeyInsertMap.size(); + } + keyInsertOneShot->keyInsertComplete(); + LOGS(_log, LOG_LVL_INFO, "Successful KEY_INSERT_COMPLETE key=" << key << " " << chunkInfo << + " mapSize=" << mapSize); +} + + +/// Returns a pointer to a Tracker object that can be used to track job +// completion and the status of the job. keyInsertOneShot will call +// _keyInsertReq until it knows the task was completed via a call +// to _handleKeyInsertComplete +KeyInfoData::Ptr CentralClient::keyInsertReq(CompositeKey const& key, int chunk, int subchunk) { + // Insert a oneShot DoListItem to keep trying to add the key until + // we get word that it has been added successfully. + LOGS(_log, LOG_LVL_INFO, "Trying to insert key=" << key << " chunk=" << chunk << + " subchunk=" << subchunk); + auto keyInsertOneShot = std::make_shared(this, key, chunk, subchunk); + { + std::unique_lock lck(_waitingKeyInsertMtx); + // Limit the number of concurrent inserts. + // If the key is already in the map, there is no point in blocking. + int loopCount = 0; + auto iter = _waitingKeyInsertMap.find(key); + while (_waitingKeyInsertMap.size() > _doListMaxInserts + && iter == _waitingKeyInsertMap.end()) { + size_t sz = _waitingKeyInsertMap.size(); + lck.unlock(); + if (loopCount % 100 == 0) { + LOGS(_log, LOG_LVL_DEBUG, "keyInsertReq waiting key=" << key << + "size=" << sz << " loopCount=" << loopCount); + } + // Let the CPU do something else while waiting for some requests to finish. 
+ usleep(_maxRequestSleepTime); + ++loopCount; + lck.lock(); + iter = _waitingKeyInsertMap.find(key); + } + + if (iter != _waitingKeyInsertMap.end()) { + // There is already an entry in the map and we can just use the existing entry, + // as long as it has the same chunk and subchunk numbers. + auto cData = iter->second->cmdData; + if (cData->chunk == chunk && cData->subchunk == subchunk) { + return cData; + } else { + // TODO This MUST go to some form of output for the end user as it is an input data error + // either here or when the caller gets a nullptr response + LOGS(_log, LOG_LVL_ERROR, "key:value does not match existing key:value key=" << key << + " orignal(" << cData->chunk << "," << cData->subchunk << + ") new(" << chunk << "," << subchunk << ")"); + return nullptr; + } + } + // The key wasn't found and needs to be inserted. + _waitingKeyInsertMap[key] = keyInsertOneShot; + } + runAndAddDoListItem(keyInsertOneShot); + return keyInsertOneShot->cmdData; +} + + +void CentralClient::_keyInsertReq(CompositeKey const& key, int chunk, int subchunk) { + LOGS(_log, LOG_LVL_INFO, "CentralClient::_keyInsertReq trying key=" << key); + LoaderMsg msg(LoaderMsg::KEY_INSERT_REQ, getNextMsgId(), getHostName(), getUdpPort()); + BufferUdp msgData; + msg.appendToData(msgData); + // create the proto buffer + lsst::qserv::proto::KeyInfoInsert protoKeyInsert; + lsst::qserv::proto::LdrNetAddress* protoAddr = protoKeyInsert.mutable_requester(); + protoAddr->set_ip(getHostName()); + protoAddr->set_udpport(getUdpPort()); + protoAddr->set_tcpport(getTcpPort()); + lsst::qserv::proto::KeyInfo* protoKeyInfo = protoKeyInsert.mutable_keyinfo(); + protoKeyInfo->set_keyint(key.kInt); + protoKeyInfo->set_keystr(key.kStr); + protoKeyInfo->set_chunk(chunk); + protoKeyInfo->set_subchunk(subchunk); + protoKeyInsert.set_hops(0); + + StringElement strElem; + protoKeyInsert.SerializeToString(&(strElem.element)); + strElem.appendToData(msgData); + try { + std::string ip; + int port = 0; + 
getWorkerForKey(key, ip, port); + sendBufferTo(ip, port, msgData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralClient::_keyInsertReq boost system_error=" << e.what() << + " key=" << key << " chunk=" << chunk << " sub=" << subchunk); + } +} + + +KeyInfoData::Ptr CentralClient::keyLookupReq(CompositeKey const& key) { + // Returns a pointer to a Tracker object that can be used to track job + // completion and job status. keyInsertOneShot will call _keyInsertReq until + // it knows the task was completed. _handleKeyInfoComplete marks + // the jobs complete as the messages come in from workers. + // Insert a oneShot DoListItem to keep trying to add the key until + // we get word that it has been added successfully. + LOGS(_log, LOG_LVL_INFO, "Trying to lookup key=" << key); + auto keyLookupOneShot = std::make_shared(this, key); + { + std::unique_lock lck(_waitingKeyLookupMtx); + // Limit the number of concurrent lookups. + // If the key is already in the map, there is no point in blocking. + int loopCount = 0; + uint64_t sleptForMicroSec = 0; + uint64_t const tenSec = 10000000; + auto iter = _waitingKeyLookupMap.find(key); + while (_waitingKeyLookupMap.size() > _doListMaxLookups + && iter == _waitingKeyLookupMap.end()) { + size_t sz = _waitingKeyLookupMap.size(); + lck.unlock(); + // Log a message about this about once every 10 seconds. + if (sleptForMicroSec > tenSec) sleptForMicroSec = 0; + if (sleptForMicroSec == 0) { + LOGS(_log, LOG_LVL_INFO, "keyInfoReq waiting key=" << key << + "size=" << sz << " loopCount=" << loopCount); + } + // Let the CPU do something else while waiting for some requests to finish. + usleep(_maxRequestSleepTime); + sleptForMicroSec += _maxRequestSleepTime; + ++loopCount; + lck.lock(); + iter = _waitingKeyLookupMap.find(key); + } + + // Use the existing lookup, if there is one. 
+ if (iter != _waitingKeyLookupMap.end()) { + auto cData = iter->second->cmdData; + return cData; + } + + _waitingKeyLookupMap[key] = keyLookupOneShot; + } + runAndAddDoListItem(keyLookupOneShot); + return keyLookupOneShot->cmdData; +} + + +void CentralClient::_keyLookupReq(CompositeKey const& key) { + LOGS(_log, LOG_LVL_INFO, "CentralClient::_keyLookupReq trying key=" << key); + LoaderMsg msg(LoaderMsg::KEY_LOOKUP_REQ, getNextMsgId(), getHostName(), getUdpPort()); + BufferUdp msgData; + msg.appendToData(msgData); + // create the proto buffer + lsst::qserv::proto::KeyInfoInsert protoKeyInsert; + lsst::qserv::proto::LdrNetAddress* protoAddr = protoKeyInsert.mutable_requester(); + protoAddr->set_ip(getHostName()); + protoAddr->set_udpport(getUdpPort()); + protoAddr->set_tcpport(getTcpPort()); + lsst::qserv::proto::KeyInfo* protoKeyInfo = protoKeyInsert.mutable_keyinfo(); + protoKeyInfo->set_keyint(key.kInt); + protoKeyInfo->set_keystr(key.kStr); + protoKeyInfo->set_chunk(0); + protoKeyInfo->set_subchunk(0); + protoKeyInsert.set_hops(0); + + StringElement strElem; + protoKeyInsert.SerializeToString(&(strElem.element)); + strElem.appendToData(msgData); + + try { + std::string ip; + int port = 0; + getWorkerForKey(key, ip, port); + sendBufferTo(ip, port, msgData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralClient::_keyInfoReq boost system_error=" << e.what() << + " key=" << key); + } +} + + +void CentralClient::getWorkerForKey(CompositeKey const& key, std::string& ip, int& port) { + auto worker = getWorkerList()->findWorkerForKey(key); + if (worker != nullptr) { + auto nAddr = worker->getUdpAddress(); + ip = nAddr.ip; + port = nAddr.port; + LOGS(_log, LOG_LVL_DEBUG, "getWorkerForKey " << key << " worker=" << *worker); + } else { + ip = getDefWorkerHost(); + port = getDefWorkerPortUdp(); + } +} + + +std::ostream& operator<<(std::ostream& os, KeyInfoData const& data) { + os << "key=" << data.key << "(" << data.chunk << "," << 
data.subchunk << ") " << + "success=" << data.success; + return os; +} + + +util::CommandTracked::Ptr CentralClient::KeyInsertReqOneShot::createCommand() { + struct KeyInsertReqCmd : public util::CommandTracked { + KeyInsertReqCmd(KeyInfoData::Ptr& cd, CentralClient* cent_) : cData(cd), cent(cent_) {} + void action(util::CmdData*) override { + cent->_keyInsertReq(cData->key, cData->chunk, cData->subchunk); + } + KeyInfoData::Ptr cData; + CentralClient* cent; + }; + return std::make_shared(cmdData, central); +} + + +void CentralClient::KeyInsertReqOneShot::keyInsertComplete() { + cmdData->success = true; + cmdData->setComplete(); + infoReceived(); +} + + +util::CommandTracked::Ptr CentralClient::KeyLookupReqOneShot::createCommand() { + struct KeyInfoReqCmd : public util::CommandTracked { + KeyInfoReqCmd(KeyInfoData::Ptr& cd, CentralClient* cent_) : cData(cd), cent(cent_) {} + void action(util::CmdData*) override { + cent->_keyLookupReq(cData->key); + } + KeyInfoData::Ptr cData; + CentralClient* cent; + }; + return std::make_shared(cmdData, central); +} + + +void CentralClient::KeyLookupReqOneShot::keyInfoComplete(CompositeKey const& key, + int chunk, int subchunk, bool success) { + if (key == cmdData->key) { + cmdData->chunk = chunk; + cmdData->subchunk = subchunk; + cmdData->success = success; + } + cmdData->setComplete(); + infoReceived(); +} + + +}}} // namespace lsst::qserv::loader diff --git a/core/modules/loader/CentralClient.h b/core/modules/loader/CentralClient.h new file mode 100644 index 0000000000..67e5e2364d --- /dev/null +++ b/core/modules/loader/CentralClient.h @@ -0,0 +1,174 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_CENTRAL_CLIENT_H +#define LSST_QSERV_LOADER_CENTRAL_CLIENT_H + +// system headers +#include +#include + +// third party headers +#include "boost/asio.hpp" + +// Qserv headers +#include "loader/Central.h" +#include "loader/CentralFollower.h" +#include "loader/ClientServer.h" +#include "loader/WWorkerList.h" +#include "util/Command.h" + +namespace lsst { +namespace qserv { +namespace loader { + +class ClientConfig; + +/// This class is used to track the status and value of jobs inserting +/// key-value pairs to the system or looking up key-value pairs. The +/// Tracker base class provides a means of notifying other threads +/// that the task is complete. +class KeyInfoData : public util::Tracker { +public: + using Ptr = std::shared_ptr; + KeyInfoData(CompositeKey const& key_, int chunk_, int subchunk_) : + key(key_), chunk(chunk_), subchunk(subchunk_) {} + + CompositeKey key; + int chunk; + int subchunk; + bool success{false}; + + friend std::ostream& operator<<(std::ostream& os, KeyInfoData const& data); +}; + +/// This class is 'Central' to the client. The client maintains a UDP port +/// so replies to its request can be sent directly back to it. +/// 'Central' provides access to the master and a DoList for handling requests. 
+class CentralClient : public CentralFollower { +public: + /// The client needs to know the master's IP and its own IP. + CentralClient(boost::asio::io_service& ioService_, + std::string const& hostName, ClientConfig const& cfg); + + void startService() override; + + ~CentralClient() override; + + /// @return the default worker's host name. + std::string getDefWorkerHost() const { return _defWorkerHost; } + /// @return the default worker's UDP port + int getDefWorkerPortUdp() const { return _defWorkerPortUdp; } + + /// Try to get the correct address for the worker responsible for 'key' + /// and place the information in 'ip' and 'port'. If a reasonable worker is + /// not located, the default worker information is returned. + void getWorkerForKey(CompositeKey const& key, std::string& ip, int& port); + + /// Asynchronously request a key value insert to the workers. + /// This can block if too many key insert requests are already in progress. + /// @return - a KeyInfoData object for checking the job's status or + /// nullptr if CentralClient is already trying to insert the key + /// but value doesn't match the existing value. This indicates + /// there is an input data error. + KeyInfoData::Ptr keyInsertReq(CompositeKey const& key, int chunk, int subchunk); + /// Handle a workers response to the keyInserReq call + void handleKeyInsertComplete(LoaderMsg const& inMsg, BufferUdp::Ptr const& data); + + /// Asynchronously request a key value lookup from the workers. It returns a + /// KeyInfoData object to be used to track job status and get the value of the key. + /// This can block if too many key lookup requests are already in progress. + KeyInfoData::Ptr keyLookupReq(CompositeKey const& key); + /// Handle a workers response to the keyInfoReq call. 
+ void handleKeyLookup(LoaderMsg const& inMsg, BufferUdp::Ptr const& data); + + int getDoListMaxLookups() { return _doListMaxLookups; } + int getDoListMaxInserts() { return _doListMaxInserts; } + + std::string getOurLogId() const override { return "client"; } + +private: + void _keyInsertReq(CompositeKey const& key, int chunk, int subchunk); ///< see keyInsertReq() + void _handleKeyInsertComplete(LoaderMsg const& inMsg, std::unique_ptr& protoBuf); + + void _keyLookupReq(CompositeKey const& key); ///< see keyLookReq() + void _handleKeyLookup(LoaderMsg const& inMsg, std::unique_ptr& protoBuf); + + + + /// Create commands to add a key to the index and track that they are done. + /// It should keep trying this until it works, and then drop it from _waitingKeyInsertMap. + struct KeyInsertReqOneShot : public DoListItem { + using Ptr = std::shared_ptr; + + KeyInsertReqOneShot(CentralClient* central_, CompositeKey const& key_, int chunk_, int subchunk_) : + cmdData(std::make_shared(key_, chunk_, subchunk_)), central(central_) { + setOneShot(true); + } + + util::CommandTracked::Ptr createCommand() override; + + /// TODO Have this function take result codes (such as 'success') as arguments + /// and put them in cmdData. + void keyInsertComplete(); + + KeyInfoData::Ptr cmdData; + CentralClient* central; + }; + + /// Create commands to lookup a key in the index and get its value. + /// It should keep trying this until it works and then drop it from _waitingKeyInfoMap. + struct KeyLookupReqOneShot : public DoListItem { + using Ptr = std::shared_ptr; + + KeyLookupReqOneShot(CentralClient* central_, CompositeKey const& key_) : + cmdData(std::make_shared(key_, -1, -1)), central(central_) { setOneShot(true); } + + util::CommandTracked::Ptr createCommand() override; + + // TODO Have this function take result codes as arguments and put them in cmdData. 
+ void keyInfoComplete(CompositeKey const& key, int chunk, int subchunk, bool success); + + KeyInfoData::Ptr cmdData; + CentralClient* central; + }; + + // If const is removed, these will need mutex protection. + const std::string _defWorkerHost; ///< Default worker host + const int _defWorkerPortUdp; ///< Default worker UDP port + + size_t const _doListMaxLookups = 1000; ///< Maximum number of concurrent lookups in DoList, set by config + size_t const _doListMaxInserts = 1000; ///< Maximum number of concurrent inserts in DoList, set by config + /// Time to sleep between checking requests when at max length, set by config + int const _maxRequestSleepTime = 100000; + + std::map _waitingKeyInsertMap; ///< Map of current insert requests. + std::mutex _waitingKeyInsertMtx; ///< protects _waitingKeyInsertMap, _doListMaxInserts + + std::map _waitingKeyLookupMap; ///< Map of current look up requests. + std::mutex _waitingKeyLookupMtx; ///< protects _waitingKeyLookMap, _doListMaxLookups +}; + +}}} // namespace lsst::qserv::loader + +#endif // LSST_QSERV_LOADER_CENTRAL_CLIENT_H diff --git a/core/modules/loader/CentralFollower.cc b/core/modules/loader/CentralFollower.cc new file mode 100644 index 0000000000..2ae6278fed --- /dev/null +++ b/core/modules/loader/CentralFollower.cc @@ -0,0 +1,127 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "loader/CentralFollower.h" + +// system headers +#include + +// third party headers +#include "boost/asio.hpp" + +// qserv headers +#include "proto/loader.pb.h" +#include "proto/ProtoImporter.h" + +// LSST headers +#include "lsst/log/Log.h" + + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.CentralFollower"); +} + +namespace lsst { +namespace qserv { +namespace loader { + + +CentralFollower::~CentralFollower() { + // Members that contain pointers to this. Deleting while this != null. + // TODO: wait for reference count to drop to one on _wWorkerList + _destroy = true; + _wWorkerList.reset(); +} + + +WWorkerList::Ptr const& CentralFollower::getWorkerList() { + // _wWorkerList == nullptr is extremely rare. + if (_wWorkerList == nullptr && !_destroy) { + std::lock_guard lg(_wListInitMtx); + if (_wWorkerList == nullptr && !_destroy) _wWorkerList = std::make_shared(this); + } + return _wWorkerList; +} + + +void CentralFollower::startMonitoring() { + LOGS(_log, LOG_LVL_INFO, "CentralFollower::startMonitoring"); + doList->addItem(getWorkerList()); +} + + +bool CentralFollower::workerInfoReceive(BufferUdp::Ptr const& data) { + // Open the data protobuffer and add it to our list. 
+ StringElement::Ptr sData = std::dynamic_pointer_cast(MsgElement::retrieve(*data, "CentralFollower::workerInfoReceive")); + if (sData == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralFollower::workerInfoRecieve Failed to parse list"); + return false; + } + std::unique_ptr protoList = sData->protoParse(); + if (protoList == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralFollower::workerInfoRecieve Failed to parse list"); + return false; + } + + // TODO: move this call to another thread + _workerInfoReceive(protoList); + return true; +} + + +void CentralFollower::_workerInfoReceive(std::unique_ptr& protoL) { + std::unique_ptr protoList(std::move(protoL)); + + // Check the information, if it is our network address, set or check our id. + // Then compare it with the map, adding new/changed information. + uint32_t wId = protoList->wid(); + std::string ipUdp(""); + int portUdp = 0; + int portTcp = 0; + if (protoList->has_address()) { + proto::LdrNetAddress protoAddr = protoList->address(); + ipUdp = protoAddr.ip(); + portUdp = protoAddr.udpport(); + portTcp = protoAddr.tcpport(); + } + KeyRange strRange; + if (protoList->has_range()) { + proto::WorkerRange protoRange = protoList->range(); + bool valid = protoRange.valid(); + if (valid) { + CompositeKey min(protoRange.minint(), protoRange.minstr()); + CompositeKey max(protoRange.maxint(), protoRange.maxstr()); + bool unlimited = protoRange.maxunlimited(); + strRange.setMinMax(min, max, unlimited); + } + } + + checkForThisWorkerValues(wId, ipUdp, portUdp, portTcp, strRange); + + // Make/update entry in map. 
+ getWorkerList()->updateEntry(wId, ipUdp, portUdp, portTcp, strRange); +} + + +}}} // namespace lsst::qserv::loader diff --git a/core/modules/loader/CentralFollower.h b/core/modules/loader/CentralFollower.h new file mode 100644 index 0000000000..1603bafcf8 --- /dev/null +++ b/core/modules/loader/CentralFollower.h @@ -0,0 +1,116 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2019 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_CENTRAL_FOLLOWER_H +#define LSST_QSERV_LOADER_CENTRAL_FOLLOWER_H + +// system headers +#include +#include +#include + +// third party headers +#include "boost/asio.hpp" + +// Qserv headers +#include "loader/Central.h" +#include "loader/DoList.h" +#include "loader/Neighbor.h" +#include "loader/ServerTcpBase.h" +#include "loader/WorkerConfig.h" + + +namespace lsst { +namespace qserv { + +namespace proto { +class WorkerKeysInfo; +} + +namespace loader { + +/// This class is used as a base central class for servers that need to get +/// lists of workers from the master. +/// CentralFollower provides no service on its own. 
The derived classes must:
+///   call workerInfoReceive(data) to handle LoaderMsg::MAST_WORKER_INFO
+///   call getWorkerList()->workerListReceive(data) to handle LoaderMsg::MAST_WORKER_LIST
+/// Both kinds of message are sent from the master.
+/// NOTE(review): template arguments (std::pair, std::unique_ptr, std::atomic) are missing in this copy.
+class CentralFollower : public Central {
+public:
+    typedef std::pair CompKeyPair;
+
+    CentralFollower(boost::asio::io_service& ioService,
+                    std::string const& hostName_, std::string const& masterHost, int masterPortUdp,
+                    int threadPoolSize, int loopSleepTime, int ioThreads, int fPortUdp)
+        : Central(ioService, masterHost, masterPortUdp, threadPoolSize, loopSleepTime, ioThreads),
+          _hostName(hostName_),
+          _udpPort(fPortUdp){
+    }
+
+    ~CentralFollower() override;
+
+    /// Adds the worker list to doList. CentralFollower provides no service on its own; derived
+    /// classes must handle messages sent from the master and call workerInfoReceive() as needed.
+    void startMonitoring() override;
+
+    std::string const& getHostName() const { return _hostName; }
+    int getUdpPort() const { return _udpPort; }
+
+    /// Only workers have TCP ports.
+    virtual int getTcpPort() const { return 0; }
+
+    /// Receive information about workers from the master.
+    bool workerInfoReceive(BufferUdp::Ptr const& data);
+
+    /// Returns a pointer to our worker list, creating the list on first use.
+    WWorkerList::Ptr const& getWorkerList();
+
+    std::string getOurLogId() const override { return "CentralFollower"; }
+
+protected:
+    /// Real workers need to check this for initial ranges. The default implementation does nothing.
+    virtual void checkForThisWorkerValues(uint32_t wId, std::string const& ip,
+                                          int portUdp, int portTcp, KeyRange& strRange) {};
+
+private:
+    const std::string _hostName; ///< our host name
+    const int _udpPort; ///< our UDP port
+
+    /// This function is needed to fill the map. On real workers, CentralWorker
+    /// needs to do additional work to set its own id.
+    void _workerInfoReceive(std::unique_ptr& protoBuf);
+
+    /// Map of workers with their key ranges and network addresses.
+    /// It is created lazily by getWorkerList(),
+    /// so it should be accessed through getWorkerList().
+    WWorkerList::Ptr _wWorkerList;
+    std::mutex _wListInitMtx; ///< mutex for initialization of _wWorkerList
+    std::atomic _destroy{false}; ///< set by the destructor; stops getWorkerList() creating a new list
+};
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_CENTRAL_FOLLOWER_H
+
+
diff --git a/core/modules/loader/CentralMaster.cc b/core/modules/loader/CentralMaster.cc
new file mode 100644
index 0000000000..87ef96889a
--- /dev/null
+++ b/core/modules/loader/CentralMaster.cc
@@ -0,0 +1,213 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ */ + + +// Class header +#include "loader/CentralMaster.h" + +// system headers +#include + +// Third-party headers +#include "boost/asio.hpp" + +// LSST headers +#include "lsst/log/Log.h" + + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.CentralMaster"); +} + +namespace lsst { +namespace qserv { +namespace loader { + +void CentralMaster::startService() { + _server = std::make_shared(ioService, getMasterHostName(), getMasterPort(), this); +} + + +void CentralMaster::addWorker(std::string const& ip, int udpPort, int tcpPort) { + auto item = _mWorkerList->addWorker(ip, udpPort, tcpPort); + + if (item != nullptr) { + // If that was the first worker added, it gets unlimited range. + if (_firstWorkerRegistered.exchange(true) == false) { + LOGS(_log, LOG_LVL_INFO, "setAllInclusiveRange for name=" << item->getId()); + item->setAllInclusiveRange(); + } + + item->addDoListItems(this); + LOGS(_log, LOG_LVL_INFO, "Master::addWorker " << *item); + } +} + + +void CentralMaster::updateWorkerInfo(uint32_t workerId, NeighborsInfo const& nInfo, KeyRange const& strRange) { + if (workerId == 0) { + return; + } + auto item = getWorkerWithId(workerId); + if (item == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralMaster::updateNeighbors nullptr for workerId=" << workerId); + return; + } + // TODO setting nInfo and strRange can be done in one call to reduce mutex locking. 
+ item->setNeighborsInfo(nInfo); + item->setRangeString(strRange); + _assignNeighborIfNeeded(workerId, item); +} + + +void CentralMaster::setWorkerNeighbor(MWorkerListItem::WPtr const& target, int message, uint32_t neighborId) { + // Get the target worker's network address + auto targetWorker = target.lock(); + if (targetWorker == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralMaster::setWorkerNeighbor nullptr for " << neighborId); + return; + } + + LOGS(_log, LOG_LVL_DEBUG, "CentralMaster::setWorkerNeighbor " << neighborId << " " << *targetWorker); + // Build and send the message + LoaderMsg msg(message, getNextMsgId(), getMasterHostName(), getMasterPort()); + BufferUdp msgData; + msg.appendToData(msgData); + UInt32Element neighborIdElem(neighborId); + neighborIdElem.appendToData(msgData); + auto addr = targetWorker->getUdpAddress(); + try { + sendBufferTo(addr.ip, addr.port, msgData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralMaster::setWorkerNeighbor boost system_error=" << e.what() << + " targ=" << *targetWorker << " msg=" << message << + " neighborId=" << neighborId); + } +} + + +void CentralMaster::_assignNeighborIfNeeded(uint32_t workerId, MWorkerListItem::Ptr const& wItem) { + // Go through the list and see if all the workers are full. + // If they are, assign a worker to the end (rightmost) worker + // and increase the maximum by an order of magnitude, max 10 million. + // TODO Make a better algorithm, insert workers at busiest worker. + std::string funcName("_assignNeighborIfNeeded"); + LOGS(_log, LOG_LVL_DEBUG, funcName); + if (_addingWorkerId != 0 && _addingWorkerId != workerId) { + // Already in process of adding a worker, and the worker + // with new information wasn't the one added. Nothing to do. + // TODO Check if it failed. (May need to go in a timer thread instead) + return; + } + // Only one thread should ever be working on this logic at a time. 
+ std::lock_guard lck(_assignMtx); + if (_addingWorkerId != 0) { + if (_addingWorkerId == workerId) { + auto rng = wItem->getRangeString(); + if (rng.getValid()) { + wItem->setActive(true); + LOGS(_log, LOG_LVL_INFO, "Successfully activated wId=" << workerId << + " range=" << rng); + _addingWorkerId = 0; + } + } + } + + auto pair = _mWorkerList->getActiveInactiveWorkerLists(); + std::vector& activeList = pair.first; + std::vector& inactiveList = pair.second; + if (inactiveList.empty() || _addingWorkerId != 0) { return; } + double sum = 0.0; + int max = 0; + uint32_t maxWId = 0; + uint32_t rightMostName = 0; // Name of the rightmost worker, unlimited upper range. + MWorkerListItem::Ptr rightMostItem; + for(auto& item : activeList) { + int keyCount = item->getKeyCount(); + sum += keyCount; + if (keyCount > max) { + max = keyCount; + maxWId = item->getId(); + } + auto range = item->getRangeString(); + if (range.getValid() && range.getUnlimited()) { + if (rightMostName != 0) { + std::string eStr("Multiple rightMost workers name="); + eStr += rightMostName + " name=" + item->getId(); + LOGS(_log, LOG_LVL_ERROR, "_assignNeighborIfNeeded " << eStr); + throw LoaderMsgErr(ERR_LOC, eStr); + } + rightMostName = item->getId(); + rightMostItem = item; + } + } + if (rightMostItem == nullptr) { + LOGS(_log, LOG_LVL_WARN, funcName << " no rightmost worker found when expected."); + return; + } + double avg = sum/(double)(activeList.size()); + LOGS(_log, LOG_LVL_INFO, "max=" << max << " maxWId=" << maxWId << " avg=" << avg); + if (avg > getMaxKeysPerWorker()) { + // Assign a neighbor to the rightmost worker, if there are any unused nodes. + // TODO Probably better to assign a new neighbor next to the node with the most recent activity. + // but that's much more complicated. 
+ LOGS(_log, LOG_LVL_INFO, "ADDING WORKER avg=" << avg); + auto inactiveItem = inactiveList.front(); + if (inactiveItem == nullptr) { + throw LoaderMsgErr(ERR_LOC,"_assignNeighborIfNeeded unexpected inactiveList nullptr"); + } + _addingWorkerId = inactiveItem->getId(); + // Sequence of events goes something like + // 1) left item gets message from master that it is getting a right neighbor, writes it down. + // 2) Right item get message from master that it is getting a left neighbor, writes it down. + // 3) CentralWorker::_monitor() on the left node(rightmostItem) connects to the right + // node(inactiveItem), ranges are setup and shifts are started. + // 4) When message received from the new worker saying that it has a valid range, + // set _addingWorkerId to 0. This check happens earlier in this function. + // + // Steps 1 and 2 + rightMostItem->setRightNeighbor(inactiveItem); + inactiveItem->setLeftNeighbor(rightMostItem); + } +} + +MWorkerListItem::Ptr CentralMaster::getWorkerWithId(uint32_t id) { + return _mWorkerList->getWorkerWithId(id); +} + + +void CentralMaster::reqWorkerKeysInfo(uint64_t msgId, std::string const& targetIp, short targetPort, + std::string const& ourHostName, short ourPort) { + LoaderMsg reqMsg(LoaderMsg::WORKER_KEYS_INFO_REQ, msgId, ourHostName, ourPort); + BufferUdp data; + reqMsg.appendToData(data); + try { + sendBufferTo(targetIp, targetPort, data); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralMaster::reqWorkerKeysInfo boost system_error=" << e.what() << + " msgId=" << msgId << " tIp=" << targetIp << " tPort=" << targetPort << + " ourHost=" << ourHostName << " ourPort=" << ourPort); + } +} + +}}} // namespace lsst::qserv::loader diff --git a/core/modules/loader/CentralMaster.h b/core/modules/loader/CentralMaster.h new file mode 100644 index 0000000000..24a934d8e4 --- /dev/null +++ b/core/modules/loader/CentralMaster.h @@ -0,0 +1,102 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + 
* Copyright 2018 LSST Corporation.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ *
+ */
+#ifndef LSST_QSERV_LOADER_CENTRALMASTER_H
+#define LSST_QSERV_LOADER_CENTRALMASTER_H
+
+// system headers -- NOTE(review): header names and template arguments in angle brackets are missing in this copy
+#include
+#include
+
+// third party headers
+#include "boost/asio.hpp"
+
+// Qserv headers
+#include "loader/Central.h"
+#include "loader/MasterConfig.h"
+
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+/// Central Master is the central element of the master. It maintains a DoList
+/// and a list of all workers including their addresses, key ranges, and number of keys
+/// on each worker. The authoritative ranges come from the workers. The ranges in the
+/// master's list may be out of date but the system should handle it gracefully.
+///
+/// Workers register with the master when they start and are inactive until the master
+/// gives them a valid range or a neighbor. The first worker activated has a range
+/// covering all possible keys.
+class CentralMaster : public Central {
+public:
+    /// Construct from the master configuration; port, thread and timing settings are read from 'cfg'.
+    CentralMaster(boost::asio::io_service& ioService_, std::string const& masterHostName_,
+                  MasterConfig const& cfg)
+        : Central(ioService_, masterHostName_, cfg.getMasterPort(),
+                  cfg.getThreadPoolSize(), cfg.getLoopSleepTime(), cfg.getIOThreads()),
+          _maxKeysPerWorker(cfg.getMaxKeysPerWorker()) {}
+
+    /// Open the UDP port. This can throw boost::system::system_error.
+    void startService() override;
+
+    ~CentralMaster() override { _mWorkerList.reset(); }
+
+    void setMaxKeysPerWorker(int val) { _maxKeysPerWorker = val; }
+    int getMaxKeysPerWorker() const { return _maxKeysPerWorker; }
+
+    void addWorker(std::string const& ip, int udpPort, int tcpPort); ///< Add a new worker to the system.
+    void updateWorkerInfo(uint32_t workerId, NeighborsInfo const& nInfo, KeyRange const& strRange);
+
+    MWorkerListItem::Ptr getWorkerWithId(uint32_t id);
+
+    MWorkerList::Ptr getWorkerList() const { return _mWorkerList; }
+
+    void reqWorkerKeysInfo(uint64_t msgId, std::string const& targetIp, short targetPort,
+                           std::string const& ourHostName, short ourPort);
+
+    std::string getOurLogId() const override { return "master"; }
+
+    void setWorkerNeighbor(MWorkerListItem::WPtr const& target, int message, uint32_t neighborId);
+
+private:
+    /// Upon receiving new worker information, check if an inactive worker should be made active.
+    void _assignNeighborIfNeeded(uint32_t workerId, MWorkerListItem::Ptr const& wItem);
+
+    std::mutex _assignMtx; ///< Protects critical region where workers can be set to active.
+
+    std::atomic _maxKeysPerWorker{1000}; ///< Default value; the constructor overwrites it from MasterConfig.
+
+    MWorkerList::Ptr _mWorkerList{std::make_shared(this)}; ///< List of workers.
+
+    std::atomic _firstWorkerRegistered{false}; ///< True once the first worker has been registered by addWorker().
+
+    /// The id of the worker being added. '0' indicates no worker being added.
+    /// Its value can only be set to non-zero values within _assignNeighborIfNeeded(...).
+    std::atomic _addingWorkerId{0}; // TODO maybe move _addingWorkerId to MWorkerList.
+};
+
+}}} // namespace lsst::qserv::loader
+
+
+#endif // LSST_QSERV_LOADER_CENTRALMASTER_H
diff --git a/core/modules/loader/CentralWorker.cc b/core/modules/loader/CentralWorker.cc
new file mode 100644
index 0000000000..a6511ffa5a
--- /dev/null
+++ b/core/modules/loader/CentralWorker.cc
@@ -0,0 +1,1143 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ */ + + +// Class header +#include "loader/CentralWorker.h" + +// system headers +#include + +// third party headers +#include "boost/asio.hpp" + +// qserv headers +#include "loader/BufferUdp.h" +#include "loader/CentralWorkerDoListItem.h" +#include "loader/LoaderMsg.h" +#include "loader/WorkerConfig.h" +#include "proto/loader.pb.h" +#include "proto/ProtoImporter.h" +#include "util/Timer.h" + + +// LSST headers +#include "lsst/log/Log.h" + + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.CentralWorker"); +} + +namespace lsst { +namespace qserv { +namespace loader { + +CentralWorker::CentralWorker(boost::asio::io_service& ioService_, boost::asio::io_context& io_context_, + std::string const& hostName_, WorkerConfig const& cfg) + : CentralFollower(ioService_, hostName_, cfg.getMasterHost(), cfg.getMasterPortUdp(), + cfg.getThreadPoolSize(), cfg.getLoopSleepTime(), cfg.getIOThreads(),cfg.getWPortUdp()), + _tcpPort(cfg.getWPortTcp()), + _ioContext(io_context_), + _recentAddLimit(cfg.getRecentAddLimit()), + _thresholdNeighborShift(cfg.getThresholdNeighborShift()), + _maxKeysToShift(cfg.getMaxKeysToShift()) { +} + + +void CentralWorker::startService() { + _server = std::make_shared(ioService, getHostName(), getUdpPort(), this); + _tcpServer = std::make_shared(_ioContext, _tcpPort, this); + _tcpServer->runThread(); +} + + +CentralWorker::~CentralWorker() { + // Members that contain pointers to this. Deleting while this != null. + // TODO: Wait for reference count to drop to one or less, + // although CentralWorker is never really shutdown. + _tcpServer.reset(); +} + + +std::string CentralWorker::getOurLogId() const { + std::stringstream os; + os << "(w name=" << _ourId << " addr=" << getHostName() << + ":udp=" << getUdpPort() << " tcp=" << _tcpPort << ")"; + return os.str(); +} + +void CentralWorker::startMonitoring() { + CentralFollower::startMonitoring(); + // Add _workerList to _doList so it starts checking new entries. 
+ _centralWorkerDoListItem = std::make_shared(this); + doList->addItem(_centralWorkerDoListItem); +} + + +void CentralWorker::_monitor() { + LOGS(_log, LOG_LVL_INFO, "CentralWorker::_monitor"); + + /// If our id is invalid, try registering with the master. + if (_isOurIdInvalid()) { + _registerWithMaster(); + // Give the master a half second to answer. + usleep(500000); + return; + } + + // If data gets shifted, check everything again as ranges will have + // changed and there may be a lot more data to shift. + bool dataShifted = false; + do { + // TODO Check if we've heard from left neighbor (possibly kill connection if nothing heard???) + + // Check the right neighbor connection, kill and restart if needed. + // Check if data needs to be shifted with the right node + // This mutex is locked for a long time TODO break this up? + std::lock_guard rMtxLG(_rightMtx); + LOGS(_log, LOG_LVL_INFO, "_monitor " << _ourId << + " checking right neighbor " << _neighborRight.getId()); + if (_neighborRight.getId() != 0) { + try { + if (not _neighborRight.getEstablished()) { + LOGS(_log, LOG_LVL_INFO, "_monitor " << _ourId << " trying to connect"); + auto nAddr = _neighborRight.getAddressTcp(); + if (nAddr.ip == "") { + // look up the network address for the rightNeighbor + WWorkerListItem::Ptr nWorker = + getWorkerList()->getWorkerWithId(_neighborRight.getId()); + if (nWorker != nullptr) { + auto addrTcp = nWorker->getTcpAddress(); + auto addrUdp = nWorker->getUdpAddress(); + if (addrTcp.ip.empty() || addrUdp.ip.empty()) { + throw LoaderMsgErr(ERR_LOC, "Missing valid address for neighbor=" + + std::to_string(_neighborRight.getId())); + } + LOGS(_log, LOG_LVL_INFO, "_monitor neighbor right " << + _neighborRight.getId() << " T=" << addrTcp << " U=" << addrUdp); + _neighborRight.setAddressTcp(addrTcp); + _neighborRight.setAddressUdp(addrUdp); + } + } + + LOGS(_log, LOG_LVL_INFO, "_monitor trying to establish TCP connection with " << + _neighborRight.getId() << " " << 
_neighborRight.getAddressTcp()); + _rightConnect(rMtxLG); // calls _determineRange() while establishing connection + } else { + LOGS(_log, LOG_LVL_INFO, "_monitor " << _ourId << " getting range info"); + if (_determineRange()) { + _rangeChanged = true; + } + } + dataShifted = _shiftIfNeeded(rMtxLG); + } catch (LoaderMsgErr const& ex) { + LOGS(_log, LOG_LVL_ERROR, "_monitor() catching exception " << ex.what()); + _rightDisconnect(rMtxLG, "_monitor msgErr ex"); + } catch (boost::system::system_error const& ex) { + LOGS(_log, LOG_LVL_ERROR, "_monitor() catching boost exception " << ex.what()); + _rightDisconnect(rMtxLG, "_monitor boost ex"); + } + } else { + // If there is a connection, close it. + _rightDisconnect(rMtxLG, "_monitor closing since _neighborRight.getId()=0"); + } + if (_rangeChanged) { + // Send new range to master so all clients and workers can be updated. + _rangeChanged = false; + LOGS(_log, LOG_LVL_INFO, "_monitor updating range with master"); + NetworkAddress masterAddr(getMasterHostName(), getMasterPort()); + _sendWorkerKeysInfo(masterAddr, getNextMsgId()); + } + } while (dataShifted); +} + + +bool CentralWorker::_setOurId(uint32_t id) { + std::lock_guard lck(_ourIdMtx); + if (_ourIdInvalid) { + _ourId = id; + _ourIdInvalid = false; + return true; + } else { + /// TODO add error message, check if _ourId matches id + if (id == 0) { + _masterDisable(); + } else if (id != _ourId) { + LOGS(_log, LOG_LVL_ERROR, "worker=" << _ourId << + " id being changed by master!!! new id=" << id); + } + return false; + } +} + + +uint32_t CentralWorker::getOurId() const { + std::lock_guard lck(_ourIdMtx); + return _ourId; +} + + +void CentralWorker::_masterDisable() { + LOGS(_log, LOG_LVL_INFO, "worker=" << _ourId << + " changed to 0, master shutting this down."); + _ourIdInvalid = true; + // Disconnect from right neighbor. 
+ { + std::lock_guard rMtxLG(_rightMtx); + _rightDisconnect(rMtxLG, "_masterDisable"); + _neighborRight.setId(0); + } + // Disconnect from left neighbor. TODO actively kill the left connection. + _neighborLeft.setId(0); + // TODO invalidate range and _keyValueMap +} + + +bool CentralWorker::_determineRange() { + std::string const funcName("CentralWorker::_determineRange"); + bool rangeChanged = false; + BufferUdp data(2000); + { + data.reset(); + UInt32Element imLeftKind(LoaderMsg::IM_YOUR_L_NEIGHBOR); + imLeftKind.appendToData(data); + // Send information about how many keys on this node and their range. + StringElement strElem; + std::unique_ptr protoWKI = workerKeysInfoBuilder(); + protoWKI->SerializeToString(&(strElem.element)); + UInt32Element bytesInMsg(strElem.transmitSize()); + // Must send the number of bytes in the message so TCP server knows how many bytes to read. + bytesInMsg.appendToData(data); + strElem.appendToData(data); + ServerTcpBase::writeData(*_rightSocket, data); + } + // Get back their basic info + { + data.reset(); + auto msgElem = data.readFromSocket(*_rightSocket, funcName + " - range bytes"); + auto bytesInMsg = std::dynamic_pointer_cast(msgElem); + msgElem = data.readFromSocket(*_rightSocket, funcName + " - range info"); + auto strWKI = std::dynamic_pointer_cast(msgElem); + auto protoItem = strWKI->protoParse(); + if (protoItem == nullptr) { + LOGS(_log, LOG_LVL_ERROR, funcName << " protoItem parse issue!!!!!"); + throw LoaderMsgErr(ERR_LOC, "protoItem parse issue!!!!!"); + } + NeighborsInfo nInfoR; + auto workerId = protoItem->wid(); + nInfoR.keyCount = protoItem->mapsize(); + _neighborRight.setKeyCount(nInfoR.keyCount); // TODO add a timestamp to this data. 
+ nInfoR.recentAdds = protoItem->recentadds(); + proto::WorkerRange protoRange = protoItem->range(); + LOGS(_log, LOG_LVL_INFO, funcName << " rightNeighbor workerId=" << workerId << + " keyCount=" << nInfoR.keyCount << " recentAdds=" << nInfoR.recentAdds); + bool valid = protoRange.valid(); + KeyRange rightRange; + if (valid) { + CompositeKey min(protoRange.minint(), protoRange.minstr()); + CompositeKey max(protoRange.maxint(), protoRange.maxstr()); + bool unlimited = protoRange.maxunlimited(); + rightRange.setMinMax(min, max, unlimited); + LOGS(_log, LOG_LVL_INFO, funcName << " rightRange=" << rightRange); + _neighborRight.setRange(rightRange); + // Adjust our max range given the the right minimum information. + // Our maximum value is up to but not including the right minimum. + { + std::lock_guard lckMap(_idMapMtx); + auto origMax = _keyRange.getMax(); + auto origUnlim = _keyRange.getUnlimited(); + // Can't be unlimited anymore as there is a right neighbor. + _keyRange.setMax(min, false); + if (origUnlim != _keyRange.getUnlimited() || + (!origUnlim && origMax != _keyRange.getMax())) { + rangeChanged = true; + } + } + } + proto::Neighbor protoLeftNeigh = protoItem->left(); + nInfoR.neighborLeft->update(protoLeftNeigh.wid()); // Not really useful in this case. + proto::Neighbor protoRightNeigh = protoItem->right(); + nInfoR.neighborRight->update(protoRightNeigh.wid()); // This should be our id + if (nInfoR.neighborLeft->get() != getOurId()) { + LOGS(_log, LOG_LVL_ERROR, "Our (" << getOurId() << + ") right neighbor does not have our name as its left neighbor" ); + } + } + return rangeChanged; +} + + +// must hold _rightMtx before calling +bool CentralWorker::_shiftIfNeeded(std::lock_guard const& rightMtxLG) { + // There should be reasonably recent information from our neighbors. Use that + // and our status to ask the right neighbor to give us entries or we send entries + // to the right neighbor. 
+ // If right connection is not established, return + if (not _neighborRight.getEstablished()) { + LOGS(_log, LOG_LVL_INFO, "_shiftIfNeeded no right neighbor, no shift."); + return false; + } + if (_shiftAsClientInProgress) { + LOGS(_log, LOG_LVL_INFO, "_shiftIfNeeded shift already in progress."); + return false; + } + + // Get local copies of range and map info. + KeyRange range; + size_t mapSize; + { + std::lock_guard lck(_idMapMtx); + range = _keyRange; + mapSize = _keyValueMap.size(); + } + + // If this worker has more keys than the rightNeighbor, consider shifting keys to the right neighbor. + // If this worker has _thresholdAverage more keys than average or _thresholdNeighborShift more keys than the right neighbor + // send enough keys to the right to balance (min 1 key, max _maxShiftKeys, never shift more than 1/3 of our keys) + int rightKeyCount = 0; + KeyRange rightRange; + _neighborRight.getKeyData(rightKeyCount, rightRange); + if (range > rightRange) { + LOGS(_log, LOG_LVL_ERROR, "Right neighbor range is less than ours!!!! our=" << range << " right=" << rightRange); + return false; + } + int keysToShift = 0; + CentralWorker::Direction direction = NONE0; + int sourceSize = 0; + LOGS(_log, LOG_LVL_INFO, "_shiftIfNeeded _monitor thisSz=" << mapSize << " rightSz=" << rightKeyCount); + if (mapSize > rightKeyCount*_thresholdNeighborShift) { // TODO add average across workers check + keysToShift = (mapSize - rightKeyCount)/2; // Try for nearly equal number of keys on each. + direction = TORIGHT1; + sourceSize = mapSize; + } else if (mapSize*_thresholdNeighborShift < rightKeyCount) { // TODO add average across workers check + keysToShift = (rightKeyCount - mapSize)/2; // Try for nearly equal number of keys on each. 
+ direction = FROMRIGHT2; + sourceSize = rightKeyCount; + } else { + LOGS(_log, LOG_LVL_INFO, "No reason to shift."); + return false; + } + if (keysToShift > _maxKeysToShift) keysToShift = _maxKeysToShift; + if (keysToShift > sourceSize/3) keysToShift = sourceSize/3; + if (keysToShift < 1) { + LOGS(_log, LOG_LVL_INFO, "Worker doesn't have enough keys to shift."); + return false; + } + _shiftAsClientInProgress = true; + LOGS(_log, LOG_LVL_INFO, "shift dir(TO1 FROM2)=" << direction << " keys=" << keysToShift << + " szThis=" << mapSize << " szRight=" << rightKeyCount); + _shift(direction, keysToShift); + return true; +} + + +void CentralWorker::_shift(Direction direction, int keysToShift) { + std::string const fName("CentralWorker::_shift"); + LOGS(_log, LOG_LVL_DEBUG, fName); + if (direction == FROMRIGHT2) { + BufferUdp data(1000000); + // Construct a message asking for keys to shift (it will shift its lowest keys, which will be our highest keys) + proto::KeyShiftRequest protoKeyShiftRequest; + protoKeyShiftRequest.set_keystoshift(keysToShift); + { + StringElement keyShiftReq; + protoKeyShiftRequest.SerializeToString(&(keyShiftReq.element)); + // Send the message kind, followed by the transmit size, and then the protobuffer. 
+ UInt32Element kindShiftFromRight(LoaderMsg::SHIFT_FROM_RIGHT); + UInt32Element bytesInMsg(keyShiftReq.transmitSize()); + BufferUdp data(kindShiftFromRight.transmitSize() + bytesInMsg.transmitSize() + keyShiftReq.transmitSize()); + kindShiftFromRight.appendToData(data); + bytesInMsg.appendToData(data); + keyShiftReq.appendToData(data); + LOGS(_log, LOG_LVL_INFO, fName << " FROMRIGHT " << keysToShift); + ServerTcpBase::writeData(*_rightSocket, data); + } + // Wait for the KeyList response + { + data.reset(); + auto msgElem = data.readFromSocket(*_rightSocket, + fName + " waiting for FROMRIGHT KeyList"); + auto keyListElem = std::dynamic_pointer_cast(msgElem); + if (keyListElem == nullptr) { + throw LoaderMsgErr(ERR_LOC, fName +" FROMRIGHT failure to get KeyList"); + } + auto protoKeyList = keyListElem->protoParse(); + if (protoKeyList == nullptr) { + throw LoaderMsgErr(ERR_LOC, fName + " FROMRIGHT failure to parse KeyList size=" + + std::to_string(keyListElem->element.size())); + } + + // TODO This is very similar to code in TcpBaseConnection::_handleShiftToRight and they should be merged. + int sz = protoKeyList->keypair_size(); + std::vector keyList; + for (int j=0; j < sz; ++j) { + proto::KeyInfo const& protoKI = protoKeyList->keypair(j); + ChunkSubchunk chSub(protoKI.chunk(), protoKI.subchunk()); + keyList.push_back(std::make_pair(CompositeKey(protoKI.keyint(), protoKI.keystr()), chSub)); + } + insertKeys(keyList, false); + } + // Send received message + data.reset(); + UInt32Element elem(LoaderMsg::SHIFT_FROM_RIGHT_RECEIVED); + elem.appendToData(data); + ServerTcpBase::writeData(*_rightSocket, data); + LOGS(_log, LOG_LVL_INFO, fName << " direction=" << direction << " keys=" << keysToShift); + + } else if (direction == TORIGHT1) { + LOGS(_log, LOG_LVL_INFO, fName << " TORIGHT " << keysToShift); + // TODO this is very similar to CentralWorker::buildKeyList() and should be merged with that. 
+ // Construct a message with that many keys and send it (sending the highest keys) + proto::KeyList protoKeyList; + protoKeyList.set_keycount(keysToShift); + CompositeKey minKey = CompositeKey::minValue; // smallest key is sent to right neighbor + { + std::lock_guard lck(_idMapMtx); + if (not _transferListToRight.empty()) { + throw LoaderMsgErr(ERR_LOC, fName + " _transferList not empty"); + } + for (int j=0; j < keysToShift && _keyValueMap.size() > 1; ++j) { + auto iter = _keyValueMap.end(); + --iter; // rbegin() returns a reverse iterator which doesn't work with erase(). + _transferListToRight.push_back(std::make_pair(iter->first, iter->second)); + proto::KeyInfo* protoKI = protoKeyList.add_keypair(); + minKey = iter->first; + protoKI->set_keyint(minKey.kInt); + protoKI->set_keystr(minKey.kStr); + protoKI->set_chunk(iter->second.chunk); + protoKI->set_subchunk(iter->second.subchunk); + _keyValueMap.erase(iter); + } + // Adjust our range; + _keyRange.setMax(minKey); + } + StringElement keyList; + protoKeyList.SerializeToString(&(keyList.element)); + // Send the message kind, followed by the transmit size, and then the protobuffer. + UInt32Element kindShiftRight(LoaderMsg::SHIFT_TO_RIGHT); + UInt32Element bytesInMsg(keyList.transmitSize()); + BufferUdp data(kindShiftRight.transmitSize() + bytesInMsg.transmitSize() + keyList.transmitSize()); + if (data.getMaxLength() > TcpBaseConnection::getMaxBufSize()) { + std::string errMsg = fName + " SHIFT_TO_RIGHT FAILED message too big sz=" + + std::to_string(data.getMaxLength()) + + " max=" + std::to_string(TcpBaseConnection::getMaxBufSize()); + LOGS(_log, LOG_LVL_ERROR, errMsg); + // This will keep getting thrown and never work, but at least it will show up + // in the logs. + // TODO Maybe create new exception, catch it and halve the number of keys to shift? 
+ throw LoaderMsgErr(ERR_LOC, errMsg); + } + kindShiftRight.appendToData(data); + bytesInMsg.appendToData(data); + keyList.appendToData(data); + + LOGS(_log, LOG_LVL_INFO, fName << " TORIGHT sending keys"); + ServerTcpBase::writeData(*_rightSocket, data); + + // read back LoaderMsg::SHIFT_TO_RIGHT_KEYS_RECEIVED + data.reset(); + auto msgElem = data.readFromSocket(*_rightSocket, + fName + " SHIFT_TO_RIGHT_KEYS_RECEIVED"); + UInt32Element::Ptr received = std::dynamic_pointer_cast(msgElem); + LOGS(_log, LOG_LVL_INFO, fName << " TORIGHT keys were received"); + if (received == nullptr || received->element != LoaderMsg::SHIFT_TO_RIGHT_RECEIVED) { + throw LoaderMsgErr(ERR_LOC, fName +" receive failure"); + } + _finishShiftToRight(); + LOGS(_log, LOG_LVL_INFO, fName + " end direction=" << direction << " keys=" << keysToShift); + } + LOGS(_log, LOG_LVL_INFO, "CentralWorker::_shift DumpKeys " << dumpKeysStr(2)); + _shiftAsClientInProgress = false; +} + + + + + +void CentralWorker::_finishShiftToRight() { + std::lock_guard lck(_idMapMtx); + _transferListToRight.clear(); +} + + +void CentralWorker::finishShiftFromRight() { + std::lock_guard lck(_idMapMtx); + _transferListWithLeft.clear(); +} + + +StringElement::UPtr CentralWorker::buildKeyList(int keysToShift) { + std::string funcName = "CentralWorker::buildKeyList"; + proto::KeyList protoKeyList; + CompositeKey minKey = CompositeKey::minValue; // smallest key sent + CompositeKey maxKey = CompositeKey::minValue; // largest key sent + { + LOGS(_log, LOG_LVL_INFO, funcName); + std::lock_guard lck(_idMapMtx); + if (not _transferListWithLeft.empty()) { + throw LoaderMsgErr(ERR_LOC, "_shift _transferListFromRight not empty"); + } + int maxKeysToShift = _keyValueMap.size()/3; + if (keysToShift > maxKeysToShift) keysToShift = maxKeysToShift; + protoKeyList.set_keycount(keysToShift); + bool firstPass = true; + for (int j=0; j < keysToShift && _keyValueMap.size() > 1; ++j) { + auto iter = _keyValueMap.begin(); + if (firstPass) { + 
minKey = iter->first; + } + _transferListWithLeft.push_back(std::make_pair(iter->first, iter->second)); + proto::KeyInfo* protoKI = protoKeyList.add_keypair(); + maxKey = iter->first; + protoKI->set_keyint(maxKey.kInt); + protoKI->set_keystr(maxKey.kStr); + protoKI->set_chunk(iter->second.chunk); + protoKI->set_subchunk(iter->second.subchunk); + _keyValueMap.erase(iter); + } + // Adjust our range; + auto iter = _keyValueMap.begin(); + auto minKey = _keyRange.getMin(); + if (minKey != CompositeKey::minValue) { + if (iter->first != minKey) { + _keyRange.setMin(iter->first); + _rangeChanged = true; + } + } + } + StringElement::UPtr keyList(new StringElement()); + protoKeyList.SerializeToString(&(keyList->element)); + return keyList; +} + + +/// Must hold _rightMtx before calling +void CentralWorker::_rightConnect(std::lock_guard const& rightMtxLG) { + std::string const funcName("CentralWorker::_rightConnect"); + if(_rightConnectStatus == VOID0) { + LOGS(_log, LOG_LVL_INFO, funcName + " starting rightConnection"); + _rightConnectStatus = STARTING1; + // Connect to the right neighbor server + AsioTcp::resolver resolver(_ioContext); + auto addr = _neighborRight.getAddressTcp(); + AsioTcp::resolver::results_type endpoints = resolver.resolve(addr.ip, std::to_string(addr.port)); + _rightSocket.reset(new AsioTcp::socket(_ioContext)); + boost::system::error_code ec; + boost::asio::connect(*_rightSocket, endpoints, ec); + if (ec) { + _rightSocket.reset(); + LOGS(_log, LOG_LVL_WARN, "failed to connect to " << _neighborRight.getId() << " " << + addr << " ec=" << ec.value() << ":" << ec.message()); + return; + } + + // Get name from server + BufferUdp data(2000); + { + auto msgElem = data.readFromSocket(*_rightSocket, "CentralWorker::_rightConnect"); + // First element should be UInt32Element with the other worker's name + UInt32Element::Ptr nghName = std::dynamic_pointer_cast(msgElem); + if (nghName == nullptr) { + throw LoaderMsgErr(ERR_LOC, std::string("first element wasn't 
correct type ") + + msgElem->getStringVal()); + } + + // Check if correct name + if (nghName->element != _neighborRight.getId()) { + throw LoaderMsgErr(ERR_LOC, std::string("wrong name expected ") + + std::to_string(_neighborRight.getId()) + + " got " + std::to_string(nghName->element)); + } + } + + // Send our basic key info so ranges can be determined. + _determineRange(); + + _rightConnectStatus = ESTABLISHED2; + LOGS(_log, LOG_LVL_INFO, funcName + " established rightConnection"); + _neighborRight.setEstablished(true); + } +} + + +void CentralWorker::setNeighborInfoLeft(uint32_t wId, int keyCount, KeyRange const& range) { + if (wId != _neighborLeft.getId()) { + LOGS(_log, LOG_LVL_ERROR, "disconnecting left since setNeighborInfoLeft wId(" << wId << + ") != neighborLeft.name(" << _neighborLeft.getId() << ")"); + _neighborLeft.setEstablished(false); + return; + } + _neighborLeft.setKeyCount(keyCount); + _neighborLeft.setRange(range); + _neighborLeft.setEstablished(true); +} + + +/// Must hold _rightMtx before calling +void CentralWorker::_rightDisconnect(std::lock_guard const& lg, std::string const& note) { + LOGS(_log, LOG_LVL_INFO, "CentralWorker::_rightDisconnect " << note); + if (_rightSocket != nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralWorker::_rightDisconnect disconnecting"); + _rightSocket->shutdown(boost::asio::ip::tcp::socket::shutdown_both); + _rightSocket->close(); + _neighborRight.setEstablished(false); + } + _rightConnectStatus = VOID0; + _cancelShiftsWithRightNeighbor(); +} + + +void CentralWorker::_cancelShiftsWithRightNeighbor() { + // Client side of connection, was sending largest keys right. + // If keys were being shifted from right, this node's map is still intact. 
+ LOGS(_log, LOG_LVL_DEBUG, "_cancelShiftsWithRightNeighbor"); + std::lock_guard lck(_idMapMtx); + if (_shiftAsClientInProgress.exchange(false)) { + LOGS(_log, LOG_LVL_WARN, "Canceling shiftToRight neighbor"); + // Restore the transfer list to the id map + for (auto&& elem:_transferListToRight) { + auto res = _keyValueMap.insert(std::make_pair(elem.first, elem.second)); + if (not res.second) { + LOGS(_log, LOG_LVL_WARN, "_cancelShiftsRightNeighbor Possible duplicate " << + elem.first << ":" << elem.second); + } + } + _transferListToRight.clear(); + // Leave the reduced range until fixed by our right neighbor. + } +} + + +void CentralWorker::cancelShiftsWithLeftNeighbor() { + // Server side of connection, was sending smallest keys left. + // If keys were being transfered from the left node, this node's map is still intact. + LOGS(_log, LOG_LVL_WARN, "cancelShiftsWithLeftNeighbor"); + std::lock_guard lck(_idMapMtx); + if (not _transferListWithLeft.empty()) { + // Restore the transfer list to the id map + for (auto&& elem:_transferListWithLeft) { + auto res = _keyValueMap.insert(std::make_pair(elem.first, elem.second)); + if (not res.second) { + LOGS(_log, LOG_LVL_WARN, "_cancelShiftsRightNeighbor Possible duplicate " << + elem.first << ":" << elem.second); + } + } + _transferListWithLeft.clear(); + + // Fix the bottom of the range. + if (_keyRange.getMin() != CompositeKey::minValue) { + _keyRange.setMin(_keyValueMap.begin()->first); + } + } +} + + +void CentralWorker::checkForThisWorkerValues(uint32_t wId, std::string const& ip, + int portUdp, int portTcp, KeyRange& strRange) { + // If the address matches ours, check the name. + if (getHostName() == ip && getUdpPort() == portUdp) { + if (_isOurIdInvalid()) { + LOGS(_log, LOG_LVL_INFO, "Setting our name " << wId); + _setOurId(wId); + } else if (getOurId() != wId) { + LOGS(_log, LOG_LVL_ERROR, "Our wId doesn't match address from master! wId=" << + getOurId() << " from master=" << wId); + } + + // It is this worker. 
If there is a valid range in the message and our range is not valid, + // take the range given as our own. + if (strRange.getValid()) { + std::lock_guard lckM(_idMapMtx); + if (not _keyRange.getValid()) { + LOGS(_log, LOG_LVL_INFO, "Setting our range " << strRange); + _keyRange.setMinMax(strRange.getMin(), strRange.getMax(), strRange.getUnlimited()); + } + } + } + +} + + +KeyRange CentralWorker::updateRangeWithLeftData(KeyRange const& leftNeighborRange) { + // Update our range with data from our left neighbor. Our min is their max. + // If our range is invalid + // our min is their max incremented (stringRange increment function) + // if their max is unlimited, our max becomes unlimited + // else max = increment(min) + // send range to master + // return our new range + KeyRange newLeftNeighborRange(leftNeighborRange); + { + std::unique_lock lck(_idMapMtx); + if (not _keyRange.getValid()) { + // Our range has not been set, so base it on the range of the left neighbor. + auto min = KeyRange::increment(leftNeighborRange.getMax()); + auto max = min; + _keyRange.setMinMax(min, max, leftNeighborRange.getUnlimited()); + newLeftNeighborRange.setMax(max, false); + } else { + // Our range is valid already, it should be > than the left neighbor range. + if (_keyRange < leftNeighborRange) { + LOGS(_log, LOG_LVL_ERROR, "LeftNeighborRange(" << leftNeighborRange << + ") is greater than our range(" << _keyRange << ")"); + // TODO corrective action? + } + // The left neighbor's max should be the minimum value in our keymap, unless the + // map is empty. + if (_keyValueMap.empty()) { + // Don't do anything to left neighbor range. 
+ } else { + auto min = _keyValueMap.begin()->first; + _keyRange.setMin(min); + newLeftNeighborRange.setMax(min, false); + } + } + } + + return newLeftNeighborRange; +} + + +bool CentralWorker::workerKeyInsertReq(LoaderMsg const& inMsg, BufferUdp::Ptr const& data) { + StringElement::Ptr sData = std::dynamic_pointer_cast( + MsgElement::retrieve(*data, " CentralWorker::workerKeyInsertReq")); + if (sData == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralWorker::workerKeyInsertReq Failed to read list element"); + return false; + } + auto protoData = sData->protoParse(); + if (protoData == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralWorker::workerKeyInsertReq Failed to parse list"); + return false; + } + + // TODO move this to another thread + _workerKeyInsertReq(inMsg, protoData); + return true; +} + + +void CentralWorker::_workerKeyInsertReq(LoaderMsg const& inMsg, std::unique_ptr& protoBuf) { + std::unique_ptr protoData(std::move(protoBuf)); + + // Get the source of the request + proto::LdrNetAddress protoAddr = protoData->requester(); + NetworkAddress nAddr(protoAddr.ip(), protoAddr.udpport()); + + proto::KeyInfo protoKeyInfo = protoData->keyinfo(); + CompositeKey key(protoKeyInfo.keyint(), protoKeyInfo.keystr()); + ChunkSubchunk chunkInfo(protoKeyInfo.chunk(), protoKeyInfo.subchunk()); + + /// see if the key should be inserted into our map + std::unique_lock lck(_idMapMtx); + auto min = _keyRange.getMin(); + auto leftAddress = _neighborLeft.getAddressUdp(); + auto rightAddress = _neighborRight.getAddressUdp(); + if (_keyRange.isInRange(key)) { + // insert into our map + auto res = _keyValueMap.insert(std::make_pair(key, chunkInfo)); + lck.unlock(); + if (not res.second) { + // Element already found, check file id and row number. Bad if not the same. 
+ // TODO HIGH send back duplicate key mismatch message to the original requester and return + } + LOGS(_log, LOG_LVL_INFO, "Key inserted=" << key << "(" << chunkInfo << ")"); + // TODO Send this item to the keyLogger (which would then send KEY_INSERT_COMPLETE back to the requester), + // for now this function will send the message back for proof of concept. + LoaderMsg msg(LoaderMsg::KEY_INSERT_COMPLETE, inMsg.msgId->element, getHostName(), getUdpPort()); + BufferUdp msgData; + msg.appendToData(msgData); + // protoKeyInfo should still be the same + proto::KeyInfo protoReply; + protoReply.set_keyint(key.kInt); + protoReply.set_keystr(key.kStr); + protoReply.set_chunk(chunkInfo.chunk); + protoReply.set_subchunk(chunkInfo.subchunk); + StringElement strElem; + protoReply.SerializeToString(&(strElem.element)); + strElem.appendToData(msgData); + LOGS(_log, LOG_LVL_INFO, "sending complete " << key << " to " << nAddr << " from " << _ourId); + try { + sendBufferTo(nAddr.ip, nAddr.port, msgData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_workerKeyInsertReq boost system_error=" << e.what() << + " msg=" << inMsg); + } + } else { + lck.unlock(); + // Find the target range in the list and send the request there + auto targetWorker = getWorkerList()->findWorkerForKey(key); + if (targetWorker != nullptr && targetWorker->getId() != _ourId) { + _forwardKeyInsertRequest(targetWorker->getUdpAddress(), inMsg, protoData); + } else { + // Send request to left or right neighbor + if (key < min && leftAddress.ip != "") { + _forwardKeyInsertRequest(leftAddress, inMsg, protoData); + } else if (key > min && rightAddress.ip != "") { + _forwardKeyInsertRequest(rightAddress, inMsg, protoData); + } + } + } +} + + +void CentralWorker::_forwardKeyInsertRequest(NetworkAddress const& targetAddr, LoaderMsg const& inMsg, + std::unique_ptr& protoData) { + // Aside from hops, the proto buffer should be the same. 
+ proto::KeyInfo protoKeyInfo = protoData->keyinfo(); + CompositeKey key(protoKeyInfo.keyint(), protoKeyInfo.keystr()); + // The proto buffer should be the same, just need a new message. + int hops = protoData->hops() + 1; + if (hops > 4) { // TODO replace magic number with variable set via config file. + LOGS(_log, LOG_LVL_WARN, "Too many hops, dropping insert request hops=" << hops << " key=" << key); + return; + } + LOGS(_log, LOG_LVL_INFO, "Forwarding key insert hops=" << hops << " key=" << key); + LoaderMsg msg(LoaderMsg::KEY_INSERT_REQ, inMsg.msgId->element, getHostName(), getUdpPort()); + BufferUdp msgData; + msg.appendToData(msgData); + + StringElement strElem; + protoData->SerializeToString(&(strElem.element)); + strElem.appendToData(msgData); + try { + sendBufferTo(targetAddr.ip, targetAddr.port, msgData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_forwardKeyInsertRequest boost system_error=" << e.what() << + " tAddr=" << targetAddr << " inMsg=" << inMsg); + } +} + + +bool CentralWorker::workerKeyInfoReq(LoaderMsg const& inMsg, BufferUdp::Ptr const& data) { + LOGS(_log, LOG_LVL_DEBUG, "CentralWorker::workerKeyInfoReq"); + StringElement::Ptr sData = std::dynamic_pointer_cast( + MsgElement::retrieve(*data, " CentralWorker::workerKeyInfoReq ")); + if (sData == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralWorker::workerKeyInfoReq Failed to read list element"); + return false; + } + auto protoData = sData->protoParse(); + if (protoData == nullptr) { + LOGS(_log, LOG_LVL_WARN, "CentralWorker::workerKeyInfoReq Failed to parse list"); + return false; + } + + // TODO move this to another thread + _workerKeyInfoReq(inMsg, protoData); + return true; +} + + +void CentralWorker::_workerKeyInfoReq(LoaderMsg const& inMsg, std::unique_ptr& protoBuf) { + std::unique_ptr protoData(std::move(protoBuf)); + + // Get the source of the request + proto::LdrNetAddress protoAddr = protoData->requester(); + NetworkAddress 
nAddr(protoAddr.ip(), protoAddr.udpport()); + + proto::KeyInfo protoKeyInfo = protoData->keyinfo(); + CompositeKey key(protoKeyInfo.keyint(), protoKeyInfo.keystr()); + + /// see if the key is in our map + std::unique_lock lck(_idMapMtx); + if (_keyRange.isInRange(key)) { + LOGS(_log, LOG_LVL_INFO, "CentralWorker::_workerKeyInfoReq " << _ourId << " looking for key=" << key); + // check out map + auto iter = _keyValueMap.find(key); + lck.unlock(); + + // Key found or not, message will be returned. + LoaderMsg msg(LoaderMsg::KEY_LOOKUP, inMsg.msgId->element, getHostName(), getUdpPort()); + BufferUdp msgData; + msg.appendToData(msgData); + proto::KeyInfo protoReply; + protoReply.set_keyint(key.kInt); + protoReply.set_keystr(key.kStr); + if (iter == _keyValueMap.end()) { + // key not found message. + protoReply.set_chunk(0); + protoReply.set_subchunk(0); + protoReply.set_success(false); + LOGS(_log, LOG_LVL_INFO, "Key info not found key=" << key); + } else { + // key found message. + auto elem = iter->second; + protoReply.set_chunk(elem.chunk); + protoReply.set_subchunk(elem.subchunk); + protoReply.set_success(true); + LOGS(_log, LOG_LVL_INFO, "Key info lookup key=" << key << + " (" << protoReply.chunk() << ", " << protoReply.subchunk() << ")"); + } + StringElement strElem; + protoReply.SerializeToString(&(strElem.element)); + strElem.appendToData(msgData); + LOGS(_log, LOG_LVL_INFO, "sending key lookup " << key << " to " << nAddr << " from " << _ourId); + try { + sendBufferTo(nAddr.ip, nAddr.port, msgData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_workerKeyInfoReq boost system_error=" << e.what() << + " inMsg=" << inMsg); + } + } else { + // Find the target range in the list and send the request there + auto targetWorker = getWorkerList()->findWorkerForKey(key); + if (targetWorker == nullptr) { + LOGS(_log, LOG_LVL_INFO, "CentralWorker::_workerKeyInfoReq " << _ourId << + " could not forward key=" << key); + // TODO 
HIGH forward request to neighbor in case it was in recent shift. + return; + } + LOGS(_log, LOG_LVL_INFO, "CentralWorker::_workerKeyInfoReq " << _ourId << + " forwarding key=" << key << " to " << *targetWorker); + _forwardKeyInfoRequest(targetWorker, inMsg, protoData); + } +} + + +bool CentralWorker::workerWorkerSetRightNeighbor(LoaderMsg const& inMsg, BufferUdp::Ptr const& data) { + auto msgElem = MsgElement::retrieve(*data, "CentralWorker::workerWorkerSetRightNeighbor"); + UInt32Element::Ptr neighborName = std::dynamic_pointer_cast(msgElem); + if (neighborName == nullptr) { + return false; + } + + LOGS(_log, LOG_LVL_INFO, "workerWorkerSetRightNeighbor ourName=" << _ourId << " rightN=" << neighborName->element); + // Just setting the name, so it can stay here. See CentralWorker::_monitor(), which establishes/maintains connections. + _neighborRight.setId(neighborName->element); + return true; +} + + +bool CentralWorker::workerWorkerSetLeftNeighbor(LoaderMsg const& inMsg, BufferUdp::Ptr const& data) { + auto msgElem = MsgElement::retrieve(*data, "CentralWorker::workerWorkerSetLeftNeighbor"); + UInt32Element::Ptr neighborName = std::dynamic_pointer_cast(msgElem); + if (neighborName == nullptr) { + return false; + } + + LOGS(_log, LOG_LVL_INFO, "workerWorkerSetLeftNeighbor ourName=" << _ourId << " leftN=" << neighborName->element); + // TODO move to separate thread + _neighborLeft.setId(neighborName->element); + // Just setting the name. See CentralWorker::_monitor(), which establishes/maintains connections. + return true; +} + + +bool CentralWorker::workerWorkerKeysInfoReq(LoaderMsg const& inMsg, BufferUdp::Ptr const& data) { + // Send a message containing information about the range and number of keys handled by this worker back + // to the sender. 
Nothing in data + + // TODO move this to another thread + _workerWorkerKeysInfoReq(inMsg); + return true; + +} + + +void CentralWorker::_workerWorkerKeysInfoReq(LoaderMsg const& inMsg) { + // Use the address from inMsg as this kind of request is pointless to forward. + NetworkAddress nAddr(inMsg.senderHost->element, inMsg.senderPort->element); + uint64_t msgId = inMsg.msgId->element; + _sendWorkerKeysInfo(nAddr, msgId); +} + + +void CentralWorker::_sendWorkerKeysInfo(NetworkAddress const& nAddr, uint64_t msgId) { + // Build message containing Range, size of map, number of items added. + LoaderMsg msg(LoaderMsg::WORKER_KEYS_INFO, msgId, getHostName(), getUdpPort()); + BufferUdp msgData; + msg.appendToData(msgData); + std::unique_ptr protoWKI = workerKeysInfoBuilder(); + StringElement strElem; + protoWKI->SerializeToString(&(strElem.element)); + strElem.appendToData(msgData); + LOGS(_log, LOG_LVL_INFO, "sending WorkerKeysInfo name=" << _ourId << + " mapsize=" << protoWKI->mapsize() << " recentAdds=" << protoWKI->recentadds() << + " to " << nAddr); + try { + sendBufferTo(nAddr.ip, nAddr.port, msgData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_sendWorkerKeysInfo boost system_error=" << e.what() << + " nAddr=" << nAddr << "msgId=" << msgId); + } +} + + +std::unique_ptr CentralWorker::workerKeysInfoBuilder() { + std::unique_ptr protoWKI(new proto::WorkerKeysInfo()); + // Build message containing Range, size of map, number of items added. + // TODO this code is similar to code elsewhere, try to merge it. 
+ KeyRange range; + size_t mapSize; + size_t recentAdds; + { + std::lock_guard lck(_idMapMtx); + range = _keyRange; + mapSize = _keyValueMap.size(); + _removeOldEntries(); + recentAdds = _recentAdds.size(); + } + LOGS(_log, LOG_LVL_INFO, "CentralWorker WorkerKeysInfo a name=" << _ourId << + " keyCount=" << mapSize << " recentAdds=" << recentAdds); + protoWKI->set_wid(_ourId); + protoWKI->set_mapsize(mapSize); + protoWKI->set_recentadds(recentAdds); + proto::WorkerRange *protoRange = protoWKI->mutable_range(); + range.loadProtoRange(*protoRange); + proto::Neighbor *protoLeft = protoWKI->mutable_left(); + protoLeft->set_wid(_neighborLeft.getId()); + proto::Neighbor *protoRight = protoWKI->mutable_right(); + protoRight->set_wid(_neighborRight.getId()); + LOGS(_log, LOG_LVL_INFO, "CentralWorker WorkerKeysInfo b name=" << _ourId << + " keyCount=" << mapSize << " recentAdds=" << recentAdds); + return protoWKI; +} + + +// TODO This looks a lot like the other _forward*** functions, try to combine them. +void CentralWorker::_forwardKeyInfoRequest(WWorkerListItem::Ptr const& target, LoaderMsg const& inMsg, + std::unique_ptr const& protoData) { + // The proto buffer should be the same, just need a new message. 
+ LoaderMsg msg(LoaderMsg::KEY_LOOKUP_REQ, inMsg.msgId->element, getHostName(), getUdpPort()); + BufferUdp msgData; + msg.appendToData(msgData); + + StringElement strElem; + protoData->SerializeToString(&(strElem.element)); + strElem.appendToData(msgData); + + auto nAddr = target->getUdpAddress(); + try { + sendBufferTo(nAddr.ip, nAddr.port, msgData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_forwardKeyInfoRequest boost system_error=" << e.what() << + " target=" << target << " inMsg=" << inMsg); + } +} + + +void CentralWorker::_registerWithMaster() { + LoaderMsg msg(LoaderMsg::MAST_WORKER_ADD_REQ, getNextMsgId(), getHostName(), getUdpPort()); + BufferUdp msgData; + msg.appendToData(msgData); + // create the proto buffer + lsst::qserv::proto::LdrNetAddress protoBuf; + protoBuf.set_ip(getHostName()); + protoBuf.set_udpport(getUdpPort()); + protoBuf.set_tcpport(getTcpPort()); + + StringElement strElem; + protoBuf.SerializeToString(&(strElem.element)); + strElem.appendToData(msgData); + + try { + sendBufferTo(getMasterHostName(), getMasterPort(), msgData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralWorker::_registerWithMaster boost system_error=" << e.what()); + } +} + + +void CentralWorker::testSendBadMessage() { + uint16_t kind = 60200; + LoaderMsg msg(kind, getNextMsgId(), getHostName(), getUdpPort()); + LOGS(_log, LOG_LVL_INFO, "testSendBadMessage msg=" << msg); + BufferUdp msgData(128); + msg.appendToData(msgData); + try { + sendBufferTo(getMasterHostName(), getMasterPort(), msgData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "CentralWorker::testSendBadMessage boost system_error=" << e.what()); + throw e; // This would not be the expected error, re-throw so it is noticed. + } +} + + +void CentralWorker::_removeOldEntries() { + // _idMapMtx must be held when this is called. 
+ auto now = std::chrono::system_clock::now(); + auto then = now - _recentAddLimit; + while (_recentAdds.size() > 0 && _recentAdds.front() < then) { + _recentAdds.pop_front(); + } +} + + +void CentralWorker::insertKeys(std::vector const& keyList, bool mustSetMin) { + std::unique_lock lck(_idMapMtx); + auto maxKey = _keyRange.getMax(); + bool maxKeyChanged = false; + for (auto&& elem:keyList) { + auto const& key = elem.first; + auto res = _keyValueMap.insert(std::make_pair(key, elem.second)); + if (key > maxKey) { + maxKey = key; + maxKeyChanged = true; + } + if (not res.second) { + LOGS(_log, LOG_LVL_WARN, "insertKeys Possible duplicate " << + elem.first << ":" << elem.second); + } + } + + // On all nodes except the left most, the minimum should be reset. + if (mustSetMin && _keyValueMap.size() > 0) { + auto minKeyPair = _keyValueMap.begin(); + _keyRange.setMin(minKeyPair->first); + } + + if (maxKeyChanged) { + // if unlimited is false, range will be slightly off until corrected by the right neighbor. + bool unlimited = _keyRange.getUnlimited(); + _keyRange.setMax(maxKey, unlimited); + } +} + + +std::string CentralWorker::dumpKeysStr(unsigned int count) { + std::stringstream os; + std::lock_guard lck(_idMapMtx); + os << "name=" << getOurId() << " count=" << _keyValueMap.size() << " range(" + << _keyRange << ") pairs: "; + + if (count < 1 || _keyValueMap.size() < count*2) { + for (auto&& elem:_keyValueMap) { + os << elem.first << "{" << elem.second << "} "; + } + } else { + auto iter = _keyValueMap.begin(); + for (size_t j=0; j < count && iter != _keyValueMap.end(); ++iter, ++j) { + os << iter->first << "{" << iter->second << "} "; + } + os << " ... 
"; + auto rIter = _keyValueMap.rbegin(); + for (size_t j=0; j < count && rIter != _keyValueMap.rend(); ++rIter, ++j) { + os << rIter->first << "{" << rIter->second << "} "; + } + + } + return os.str(); +} +}}} // namespace lsst::qserv::loader diff --git a/core/modules/loader/CentralWorker.h b/core/modules/loader/CentralWorker.h new file mode 100644 index 0000000000..b11a8e4dcf --- /dev/null +++ b/core/modules/loader/CentralWorker.h @@ -0,0 +1,266 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_CENTRAL_WORKER_H +#define LSST_QSERV_LOADER_CENTRAL_WORKER_H + +// system headers +#include +#include + +// third party headers +#include "boost/asio.hpp" + +// Qserv headers +#include "loader/CentralFollower.h" +#include "loader/DoList.h" +#include "loader/Neighbor.h" +#include "loader/ServerTcpBase.h" +#include "loader/WorkerConfig.h" + + +namespace lsst { +namespace qserv { +namespace loader { + +class CentralWorkerDoListItem; + +/// This class is central to the worker. In addition +/// to maintaining lists of other workers it maintains a key-value +/// store over a range of keys. 
The range can change over time +/// as keys are shifted between this worker and its left and +/// right neighbors. It connects to its neighbors using TCP and +/// informs the master of its current key range using UDP. +/// Key-value lookups and inserts are done using UDP. +/// Workers will attempt to forward key lookups and inserts +/// to the correct worker when the key is not in this worker's +/// range. +class CentralWorker : public CentralFollower { +public: + typedef std::pair CompKeyPair; + + enum SocketStatus { + VOID0 = 0, + STARTING1, + ESTABLISHED2 + }; + + enum Direction { + NONE0 = 0, + TORIGHT1, + FROMRIGHT2 + }; + + CentralWorker(boost::asio::io_service& ioService, boost::asio::io_context& io_context_, + std::string const& hostName_, WorkerConfig const& cfg); + + /// Open the UDP and TCP ports and start monitoring. This can throw boost::system::system_error. + void startService() override; + void startMonitoring() override; + + ~CentralWorker() override; + + int getTcpPort() const override { return _tcpPort; } + uint32_t getOurId() const; + + /// Insert the keys in keyList into _keyValueMap, adjusting ranges + /// as needed. + /// @parameter mustSetMin should be set true if this is not the left + /// most worker. It causes the minimum value to + /// be set to the smallest key in _keyValueMap. + void insertKeys(std::vector const& keyList, bool mustSetMin); + + /// @Return a list of the smallest keys from _keyValueMap. The keys are removed from + /// from the map. Put keys are also put in _transferList in case the shift fails + /// and they need to be put back into _keyValueMap. + /// TODO add argument for smallest or largest and code to build list from smallest or largest keys. + StringElement::UPtr buildKeyList(int keysToShift); + + ///////////////////////////////////////////////////////////////////////////////// + /// Methods to handle messages received from other servers. 
+ /// 'inMsg' contains information about the originator of a request + /// and the type of message. + /// 'data' contains the message data. + + /// Update our range with data from our left neighbor. + /// Our minimum key is their maximum key(exclusive). + /// @returns what it thinks the range of the left neighbor should be. + KeyRange updateRangeWithLeftData(KeyRange const& strRange); + + /// Receive a request to insert a key value pair. + /// If the key value pair could not be inserted, it tries to forward the request appropriately. + /// @Returns true if the request could be parsed. + bool workerKeyInsertReq(LoaderMsg const& inMsg, BufferUdp::Ptr const& data); + + /// Receive a request to lookup a key value. + /// If the key is not within this worker's range, it tries to forward the request appropriately. + /// @Returns true if the request could be parsed. + bool workerKeyInfoReq(LoaderMsg const& inMsg, BufferUdp::Ptr const& data); + + /// Receive a request for information about this worker's keys, how many key-value pairs are + /// stored and the range of keys the worker is responsible for. + /// @Returns true if the message could be parsed. + bool workerWorkerKeysInfoReq(LoaderMsg const& inMsg, BufferUdp::Ptr const& data); + + /// Receive a message from the master providing the wId of our right neighbor. + bool workerWorkerSetRightNeighbor(LoaderMsg const& inMsg, BufferUdp::Ptr const& data); + + /// Receive a message from the master providing the wId of our left neighbor. + bool workerWorkerSetLeftNeighbor(LoaderMsg const& inMsg, BufferUdp::Ptr const& data); + + std::string getOurLogId() const override; + + std::unique_ptr workerKeysInfoBuilder(); // TODO make private + void setNeighborInfoLeft(uint32_t wId, int keyCount, KeyRange const& range); // TODO make private + + /// @Return a string describing the first and last 'count' keys. count=0 dumps all keys. 
+ std::string dumpKeysStr(unsigned int count); + + /// Called when our right neighbor indicates it is done with a shift FROMRIGHT + void finishShiftFromRight(); + + /// Called when there has been a problem with shifting with the left neighbor and changes + /// to _keyValueMap need to be undone. + void cancelShiftsWithLeftNeighbor(); + + /// Send a bad message for testing purposes. + void testSendBadMessage(); + + friend CentralWorkerDoListItem; + +protected: + void checkForThisWorkerValues(uint32_t wId, std::string const& ip, + int portUdp, int portTcp, KeyRange& strRange) override; +private: + /// Contact the master so it can provide this worker with an id. The master + /// will activate this worker when it is needed at a later time. + void _registerWithMaster(); + + /// @return true if our worker id is not valid. + bool _isOurIdInvalid() const { + std::lock_guard lck(_ourIdMtx); + return _ourIdInvalid; + } + + /// If ourId is invalid, set our id to id. + bool _setOurId(uint32_t id); + + /// Disable this worker. Only to be used if the master has deemed + /// this worker as too unreliable and replaced it. + void _masterDisable(); + + /// This function is run to monitor this worker's status. It is used to + /// register with the master, connect and control shifting with the + /// the right neighbor. + void _monitor(); + + /// Use the information from our right neighbor to set our key range. + bool _determineRange(); + + /// If this worker has significantly more or fewer keys than its right neighbor, + /// shift keys between them to make a more even distribution. + /// @Return true if data was shifted with the right neighbor. + bool _shiftIfNeeded(std::lock_guard const& rightMtxLG); + + /// Attempt to shift keys to or from the right neighbor. + /// @parameter keysToShift is number of keys to shift. + /// @parameter direction is TO or FROM the right neighbor. + void _shift(Direction direction, int keysToShift); + + /// See workerKeyInsertReq(...) 
+ void _workerKeyInsertReq(LoaderMsg const& inMsg, std::unique_ptr& protoBuf); + /// Forward a workerKeyInsertReq to an appropriate worker. + void _forwardKeyInsertRequest(NetworkAddress const& targetAddr, LoaderMsg const& inMsg, + std::unique_ptr& protoData); + + /// See workerKeyInfoReq + void _workerKeyInfoReq(LoaderMsg const& inMsg, std::unique_ptr& protoBuf); + /// Forward a workerKeyInfoReq to an appropriate worker. + void _forwardKeyInfoRequest(WWorkerListItem::Ptr const& target, LoaderMsg const& inMsg, + std::unique_ptr const& protoData); + + /// See workerWorkerKeysInfoReq(...) + void _workerWorkerKeysInfoReq(LoaderMsg const& inMsg); + /// Send information about our keys (range, number of pairs) to 'nAddr'. + void _sendWorkerKeysInfo(NetworkAddress const& nAddr, uint64_t msgId); + + void _removeOldEntries(); ///< remove old entries from _recentAdds + + /// Connect to the right neighbor. Must hold _rightMtx in the lock. + void _rightConnect(std::lock_guard const& rightMtxLG); + ///< Disconnect from the right neighbor. Must hold _rightMtx in the lock. + void _rightDisconnect(std::lock_guard const& rightMtxLG, std::string const& note); + + void _cancelShiftsWithRightNeighbor(); ///< Cancel shifts to/from the right neighbor. + void _finishShiftToRight(); ///< The shift to the right neighbor is complete, cleanup. + + const int _tcpPort; + boost::asio::io_context& _ioContext; + + bool _ourIdInvalid{true}; ///< true until our id has been set by the master. + std::atomic _ourId{0}; ///< id given by the master, 0 is invalid id. + mutable std::mutex _ourIdMtx; ///< protects _ourIdInvalid, _ourId + + KeyRange _keyRange; ///< range for this worker + std::atomic _rangeChanged{false}; + std::map _keyValueMap; + std::deque _recentAdds; ///< track how many keys added recently. + std::chrono::milliseconds _recentAddLimit; ///< After this period of time, additions are no longer recent. 
+    std::mutex _idMapMtx; ///< protects _keyRange, _keyValueMap,
+                          ///< _recentAdds, _transferListToRight, _transferListFromRight
+
+    Neighbor _neighborLeft{Neighbor::LEFT};
+    Neighbor _neighborRight{Neighbor::RIGHT};
+
+    ServerTcpBase::Ptr _tcpServer; // For our right neighbor to connect to us.
+
+    std::mutex _rightMtx;
+    SocketStatus _rightConnectStatus{VOID0};
+    std::shared_ptr _rightSocket;
+
+    std::atomic _shiftAsClientInProgress{false}; ///< True when shifting to or from right neighbor.
+
+    /// Shift if a node has % more than its neighbor. The percentage threshold is expressed
+    /// as a decimal, so 1.1 would be 10% more than neighbor or 110%.
+    double _thresholdNeighborShift;
+
+    /// Maximum number of keys to shift in one iteration. 10000 may be reasonable.
+    /// An iteration would be transfer, insert, and verify range. During the
+    /// insert phase, the mutex is locked preventing key inserts and lookups.
+    /// Using smaller values locks the mutex for more periods of time but each
+    /// period is shorter and lookups can occur during the gaps.
+    /// Too big a value, and the maps will be paralyzed for a long time during inserts.
+    /// Too small and shift operations will take significantly longer.
+    int _maxKeysToShift;
+    std::vector _transferListToRight; ///< List of items being transferred to right
+    /// List of items being transferred to our left neighbor. (answering neighbor's FromRight request)
+    std::vector _transferListWithLeft;
+
+    /// The DoListItem that makes sure _monitor() is run.
+    std::shared_ptr _centralWorkerDoListItem;
+};
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_CENTRAL_WORKER_H
+
diff --git a/core/modules/loader/CentralWorkerDoListItem.h b/core/modules/loader/CentralWorkerDoListItem.h
new file mode 100644
index 0000000000..6ab99228b8
--- /dev/null
+++ b/core/modules/loader/CentralWorkerDoListItem.h
@@ -0,0 +1,67 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 LSST Corporation.
+ * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ + +#ifndef LSST_QSERV_LOADER_CENTRALWORKERDOLISTITEM_H +#define LSST_QSERV_LOADER_CENTRALWORKERDOLISTITEM_H + +// Qserv headers +#include "loader/CentralWorker.h" + +namespace lsst { +namespace qserv { +namespace loader { + +/// This class exists to regularly call the CentralWorker::_monitor() function, which +/// does things like monitor TCP connections and control shifting with the right neighbor. 
+class CentralWorkerDoListItem : public DoListItem {
+public:
+    CentralWorkerDoListItem() = delete;
+    explicit CentralWorkerDoListItem(CentralWorker* centralWorker) : _centralWorker(centralWorker) {
+        setTimeOut(std::chrono::seconds(15)); // TODO: DM-17453 set via config
+    }
+
+    util::CommandTracked::Ptr createCommand() override {
+        struct CWMonitorCmd : public util::CommandTracked {
+            explicit CWMonitorCmd(CentralWorker* centralW) : centralWorker(centralW) {}
+            void action(util::CmdData*) override {
+                centralWorker->_monitor();
+            }
+            CentralWorker* centralWorker;
+        };
+        util::CommandTracked::Ptr cmd(std::make_shared(_centralWorker));
+        return cmd;
+    }
+
+private:
+    CentralWorker* _centralWorker;
+};
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_CENTRALWORKERDOLISTITEM_H
+
+
+
+
+
diff --git a/core/modules/loader/ClientConfig.cc b/core/modules/loader/ClientConfig.cc
new file mode 100644
index 0000000000..66247a281e
--- /dev/null
+++ b/core/modules/loader/ClientConfig.cc
@@ -0,0 +1,62 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program.  If not,
+ * see .
+ */
+
+// Class header
+#include "loader/ClientConfig.h"
+
+// System headers
+
+// LSST headers
+#include "lsst/log/Log.h"
+
+// Qserv headers
+#include "util/ConfigStore.h"
+#include "util/ConfigStoreError.h"
+
+namespace {
+LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.Config");
+}
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+
+ClientConfig::ClientConfig(util::ConfigStore const& configStore) {
+    try {
+        setFromConfig(configStore);
+    } catch (util::ConfigStoreError const& e) {
+        throw ConfigErr(ERR_LOC, std::string("ClientConfig ") + e.what());
+    }
+}
+
+
+std::ostream& ClientConfig::dump(std::ostream &os) const {
+    os << "(ClientConfig(" << header << ") ";
+    ConfigBase::dump(os);
+    os << ")";
+    return os;
+}
+
+}}} // namespace lsst::qserv::loader
+
diff --git a/core/modules/loader/ClientConfig.h b/core/modules/loader/ClientConfig.h
new file mode 100644
index 0000000000..6256a7e2ab
--- /dev/null
+++ b/core/modules/loader/ClientConfig.h
@@ -0,0 +1,104 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 LSST Corporation.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program.  If not,
+ * see .
+ *
+ */
+#ifndef LSST_QSERV_LOADER_CLIENTCONFIG_H
+#define LSST_QSERV_LOADER_CLIENTCONFIG_H
+
+// Qserv headers
+#include "loader/ConfigBase.h"
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+/// A class for reading the configuration file for the client which consists of
+/// a collection of key-value pairs and provides access functions for those values.
+///
+class ClientConfig : public ConfigBase {
+public:
+    explicit ClientConfig(std::string const& configFileName)
+        : ClientConfig(util::ConfigStore(configFileName)) {}
+
+    ClientConfig() = delete;
+    ClientConfig(ClientConfig const&) = delete;
+    ClientConfig& operator=(ClientConfig const&) = delete;
+
+    std::string getMasterHost() const { return _masterHost->getValue(); }
+    int getMasterPortUdp() const { return _masterPortUdp->getInt(); }
+    int getDefWorkerPortUdp() const { return _defWorkerPortUdp->getInt(); }
+    std::string getDefWorkerHost() const { return _defWorkerHost->getValue(); }
+    int getClientPortUdp() const { return _clientPortUdp->getInt(); }
+    int getThreadPoolSize() const { return _threadPoolSize->getInt(); }
+    int getLoopSleepTime() const { return _loopSleepTime->getInt(); } // TODO: Maybe chrono types for times
+    int getMaxLookups() const { return _maxLookups->getInt(); }
+    int getMaxInserts() const { return _maxInserts->getInt(); }
+    int getMaxRequestSleepTime() const { return _maxRequestSleepTime->getInt(); }
+    int getIOThreads() const { return _iOThreads->getInt(); }
+
+    std::ostream& dump(std::ostream &os) const override;
+
+    std::string const header{"client"};
+private:
+    ClientConfig(util::ConfigStore const& configStore);
+
+    /// Master host name
+    ConfigElement::Ptr _masterHost{
+        ConfigElement::create(cfgList, header, "masterHost", ConfigElement::STRING, true)};
+    /// Master UDP port
+    ConfigElement::Ptr _masterPortUdp{
+        ConfigElement::create(cfgList, header, "masterPortUdp", ConfigElement::INT, true)};
+    /// UDP port for the client. Reasonable value - 9876
+    ConfigElement::Ptr _clientPortUdp{
+        ConfigElement::create(cfgList, header, "clientPortUdp", ConfigElement::INT, true)};
+    /// Default worker host name
+    ConfigElement::Ptr _defWorkerHost{
+        ConfigElement::create(cfgList, header, "defWorkerHost", ConfigElement::STRING, true)};
+    /// Default worker UDP port. Reasonable value - 9876
+    ConfigElement::Ptr _defWorkerPortUdp{
+        ConfigElement::create(cfgList, header, "defWorkerPortUdp", ConfigElement::INT, true)};
+    /// Size of the thread pool. Reasonable value - 10
+    ConfigElement::Ptr _threadPoolSize{
+        ConfigElement::create(cfgList, header, "threadPoolSize", ConfigElement::INT, true)};
+    /// Time spent sleeping between checking elements in the DoList in micro seconds. 100000
+    ConfigElement::Ptr _loopSleepTime{
+        ConfigElement::create(cfgList, header, "loopSleepTime", ConfigElement::INT, false, "100000")};
+    /// Maximum number of lookup requests allowed in the DoList.
+    ConfigElement::Ptr _maxLookups{
+        ConfigElement::create(cfgList, header, "maxLookups", ConfigElement::INT, true)};
+    /// Maximum number of insert requests allowed in the DoList.
+    ConfigElement::Ptr _maxInserts{
+        ConfigElement::create(cfgList, header, "maxInserts", ConfigElement::INT, true)};
+    /// When reaching maxInserts or maxLookups, sleep this long before trying to add more,
+    /// in micro seconds. 100000micro = 0.1sec
+    ConfigElement::Ptr _maxRequestSleepTime{
+        ConfigElement::create(cfgList, header,
+                              "maxRequestSleepTime", ConfigElement::INT, false, "100000")};
+    /// Number of IO threads the server should run.
+    ConfigElement::Ptr _iOThreads{
+        ConfigElement::create(cfgList, header, "iOThreads", ConfigElement::INT, false, "4")};
+};
+
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_CLIENTCONFIG_H
diff --git a/core/modules/loader/ClientServer.cc b/core/modules/loader/ClientServer.cc
new file mode 100644
index 0000000000..89da87cbfc
--- /dev/null
+++ b/core/modules/loader/ClientServer.cc
@@ -0,0 +1,177 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program.  If not,
+ * see .
+ */
+
+
+// Class header
+#include "loader/ClientServer.h"
+
+// System headers
+#include
+
+// Qserv headers
+#include "loader/CentralClient.h"
+#include "loader/LoaderMsg.h"
+#include "proto/loader.pb.h"
+#include "proto/ProtoImporter.h"
+
+// LSST headers
+#include "lsst/log/Log.h"
+
+namespace {
+LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.ClientServer");
+}
+
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+
+BufferUdp::Ptr ClientServer::parseMsg(BufferUdp::Ptr const& data,
+                                      boost::asio::ip::udp::endpoint const& senderEndpoint) {
+    LOGS(_log, LOG_LVL_DEBUG, "ClientServer::parseMsg sender " << senderEndpoint <<
+         " data length=" << data->getAvailableWriteLength());
+    BufferUdp::Ptr sendData; /// nullptr for empty response.
+    LoaderMsg inMsg;
+    inMsg.parseFromData(*data);
+    LOGS(_log, LOG_LVL_INFO, "ClientServer::parseMsg sender " << senderEndpoint <<
+         " kind=" << inMsg.msgKind->element << " data length=" << data->getAvailableWriteLength());
+    switch (inMsg.msgKind->element) {
+    case LoaderMsg::MSG_RECEIVED:
+        LOGS(_log, LOG_LVL_WARN, "ClientServer::parseMsg MSG_RECEIVED");
+        _msgRecievedHandler(inMsg, data, senderEndpoint);
+        sendData.reset(); // Never send a response back for one of these, infinite loop.
+        break;
+
+    case LoaderMsg::KEY_LOOKUP:
+        LOGS(_log, LOG_LVL_DEBUG, "KEY_LOOK");
+        _centralClient->handleKeyLookup(inMsg, data);
+        break;
+
+    case LoaderMsg::KEY_INSERT_COMPLETE:
+        LOGS(_log, LOG_LVL_DEBUG, "KEY_INSERT_COMPLETE");
+        _centralClient->handleKeyInsertComplete(inMsg, data);
+        break;
+
+    case LoaderMsg::MAST_WORKER_LIST:
+        _centralClient->getWorkerList()->workerListReceive(data);
+        break;
+    case LoaderMsg::MAST_WORKER_INFO:
+        _centralClient->workerInfoReceive(data);
+        break;
+    // following not expected by client
+    case LoaderMsg::KEY_INSERT_REQ:
+    case LoaderMsg::KEY_LOOKUP_REQ:
+    case LoaderMsg::MAST_INFO:
+    case LoaderMsg::MAST_INFO_REQ:
+    case LoaderMsg::MAST_WORKER_LIST_REQ:
+    case LoaderMsg::MAST_WORKER_INFO_REQ:
+    case LoaderMsg::MAST_WORKER_ADD_REQ:
+        // TODO add response for known but unexpected message.
+        sendData = prepareReplyToMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR,
+                                     "unexpected Msg Kind");
+        break;
+
+    default:
+        sendData = prepareReplyToMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR,
+                                     "unknownMsgKind");
+    }
+
+    return sendData;
+}
+
+
+BufferUdp::Ptr ClientServer::prepareReplyToMsg(boost::asio::ip::udp::endpoint const& senderEndpoint,
+                                               LoaderMsg const& inMsg,
+                                               int status, std::string const& msgTxt) {
+
+    if (status != LoaderMsg::STATUS_SUCCESS) {
+        LOGS(_log,LOG_LVL_WARN, "Error response Original from " << senderEndpoint <<
+             " msg=" << msgTxt << " inMsg=" << inMsg.getStringVal());
+    }
+
+    LoaderMsg outMsg(LoaderMsg::MSG_RECEIVED, inMsg.msgId->element, getOurHostName(), getOurPort());
+
+    // create the proto buffer
+    proto::LdrMsgReceived protoBuf;
+    protoBuf.set_originalid(inMsg.msgId->element);
+    protoBuf.set_originalkind(inMsg.msgKind->element);
+    protoBuf.set_status(status); // report the caller-supplied status instead of always STATUS_PARSE_ERR
+    protoBuf.set_errmsg(msgTxt);
+    protoBuf.set_dataentries(0);
+
+    StringElement respBuf;
+    protoBuf.SerializeToString(&(respBuf.element));
+
+    auto sendData = std::make_shared();
+    outMsg.appendToData(*sendData);
+    respBuf.appendToData(*sendData);
+    return sendData;
+}
+
+
+void ClientServer::_msgRecievedHandler(LoaderMsg const& inMsg, BufferUdp::Ptr const& data,
+                                       boost::asio::ip::udp::endpoint const& senderEndpoint) {
+    bool success = true;
+    // This is only really expected for parsing errors. Most responses to
+    // requests come in as normal messages.
+    StringElement::Ptr seData =
+        std::dynamic_pointer_cast(MsgElement::retrieve(*data, "ClientServer::_msgRecievedHandler"));
+    if (seData == nullptr) {
+        success = false;
+    }
+
+    std::unique_ptr protoBuf;
+    if (success) {
+        protoBuf = seData->protoParse();
+        if (protoBuf == nullptr) success = false;
+    }
+
+    std::stringstream os;
+    int status = LoaderMsg::STATUS_PARSE_ERR;
+
+    if (success) {
+        auto originalId = protoBuf->originalid();
+        auto originalKind = protoBuf->originalkind();
+        status = protoBuf->status();
+        auto errMsg = protoBuf->errmsg();
+        os << " sender=" << senderEndpoint <<
+              " id=" << originalId << " kind=" << originalKind << " status=" << status <<
+              " msg=" << errMsg;
+    } else {
+        os << " Failed to parse MsgRecieved! sender=" << senderEndpoint;
+    }
+
+    if (status != LoaderMsg::STATUS_SUCCESS) {
+        ++_errCount;
+        LOGS(_log, LOG_LVL_WARN, "MsgRecieved Message sent by this server caused error at its target" <<
+             " errCount=" << _errCount << os.str());
+    } else {
+        // There shouldn't be many of these, unless there's a need to time things.
+        LOGS(_log, LOG_LVL_INFO, "MsgRecieved " << os.str());
+    }
+}
+
+
+}}} // namespace lsst::qserv::loader
+
diff --git a/core/modules/loader/ClientServer.h b/core/modules/loader/ClientServer.h
new file mode 100644
index 0000000000..9e6d276e44
--- /dev/null
+++ b/core/modules/loader/ClientServer.h
@@ -0,0 +1,80 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 LSST Corporation.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_CLIENTSERVER_H +#define LSST_QSERV_LOADER_CLIENTSERVER_H + +// system headers +#include +#include + +// third party headers +#include "boost/asio.hpp" + +// Qserv headers +#include "loader/LoaderMsg.h" +#include "loader/ServerUdpBase.h" + + +namespace lsst { +namespace qserv { +namespace loader { + +class CentralClient; + +/// This class implements a UDP server for the client so that message replies can be +/// sent directly to the client instead of passed back through the chain of workers +/// that were queried when looking for the worker that could handle this client's +/// request. +class ClientServer : public ServerUdpBase { +public: + // The base class default constructor, copy constructor, and operator= have been set to delete. + ClientServer(boost::asio::io_service& ioService, std::string const& host, int port, + CentralClient* centralClient) + : ServerUdpBase(ioService, host, port), _centralClient(centralClient) {} + + ~ClientServer() override = default; + + /// Parse enough of an incoming message so it can be passed to the proper handler. + BufferUdp::Ptr parseMsg(BufferUdp::Ptr const& data, + boost::asio::ip::udp::endpoint const& endpoint) override; + + /// Build a reply to a message that was received, usually used to handle unknown or unexpected messages. 
+    /// @return a pointer to a buffer with the constructed message.
+    // TODO shows up in both MasterServer and WorkerServer
+    BufferUdp::Ptr prepareReplyToMsg(boost::asio::ip::udp::endpoint const& senderEndpoint,
+                                     LoaderMsg const& inMsg,
+                                     int status, std::string const& msgTxt);
+
+private:
+    /// Handle a received MSG_RECEIVED message: parse it and log the status it reports.
+    void _msgRecievedHandler(LoaderMsg const& inMsg, BufferUdp::Ptr const& data,
+                             boost::asio::ip::udp::endpoint const& senderEndpoint);
+
+    CentralClient* _centralClient;
+};
+
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_CLIENTSERVER_H
diff --git a/core/modules/loader/CompositeKey.cc b/core/modules/loader/CompositeKey.cc
new file mode 100644
index 0000000000..e2a3d26731
--- /dev/null
+++ b/core/modules/loader/CompositeKey.cc
@@ -0,0 +1,66 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program.  If not,
+ * see .
+ */ + + +// Class header +#include "loader/CompositeKey.h" + +// System headers +#include + + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.CompositeKey"); +} + +namespace lsst { +namespace qserv { +namespace loader { + + +CompositeKey const CompositeKey::minValue(0,""); + + +void CompositeKey::dump(std::ostream& os) const { + os << "CKey(" << kInt << ", " << kStr << ")"; +} + + +std::string CompositeKey::dump() const { + std::stringstream os; + dump(os); + return os.str(); +} + + +std::ostream& operator<<(std::ostream& os, CompositeKey const& cKey) { + cKey.dump(os); + return os; +} + + +}}} // namespace lsst::qserv::loader + diff --git a/core/modules/loader/CompositeKey.h b/core/modules/loader/CompositeKey.h new file mode 100644 index 0000000000..b2a7a60df6 --- /dev/null +++ b/core/modules/loader/CompositeKey.h @@ -0,0 +1,98 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
// File: core/modules/loader/CompositeKey.h
#ifndef LSST_QSERV_LOADER_COMPOSITEKEY_H
#define LSST_QSERV_LOADER_COMPOSITEKEY_H

// system headers
#include <cstdint>
#include <limits>
#include <ostream>
#include <string>
#include <tuple>


namespace lsst {
namespace qserv {
namespace loader {


/// A key consisting of an unsigned 64 bit integer and a std::string with support for comparisons.
/// The integer component is compared before the string component.
class CompositeKey {
public:
    CompositeKey(uint64_t ki, std::string const& ks) : kInt(ki), kStr(ks) {}
    explicit CompositeKey(uint64_t ki) : CompositeKey(ki, "") {}
    explicit CompositeKey(std::string const& ks) : CompositeKey(0, ks) {}
    CompositeKey() : CompositeKey(0, "") {}

    // Member-wise copy/move is exactly what is needed, so let the compiler
    // generate them (this also enables moves, which a user-written copy
    // constructor would suppress).
    CompositeKey(CompositeKey const&) = default;
    CompositeKey(CompositeKey&&) = default;
    CompositeKey& operator=(CompositeKey const&) = default;
    CompositeKey& operator=(CompositeKey&&) = default;
    ~CompositeKey() = default;

    /// @return the largest value the integer component can hold.
    static uint64_t maxIntVal() { return std::numeric_limits<uint64_t>::max(); }

    /// Smallest possible value for a CompositeKey (0,"")
    static CompositeKey const minValue;

    /// Lexicographic ordering on (kInt, kStr).
    bool operator<(CompositeKey const& other) const {
        return std::tie(kInt, kStr) < std::tie(other.kInt, other.kStr);
    }

    bool operator>(CompositeKey const& other) const {
        return other < *this;
    }

    bool operator==(CompositeKey const& other) const {
        return (kInt == other.kInt) && (kStr == other.kStr);
    }

    bool operator!=(CompositeKey const& other) const {
        return !(*this == other);
    }

    bool operator<=(CompositeKey const& other) const {
        return !(*this > other);
    }

    bool operator>=(CompositeKey const& other) const {
        return !(*this < other);
    }

    /// Write "CKey(<kInt>, <kStr>)" to 'os'.
    void dump(std::ostream& os) const;
    /// @return the same text as dump(std::ostream&), as a string.
    std::string dump() const;

    uint64_t kInt;    ///< integer component, compared first
    std::string kStr; ///< string component, compared second
};

std::ostream& operator<<(std::ostream& os, CompositeKey const& cKey);

}}} // namespace lsst::qserv::loader

#endif // LSST_QSERV_LOADER_COMPOSITEKEY_H
b/core/modules/loader/ConfigBase.cc new file mode 100644 index 0000000000..b5ecec6628 --- /dev/null +++ b/core/modules/loader/ConfigBase.cc @@ -0,0 +1,188 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "loader/ConfigBase.h" + +// System headers + +// LSST headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "util/ConfigStore.h" +#include "util/ConfigStoreError.h" + +// Third-party headers +#include "boost/lexical_cast.hpp" + + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.Config"); +} + + +namespace lsst { +namespace qserv { +namespace loader { + + +void ConfigElement::setFromConfig(util::ConfigStore const& cfgStore) { + if (_required) { + _value = cfgStore.getRequired(getFullKey()); + } else { + _value = cfgStore.get(getFullKey(), _default); + } +} + + +bool ConfigElement::verifyValueIsOfKind() { + switch (_kind) { + case STRING: + return true; + case INT: + return isInteger(); + case FLOAT: + return isFloat(); + default: + return false; + } +} + + +std::string ConfigElement::getFullKey() const { + if (_header.empty()) return std::string(_key); + return std::string(_header + "." 
+ _key); +} + + +int ConfigElement::getInt() const { + if (_kind != INT) { + throw ConfigErr(ERR_LOC, "getInt called for non-integer " + dump()); + } + return boost::lexical_cast(_value); +} + + +double ConfigElement::getDouble() const { + if (_kind != FLOAT) { + throw ConfigErr(ERR_LOC, "getDouble called for non-float " + dump()); + } + return boost::lexical_cast(_value); +} + + +bool ConfigElement::isInteger() const { + if (_kind != INT) return false; + try { + // lexical cast is more strict than std::stoi() + getInt(); + } catch (boost::bad_lexical_cast const& exc) { + return false; + } + return true; +} + + +bool ConfigElement::isFloat() const { + if (_kind != FLOAT) return false; + try { + getDouble(); + } catch (boost::bad_lexical_cast const& exc) { + return false; + } + return true; +} + + +std::string ConfigElement::kindToStr(Kind kind) { + switch (kind) { + case STRING: + return "STRING"; + case INT: + return "INT"; + case FLOAT: + return "FLOAT"; + default: + return "undefined Kind"; + } +} + + +std::ostream& ConfigElement::dump(std::ostream &os) const { + os << "(key=" << getFullKey() + << " val=" << _value + << " req=" << _required + << " kind=" << kindToStr(_kind) + << " def=" << _default << ")"; + return os; +} + + +std::string ConfigElement::dump() const { + std::ostringstream os; + dump(os); + return os.str(); +} + + +std::ostream& operator<<(std::ostream &os, ConfigElement const& elem) { + return elem.dump(os); +} + + +void ConfigBase::setFromConfig(util::ConfigStore const& configStore) { + for (auto& elem:cfgList) { + elem->setFromConfig(configStore); + if (not elem->verifyValueIsOfKind()) { + throw util::ConfigStoreError("Could not parse " + elem->dump()); + } + } +} + + +std::ostream& ConfigBase::dump(std::ostream &os) const { + os << "(ConfigBase: "; + for (auto&& elem:cfgList) { + os << *elem << " "; + } + os << ")"; + return os; +} + + +std::string ConfigBase::dump() const { + std::stringstream os; + dump(os); + return os.str(); +} + + 
+std::ostream& operator<<(std::ostream &os, ConfigBase const& cfg) { + return cfg.dump(os); +} + + +}}} // namespace lsst::qserv::loader + + diff --git a/core/modules/loader/ConfigBase.h b/core/modules/loader/ConfigBase.h new file mode 100644 index 0000000000..dac19b282e --- /dev/null +++ b/core/modules/loader/ConfigBase.h @@ -0,0 +1,147 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_CONFIGBASE_H +#define LSST_QSERV_LOADER_CONFIGBASE_H + +// system headers +#include +#include +#include + +// Qserv headers +#include "util/ConfigStore.h" +#include "util/Issue.h" + +namespace lsst { +namespace qserv { +namespace loader { + +class ConfigErr : public util::Issue { +public: + ConfigErr(util::Issue::Context const& ctx, std::string const& message) : + util::Issue(ctx, message) {} +}; + + +/// A class to store the information about a particular configuration +/// file element and allow it to be put on a list. +class ConfigElement { +public: + typedef std::shared_ptr Ptr; + typedef std::vector CfgElementList; + enum Kind { STRING, INT, FLOAT }; // Possibly expand to time types. 
+ + ConfigElement() = delete; + virtual ~ConfigElement() = default; + + /// A factory to create the ConfigElement and add it to 'list'. + static Ptr create(CfgElementList &list, + std::string const& header, std::string const& key, + Kind kind, bool required, + std::string const& default_="") { + Ptr ptr(new ConfigElement(header, key, kind, required, default_)); + list.push_back(ptr); + return ptr; + } + + static std::string kindToStr(Kind kind); + + /// @return the full key including _header and _key. + std::string getFullKey() const; + + std::string getValue() const { return _value; } + + /// @return an integer value. Throws ConfigErr if _kind is not INT + int getInt() const; + + /// @return a double value. Throws ConfigErr if _kind is not FLOAT + double getDouble() const; + + /// Set the _value for this element from 'cfgStore' using getFullKey() as the key. + /// This function can throw util::ConfigStoreError. + void setFromConfig(util::ConfigStore const& cfgStore); + + bool verifyValueIsOfKind(); + + /// @return true if the string parses as an integer. + bool isInteger() const; + + /// @return true if the string parses as a floating type. + bool isFloat() const; + + /// This is only meant for testing. + void setValue(std::string const& val) { _value = val; } + + /// Functions to dump this objects information to a log file. Child classes + /// should only need to provide their own version for dump(std::ostream&). 
+ virtual std::ostream& dump(std::ostream &os) const; + std::string dump() const; + friend std::ostream& operator<<(std::ostream &out, ConfigElement const& elem); + +private: + ConfigElement(std::string const& header, std::string const& key, + Kind kind, bool required, std::string const& default_) + : _header(header), _key(key), _kind(kind), _required(required), _default(default_) {} + + std::string _header; ///< name of the header for this element + std::string _key; ///< name of the key under the header + Kind _kind{STRING}; ///< Kind (type) of value expected + std::string _value; ///< value found in config or default + bool _required{true}; ///< required to be in config. + std::string _default; ///< default value. +}; + + +/// A base class for configuration loading. Child classes define elements +/// expected in the configuration files and provide access functions. +/// +/// The constructor can throw. In most cases this is reasonable as exiting with an +/// error is safer than running with a bad configuration file. In other cases, care +/// needs to be taken. +class ConfigBase { +public: + ConfigBase(ConfigBase const&) = delete; + ConfigBase& operator=(ConfigBase const&) = delete; + + virtual ~ConfigBase() = default; + + virtual std::ostream& dump(std::ostream &os) const; + std::string dump() const; + friend std::ostream& operator<<(std::ostream &out, ConfigBase const& config); + +protected: + ConfigBase() = default; + + /// Set the values for all the elements in cfgList. This can + /// only be meaningfully called after the child class has filled in cfgList. + void setFromConfig(util::ConfigStore const& configStore); + + /// A list of ConfigElements that can be found in the configuration. 
+ ConfigElement::CfgElementList cfgList; +}; + +}}} // namespace lsst::qserv::loader + +#endif // LSST_QSERV_LOADER_CONFIGBASE_H + diff --git a/core/modules/loader/DoList.cc b/core/modules/loader/DoList.cc new file mode 100644 index 0000000000..b27c6a9773 --- /dev/null +++ b/core/modules/loader/DoList.cc @@ -0,0 +1,87 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + + +// Class header +#include "loader/DoList.h" + +// System headers +#include + +// Qserv headers +#include "loader/Central.h" +#include "loader/LoaderMsg.h" +#include "proto/loader.pb.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.DoList"); + +//std::atomic limiter(0); // Counter to limit log messages &&& make class member +} + +namespace lsst { +namespace qserv { +namespace loader { + + +void DoList::checkList() { + if (_limiter%1000 == 0) LOGS(_log, LOG_LVL_DEBUG, "DoList::checkList " << _limiter); + ++_limiter; + std::lock_guard lock(_listMtx); + { + std::lock_guard lockAddList(_addListMtx); + // Move all the items in _addList to _list. 
_addList is emptied + _list.splice(_list.end(), _addList); + } + for (auto iter = _list.begin(); iter != _list.end(); ++iter){ + DoListItem::Ptr const& item = *iter; + auto cmd = item->runIfNeeded(TimeOut::Clock::now()); + if (cmd != nullptr) { + LOGS(_log, LOG_LVL_DEBUG, "queuing command"); + _central.queueCmd(cmd); + } else { + if (item->shouldRemoveFromList()) { + LOGS(_log, LOG_LVL_DEBUG, "removing item " << item->getCommandsCreated()); + item->setAddedToList(false); + iter = _list.erase(iter); + } + } + } +} + + +void DoList::runItemNow(DoListItem::Ptr const& item) { + auto cmd = item->runIfNeeded(TimeOut::Clock::now()); + if (cmd != nullptr) { + LOGS(_log, LOG_LVL_DEBUG, "DoList::addAndRunItemNow queuing command"); + _central.queueCmd(cmd); + } +} + + +}}} // namespace lsst:qserv::loader + + diff --git a/core/modules/loader/DoList.h b/core/modules/loader/DoList.h new file mode 100644 index 0000000000..9be1e1f126 --- /dev/null +++ b/core/modules/loader/DoList.h @@ -0,0 +1,82 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ * + */ +#ifndef LSST_QSERV_LOADER_DOLIST_H +#define LSST_QSERV_LOADER_DOLIST_H + +// Qserv headers +#include "loader/DoListItem.h" + +namespace lsst { +namespace qserv { +namespace loader { + +/// A list of things that need to be done with timers. +/// Everything on the list is checked, if it's timer has expired, it +/// is queued and the timer reset. +/// If it is a single use item, it is deleted after successful completion. +class DoList { +public: + typedef std::shared_ptr Ptr; + + DoList(Central& central) : _central(central) {} + DoList() = delete; + DoList(DoList const&) = delete; + DoList& operator=(DoList const&) = delete; + + ~DoList() = default; + + void checkList(); + bool addItem(DoListItem::Ptr const& item) { + if (item == nullptr) return false; + if (item->isAlreadyOnList()) return false; // fast atomic test + { + std::lock_guard lock(_addListMtx); + // Need to make sure this wasn't added before the mutex got locked. + if (not item->setAddedToList(true)) { + _addList.push_back(item); + return true; + } + } + return false; + } + + void runItemNow(DoListItem::Ptr const& item); + +private: + std::list _list; + std::mutex _listMtx; ///< Protects _list (lock this one first) + + std::list _addList; + std::mutex _addListMtx; ///< Protects _addList (lock this one second) + + std::atomic _limiter{0}; // Counter to limit log messages, wraps back to 0. + + Central& _central; +}; + + +}}} // namespace lsst:qserv:loader + + +#endif // LSST_QSERV_LOADER_DOLIST_H diff --git a/core/modules/loader/DoListItem.cc b/core/modules/loader/DoListItem.cc new file mode 100644 index 0000000000..36d79f7e98 --- /dev/null +++ b/core/modules/loader/DoListItem.cc @@ -0,0 +1,74 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2019 LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + + +// Class header +#include "loader/DoListItem.h" + +// System headers +#include + +// Qserv headers +#include "loader/Central.h" +#include "loader/LoaderMsg.h" +#include "proto/loader.pb.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.DoListItem"); + +} + +namespace lsst { +namespace qserv { +namespace loader { + + +util::CommandTracked::Ptr DoListItem::runIfNeeded(TimeOut::TimePoint now) { + std::lock_guard lock(_mtx); + if (_command == nullptr) { + if (_isOneShotDone()) return nullptr; + if ((_needInfo || _timeOut.due(now)) && _timeRateLimit.due(now)) { + _timeRateLimit.triggered(); + // Randomly vary the next rate limit timeout + int rand = (std::rand()/(RAND_MAX/1000)); // 0 to 1000 + rand += std::min(_commandsCreated * 10000, 120000); + auto rateLimitRandom = now + std::chrono::milliseconds(rand); + _timeRateLimit.triggered(rateLimitRandom); + _command = createCommand(); + if (_oneShot) ++_commandsCreated; + LOGS(_log, LOG_LVL_DEBUG, "cCreated=" << _commandsCreated << " rand=" << rand); + return _command; + } + } else if (_command->isFinished()) { + _command.reset(); // Allow the command to be sent again later. 
+ } + return nullptr; + } + + +}}} // namespace lsst:qserv::loader + diff --git a/core/modules/loader/DoListItem.h b/core/modules/loader/DoListItem.h new file mode 100644 index 0000000000..bb192d7dae --- /dev/null +++ b/core/modules/loader/DoListItem.h @@ -0,0 +1,173 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ * + */ +#ifndef LSST_QSERV_LOADER_DOLISTITEM_H +#define LSST_QSERV_LOADER_DOLISTITEM_H + +// system headers +#include +#include + +// Qserv headers +#include "util/ThreadPool.h" + +namespace lsst { +namespace qserv { +namespace loader { + + +class Central; + + +class TimeOut { +public: + using TimePoint = std::chrono::system_clock::time_point; + using Clock = std::chrono::system_clock; + + explicit TimeOut(std::chrono::milliseconds timeOut) : _timeOut(timeOut) {} + + bool due() { return due(Clock::now()); } + bool due(TimePoint now) { + auto triggerDiff = std::chrono::duration_cast(now - _lastTrigger); + return (triggerDiff > _timeOut); + } + + void triggered() { return triggered(Clock::now()); } + void triggered(TimePoint now) { + _lastTrigger = now; + } + + std::chrono::milliseconds timeLeft(TimePoint now) { + return std::chrono::duration_cast(now - _lastTrigger); + } + + void setTimeOut(std::chrono::milliseconds timeOut) { _timeOut = timeOut; } + std::chrono::milliseconds getTimeOut() const { return _timeOut; } +private: + // How much time since lastTrigger needs to pass before triggering. + std::chrono::milliseconds _timeOut; + TimePoint _lastTrigger{std::chrono::seconds(0)}; +}; + + +/// This is a base class for other classes to be put on a DoList. +/// Child classes of this class *MUST* be created with shared pointers. +/// +/// A DoListItem is meant to checked periodically by the DoList +/// at a low frequency (a couple of times a second to once every +/// few hours or even days). +/// The DoListItems can cycle forever by just remaining on the +/// DoList where it will run their actions when the timer runs out, +/// which is useful for monitoring status. +/// Or they can be setup to run until they have completed once, +/// a oneShot, which is useful for looking up or inserting keys. +/// +/// A typical action would be sending out a UDP request for status +/// every few seconds until a response is received. 
Then, after a +/// few minutes with no updates, repeating that request to make sure +/// the status hasn't changed. +/// The system is supposed to notify others on changes, but these +/// notifications can get lost, so it makes sense to ask again if +/// nothing has been received for a while. +class DoListItem : public std::enable_shared_from_this { +public: + using Ptr = std::shared_ptr; + + DoListItem(DoListItem const&) = delete; + DoListItem& operator=(DoListItem const&) = delete; + + virtual ~DoListItem() = default; + + util::CommandTracked::Ptr runIfNeeded(TimeOut::TimePoint now); + + bool isAlreadyOnList() { return _addedToList; } + + /// Returns original value of _addedToList. + bool setAddedToList(bool value) { + return _addedToList.exchange(value); + } + + /// @return true if this item should be removed from the list. + bool shouldRemoveFromList() { + std::lock_guard lock(_mtx); + return (_isOneShotDone() || _remove); + } + + /// The info has been updated, so no need to ask for it for a while. + void infoReceived() { + std::lock_guard lock(_mtx); + _needInfo = false; + _timeOut.triggered(); + } + + void setNeedInfo() { + std::lock_guard lock(_mtx); + _needInfo = true; + } + + DoListItem::Ptr getDoListItemPtr() { + return shared_from_this(); + } + + void setTimeOut(std::chrono::milliseconds timeOut) { _timeOut.setTimeOut(timeOut); } + void setTimeRateLimit(std::chrono::milliseconds rateLimit) { _timeRateLimit.setTimeOut(rateLimit); } + + int getCommandsCreated() { return _commandsCreated; } + + virtual util::CommandTracked::Ptr createCommand()=0; + +protected: + /// All derived instances of this class *MUST* be created with shared pointers. + /// A factory function to enforce this is not practical since + /// this class is meant to serve as a base class for unknown + /// future purposes. Sadly, the compiler doesn't enforce the rule. + DoListItem() = default; + + /// Set true if this item only needs to be successfully completed once. 
+ void setOneShot(bool val) { _oneShot = val; } + +private: + /// Lock _mtx before calling. + bool _isOneShotDone() { + return (!_needInfo && _oneShot); + } + + std::atomic _addedToList{false}; ///< True when added to a DoList + bool _oneShot = false; ///< True if after the needed information is gathered, this item can be dropped. + bool _needInfo = true; ///< True if information is needed. + bool _remove = false; ///< set to true if this item should no longer be checked. + /// If no info is needed, check for info after this period of time. + TimeOut _timeOut{std::chrono::minutes(5)}; + /// Rate limiter, no more than 1 message every few seconds + TimeOut _timeRateLimit{std::chrono::milliseconds(7500)}; // TODO: DM-17453 set via config + util::CommandTracked::Ptr _command; + std::mutex _mtx; ///< protects _timeOut, _timeRequest, _command, _oneShot, _needInfo + /// Number of times the command needed to be created. It's only tracked for oneShots as + /// it indicates how many times it needed to be run before it worked. + std::atomic _commandsCreated{0}; +}; + + +}}} // namespace lsst::qserv::loader + +#endif // LSST_QSERV_LOADER_DOLISTITEM_H diff --git a/core/modules/loader/KeyRange.cc b/core/modules/loader/KeyRange.cc new file mode 100644 index 0000000000..0554be6ddc --- /dev/null +++ b/core/modules/loader/KeyRange.cc @@ -0,0 +1,207 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + + +// Class header +#include "KeyRange.h" + +#include + +// qserv headers +#include "loader/BufferUdp.h" +#include "loader/LoaderMsg.h" +#include "proto/loader.pb.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.StringRange"); +} + +namespace lsst { +namespace qserv { +namespace loader { + + +std::ostream& operator<<(std::ostream& os, NeighborsInfo const& ni) { + os << "NeighborsInfo"; + os << " neighborLeft=" << (ni.neighborLeft == nullptr) ? "nullptr" : std::to_string(ni.neighborLeft->get()); + os << " neighborRight=" << (ni.neighborRight == nullptr) ? "nullptr" : std::to_string(ni.neighborRight->get()); + os << " recentAdds=" << ni.recentAdds; + os << " keyCount=" << ni.keyCount; + return os; +} + +std::ostream& operator<<(std::ostream& os, KeyRange const& strRange) { + os << "valid=" << strRange._valid + << " min=" << strRange._min + << " max=" << strRange._maxE + << " unlimited=" << strRange._unlimited; + return os; +} + + +void KeyRange::setAllInclusiveRange() { + _min = CompositeKey(0,""); + _maxE = CompositeKey(CompositeKey::maxIntVal(), ""); + _unlimited = true; + setValid(); +} + + +bool KeyRange::setMin(CompositeKey const& val) { + if (not _unlimited && val >= _maxE) { + _min = decrement(_maxE); + return false; + } + _min = val; + return true; +} + + +bool KeyRange::setMax(CompositeKey const& val, bool unlimited) { + _unlimited = unlimited; + if (unlimited) { + if (val > _maxE) { _maxE = val; } + return true; + } + if (val < _min) { + _maxE = increment(_min); + return false; + } + _maxE = val; + return true; +} + + +bool KeyRange::setMinMax(CompositeKey const& vMin, CompositeKey const& vMax, bool unlimited) { + _unlimited = unlimited; + if (!unlimited && vMin > vMax) { + return 
false; + } + _unlimited = unlimited; + if (_unlimited) { + _min = vMin; + _maxE = std::max(vMax, _min); // max is irrelevant at this point + } else { + _min = vMin; + _maxE = vMax; + } + setValid(); + return true; +} + + + + +std::string KeyRange::incrementString(std::string const& str, char appendChar) { + std::string output(str); + if (output.empty()) { + output += appendChar; + } + size_t pos = output.size() - 1; + char lastChar = output[pos]; + if (lastChar < 'z') { + ++lastChar; + output[pos] = lastChar; + } else { + output += appendChar; + } + return output; +} + + +CompositeKey KeyRange::increment(CompositeKey const& key, char appendChar) { + CompositeKey outKey(key.kInt, incrementString(key.kStr, appendChar)); + return outKey; +} + + +std::string KeyRange::decrementString(std::string const& str, char minChar) { + if (str.empty()) { + return std::string(); + } + std::string output(str); + size_t pos = output.size() - 1; + char lastChar = output[pos]; + --lastChar; + if (lastChar > minChar) { + output[pos] = lastChar; + return output; + } else { + output.erase(pos, 1); + } + return output; +} + + +CompositeKey KeyRange::decrement(CompositeKey const& key, char minChar) { + CompositeKey outK(key); + if (outK.kStr.empty()) { + if (outK.kInt > 0) --outK.kInt; + return outK; + } + outK.kStr = decrementString(outK.kStr, minChar); + return outK; +} + + +void KeyRange::loadProtoRange(proto::WorkerRange& protoRange) { + protoRange.set_valid(_valid); + protoRange.set_minint(_min.kInt); + protoRange.set_minstr(_min.kStr); + protoRange.set_maxint(_maxE.kInt); + protoRange.set_maxstr(_maxE.kStr); + protoRange.set_maxunlimited(_unlimited); +} + + +void ProtoHelper::workerKeysInfoExtractor(BufferUdp& data, uint32_t& wId, NeighborsInfo& nInfo, KeyRange& keyRange) { + auto funcName = "CentralWorker::_workerKeysInfoExtractor"; + LOGS(_log, LOG_LVL_DEBUG, funcName); + auto protoItem = StringElement::protoParse(data); + if (protoItem == nullptr) { + throw LoaderMsgErr(ERR_LOC, 
"protoItem nullptr"); + } + + wId = protoItem->wid(); + nInfo.keyCount = protoItem->mapsize(); + nInfo.recentAdds = protoItem->recentadds(); + proto::WorkerRange protoRange = protoItem->range(); + bool valid = protoRange.valid(); + if (valid) { + CompositeKey minKey(protoRange.minint(), protoRange.minstr()); + CompositeKey maxKey(protoRange.maxint(), protoRange.maxstr()); + bool unlimited = protoRange.maxunlimited(); + keyRange.setMinMax(minKey, maxKey, unlimited); + } + proto::Neighbor protoLeftNeigh = protoItem->left(); + nInfo.neighborLeft->update(protoLeftNeigh.wid()); + proto::Neighbor protoRightNeigh = protoItem->right(); + nInfo.neighborRight->update(protoRightNeigh.wid()); +} + + +}}} // namespace lsst::qserv::loader + diff --git a/core/modules/loader/KeyRange.h b/core/modules/loader/KeyRange.h new file mode 100644 index 0000000000..451b26b29a --- /dev/null +++ b/core/modules/loader/KeyRange.h @@ -0,0 +1,163 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ * + */ +#ifndef LSST_QSERV_LOADER_KEYRANGE_H +#define LSST_QSERV_LOADER_KEYRANGE_H + +// system headers +#include +#include + +// Qserv headers +#include "loader/CompositeKey.h" +#include "loader/Updateable.h" +#include "proto/loader.pb.h" + + +namespace lsst { +namespace qserv { +namespace loader { + +/// Class for storing the key range of a single worker. +/// This is likely to become a template class, hence lots in the header. +/// It tries to keep its state consistent, _min < _max, but depends on +/// other classes to eventually get the correct values for _min and _max. +/// +/// When new workers are activated, they need placeholder values for +/// for their ranges, as the new worker will have no keys. increment(...) +/// and decrement(...) try to create reasonable key values for the ranges +/// but true ranges cannot be established until the worker and its +/// right neighbor (if there is one) each have at least one key. The worker +/// ranges should eventually reach the master, then the other workers +/// and clients. +class KeyRange { +public: + using Ptr = std::shared_ptr; + + KeyRange() = default; + KeyRange(KeyRange const&) = default; + KeyRange& operator=(KeyRange const&) = default; + + ~KeyRange() = default; + + void setAllInclusiveRange(); + + bool setMin(CompositeKey const& val); + bool setMax(CompositeKey const& val, bool unlimited=false); + bool setMinMax(CompositeKey const& vMin, CompositeKey const& vMax, bool unlimited=false); + + bool setValid() { + _valid = (_min <= _maxE ); + return _valid; + } + + /// Return true if other functionally equivalent. 
+ bool equal(KeyRange const& other) const { + if (_valid != other._valid) return false; + if (not _valid) return true; // both invalid + if (_min != other._min) return false; + if (_unlimited != other._unlimited) return false; + if (_unlimited) return true; // both same _min and _unlimited + if (_maxE != other._maxE) return false; + return true; + } + + bool isInRange(CompositeKey const& cKey) const { + if (not _valid) return false; + if (cKey < _min) return false; + if (not _unlimited && cKey >= _maxE) return false; + return true; + } + + bool getValid() const { return _valid; } + bool getUnlimited() const { return _unlimited; } + CompositeKey const& getMin() const { return _min; } + CompositeKey const& getMax() const { return _maxE; } + + bool operator<(KeyRange const& other) const { + /// Arbitrarily, invalid are less than valid, but such comparisons should be avoided. + if (_valid != other._valid) { + if (not _valid) { return true; } + return false; + } + /// Compare minimums. There should be little if any overlap. + if (_min < other._min) { return true; } + return false; + } + + bool operator>(KeyRange const& other) const { + return other < *this; + } + + /// Return a string that would slightly follow the value of the input string 'str' + /// appendChar is the character appended to a string ending with a character > 'z' + static std::string incrementString(std::string const& str, char appendChar='0'); + /// Return a CompositeKey slightly higher value than 'key'. + static CompositeKey increment(CompositeKey const& key, char appendChar='0'); + + // Return a string that would come slightly before 'str'. 'minChar' is the + // smallest acceptable value for the last character before just erasing the last character. + static std::string decrementString(std::string const& str, char minChar='0'); + /// Return a CompositeKey slightly higher lower than 'key'. 
+ static CompositeKey decrement(CompositeKey const& str, char minChar='0'); + + /// Load 'protoRange' with information from this object. + void loadProtoRange(proto::WorkerRange& protoRange); + + friend std::ostream& operator<<(std::ostream&, KeyRange const&); + +private: + bool _valid{false}; ///< true if range is valid + bool _unlimited{false}; ///< true if the range includes largest possible values. + CompositeKey _min; ///< Smallest value = (0, "") + CompositeKey _maxE; ///< maximum value exclusive +}; + + +struct NeighborsInfo { + NeighborsInfo() = default; + NeighborsInfo(NeighborsInfo const&) = delete; + NeighborsInfo& operator=(NeighborsInfo const&) = delete; + + typedef std::shared_ptr> NeighborPtr; + typedef std::weak_ptr> NeighborWPtr; + NeighborPtr neighborLeft{new Updatable(0)}; ///< Neighbor with lesser values + NeighborPtr neighborRight{new Updatable(0)}; ///< Neighbor with higher values + uint32_t recentAdds{0}; ///< Number of keys added to this worker recently. + uint32_t keyCount{0}; ///< Total number of keys stored on the worker. + + friend std::ostream& operator<<(std::ostream& os, NeighborsInfo const& ni); +}; + + +class BufferUdp; + +class ProtoHelper { +public: + static void workerKeysInfoExtractor(BufferUdp& data, uint32_t& name, NeighborsInfo& nInfo, KeyRange& strRange); +}; + +}}} // namespace lsst::qserv::loader + +#endif // LSST_QSERV_LOADER_KEYRANGE_H + diff --git a/core/modules/loader/LoaderMsg.cc b/core/modules/loader/LoaderMsg.cc new file mode 100644 index 0000000000..87f3a8deab --- /dev/null +++ b/core/modules/loader/LoaderMsg.cc @@ -0,0 +1,117 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + + +// Class header +#include "loader/LoaderMsg.h" + +// System headers +#include + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.LoaderMsg"); +} + +namespace lsst { +namespace qserv { +namespace loader { + + +LoaderMsg::LoaderMsg(uint16_t kind, uint64_t id, std::string const& host, uint32_t port) : + msgKind(std::make_shared(kind)), + msgId(std::make_shared(id)), + senderHost(std::make_shared(host)), + senderPort(std::make_shared(port)) { +} + + +void LoaderMsg::parseFromData(BufferUdp& data) { + MsgElement::Ptr elem = MsgElement::retrieve(data, "1parseFromData"); + msgKind = std::dynamic_pointer_cast(elem); + if (msgKind == nullptr) { + throw LoaderMsgErr(ERR_LOC, "LoaderMsg::parseMsg wrong type for msgKind:" + + MsgElement::getStringVal(elem)); + } + + elem = MsgElement::retrieve(data, "2parseFromData"); + msgId = std::dynamic_pointer_cast(elem); + if (msgId == nullptr) { + throw LoaderMsgErr(ERR_LOC, "LoaderMsg::parseMsg wrong type for msgId:" + + MsgElement::getStringVal(elem)); + } + + elem = MsgElement::retrieve(data, "3parseFromData"); + senderHost = std::dynamic_pointer_cast(elem); + if (senderHost == nullptr) { + throw LoaderMsgErr(ERR_LOC, "LoaderMsg::parseMsg wrong type for senderHost:" + + MsgElement::getStringVal(elem)); + } + + elem = 
MsgElement::retrieve(data, "4parseFromData"); + senderPort = std::dynamic_pointer_cast(elem); + if (senderPort == nullptr) { + throw LoaderMsgErr(ERR_LOC, "LoaderMsg::parseMsg wrong type for senderPort:" + + MsgElement::getStringVal(elem)); + } +} + + +void LoaderMsg::appendToData(BufferUdp& data) { + bool success = true; + if (msgKind == nullptr || msgId == nullptr || senderHost == nullptr || senderPort == nullptr) { + success = false; + } else { + success |= msgKind->appendToData(data); + success |= msgId->appendToData(data); + success |= senderHost->appendToData(data); + success |= senderPort->appendToData(data); + } + + if (not success) { + std::string str("LoaderMsg::serialize nullptr"); + str += " msgKind=" + MsgElement::getStringVal(msgKind); + str += " msgId=" + MsgElement::getStringVal(msgId); + str += " senderHost=" + MsgElement::getStringVal(senderHost); + str += " senderPort=" + MsgElement::getStringVal(senderPort); + throw LoaderMsgErr(ERR_LOC, str); + } +} + + +std::string LoaderMsg::getStringVal() const { + std::string str("LMsg("); + str += msgKind->getStringVal() + " " + msgId->getStringVal() + " "; + str += senderHost->getStringVal() + ":" + senderPort->getStringVal() + ")"; + return str; +} + +std::ostream& operator<<(std::ostream& os, LoaderMsg const& loaderMsg) { + os << loaderMsg.getStringVal(); + return os; +} + + +}}} // namespace lsst::qserv::loader diff --git a/core/modules/loader/LoaderMsg.h b/core/modules/loader/LoaderMsg.h new file mode 100644 index 0000000000..8a8460a6ee --- /dev/null +++ b/core/modules/loader/LoaderMsg.h @@ -0,0 +1,111 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ *
+ */
+#ifndef LSST_QSERV_LOADER_LOADERMSG_H
+#define LSST_QSERV_LOADER_LOADERMSG_H
+
+
+// Qserv headers
+#include "loader/MsgElement.h"
+
+
+#define MAX_MSG_STRING_LENGTH 5000
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+/// Base class for loader messages.
+// These messages are meant to be short and simple UDP messages. Long messages
+// may have difficulty being transmitted successfully.
+//
+// The message contains the message kind and the address of the entity sending
+// the message.
+//
+class LoaderMsg {
+public:
+    // Only WAITING and MSG_RECEIVED have explicit values; every later enumerator takes
+    // the next value in declaration order, so reordering them changes the numbers.
+    enum Kind {
+        WAITING = 0,
+        MSG_RECEIVED = 100, // Standard success/error response to received message.
+        TEST, // Communications test.
+        MAST_INFO_REQ, // Request some information about the master
+        MAST_INFO, // Information about the master
+        MAST_WORKER_LIST_REQ, // Request a list of workers from the master.
+        MAST_WORKER_LIST, // List of all workers known by the master.
+        MAST_WORKER_INFO_REQ, // Request information for a single worker.
+        MAST_WORKER_INFO, // All the information the master has about one worker. TODO add key list information
+        MAST_WORKER_ADD_REQ, // Request the Master add the worker. MSG_RECEIVED + MAST_WORKER_INFO
+        WORKER_KEYS_INFO_REQ, // Master asking a worker for information about its key-value pairs.
+        WORKER_KEYS_INFO, // Information about number of key values, range, number of new keys.
+        KEY_INSERT_REQ, // Insert a new key with info. MSG_RECEIVED + KEY_INFO
+        KEY_INSERT_COMPLETE, // Key has been inserted and logged.
+        KEY_LOOKUP_REQ, // Request info for a single key.
+        KEY_LOOKUP, // Information about a specific key. (includes file id and row)
+        WORKER_LEFT_NEIGHBOR, // Master assigns a left neighbor to a worker.
+        WORKER_RIGHT_NEIGHBOR, // Master assigns a right neighbor to a worker.
+        IM_YOUR_L_NEIGHBOR, // Worker message to other worker to setup being neighbors.
+        IM_YOUR_R_NEIGHBOR, // Worker message to other worker to setup being neighbors.
+        NEIGHBOR_VERIFIED, //
+        SHIFT_TO_RIGHT,
+        SHIFT_TO_RIGHT_RECEIVED,
+        SHIFT_FROM_RIGHT,
+        SHIFT_FROM_RIGHT_RECEIVED
+    };
+
+    enum Status {
+        STATUS_SUCCESS = 0,
+        STATUS_PARSE_ERR
+    };
+
+    LoaderMsg() = default;
+    /// Contains the address of entity sending the message.
+    LoaderMsg(uint16_t kind, uint64_t id, std::string const& host, uint32_t port);
+    LoaderMsg(LoaderMsg const&) = delete;
+    LoaderMsg& operator=(LoaderMsg const&) = delete;
+
+    virtual ~LoaderMsg() = default;
+
+    /// Fill the four header elements from 'data'.
+    void parseFromData(BufferUdp& data);
+    /// Append the four header elements to 'data'.
+    void appendToData(BufferUdp& data);
+
+    std::string getStringVal() const;
+
+    /// Return the sum of the sizes of the kind, id and port 'element' members plus the
+    /// length of the sender host string.
+    /// NOTE(review): senderHost is dereferenced here; presumably it is null after default
+    /// construction until parseFromData() runs - confirm callers never hit that case.
+    size_t getExpectedSize() const {
+        size_t exp = sizeof(msgKind->element);
+        exp += sizeof(msgId->element);
+        exp += senderHost->element.size();
+        exp += sizeof(senderPort->element);
+        return exp;
+    }
+
+    UInt16Element::Ptr msgKind;
+    UInt64Element::Ptr msgId;
+    StringElement::Ptr senderHost;
+    UInt32Element::Ptr senderPort;
+
+    friend std::ostream& operator<<(std::ostream& os, LoaderMsg const& loaderMsg);
+};
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_LOADERMSG_H
diff --git a/core/modules/loader/MWorkerList.cc b/core/modules/loader/MWorkerList.cc
new file mode 100644
index 0000000000..39211fbf09
--- /dev/null
+++ b/core/modules/loader/MWorkerList.cc
@@ -0,0 +1,431 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST
Data Management System + * Copyright 2018 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + + +// Class header +#include "loader/MWorkerList.h" + +// System headers +#include + +// Third-party headers +#include "boost/asio.hpp" + +// qserv headers +#include "loader/CentralMaster.h" +#include "loader/LoaderMsg.h" +#include "proto/ProtoImporter.h" +#include "proto/loader.pb.h" + + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.MWorkerList"); +} + +namespace lsst { +namespace qserv { +namespace loader { + +/// Create commands to set a worker's neighbor. +/// It should keep trying this until it works. When the worker sets the neighbor to +/// the target value, this object should initiate a chain reaction that destroys itself. +/// It is very important that the message and neighborPtr both point to +/// the same (left or right) neighbor. 
+class SetNeighborOneShot : public DoListItem, public UpdateNotify { +public: + using Ptr = std::shared_ptr; + + static Ptr create(CentralMaster* central_, + MWorkerListItem::Ptr const& target_, + int msg_, + uint32_t neighborId_, + NeighborsInfo::NeighborPtr const& neighborPtr_) { + Ptr oneShot(new SetNeighborOneShot(central_, target_, msg_, neighborId_, neighborPtr_)); + auto oneShotPtr = std::static_pointer_cast(oneShot->getDoListItemPtr()); + auto updatePtr = std::static_pointer_cast>(oneShotPtr); + neighborPtr_->registerNotify(updatePtr); // Must do this so it will call our updateNotify(). + LOGS(_log, LOG_LVL_INFO, "SetNeighborOneShot neighborId=" << + neighborId_ << " " << neighborPtr_->get()); + // Send this message frequently as the target node could be getting spammed. + oneShot->setTimeRateLimit(std::chrono::milliseconds(100)); // TODO: DM-17453 set via config + return oneShot; + } + + util::CommandTracked::Ptr createCommand() override; + + // This is called every time the worker sends the master a value for its (left/right) neighbor. + // See neighborPtr_->registerNotify() + void updateNotify(uint32_t& oldVal, uint32_t& newVal) override { + if (newVal == neighborId) { + infoReceived(); // This should result in this oneShot DoListItem being removed->destroyed. 
+ } + } + + CentralMaster* const central; + MWorkerListItem::WPtr target; + int const message; + uint32_t const neighborId; + NeighborsInfo::NeighborWPtr neighborPtr; +private: + SetNeighborOneShot(CentralMaster* central_, + MWorkerListItem::Ptr const& target_, + int msg_, + uint32_t neighborId_, + NeighborsInfo::NeighborPtr const& neighborPtr_) : + central(central_), target(target_), message(msg_), neighborId(neighborId_), + neighborPtr(neighborPtr_) { + setOneShot(true); + } +}; + + + +util::CommandTracked::Ptr SetNeighborOneShot::createCommand() { + struct SetNeighborCmd : public util::CommandTracked { + SetNeighborCmd(SetNeighborOneShot::Ptr const& ptr) : oneShotData(ptr) {} + void action(util::CmdData*) override { + auto oSData = std::dynamic_pointer_cast(oneShotData.lock()); + if (oSData != nullptr) { + oSData->central->setWorkerNeighbor(oSData->target, oSData->message, oSData->neighborId); + } + } + std::weak_ptr oneShotData; + }; + auto ptr = std::dynamic_pointer_cast(getDoListItemPtr()); + return std::make_shared(ptr); +} + + +util::CommandTracked::Ptr MWorkerList::createCommand() { + return createCommandMaster(_central); +} + + +util::CommandTracked::Ptr MWorkerList::createCommandMaster(CentralMaster* centralM) { + // The master probably doesn't need to make any checks on the list. + return nullptr; +} + + +// Returns pointer to new item when new worker added, otherwise nullptr. +MWorkerListItem::Ptr MWorkerList::addWorker(std::string const& ip, int udpPort, int tcpPort) { + NetworkAddress udpAddress(ip, udpPort); + NetworkAddress tcpAddress(ip, tcpPort); + + + // If it is already in the map, do not change its id. + std::lock_guard lock(_mapMtx); + auto iter = _ipMap.find(udpAddress); + if (iter != _ipMap.end()) { + LOGS(_log, LOG_LVL_WARN, "addWorker, Could not add worker as worker already exists. 
" << + ip << ":" << udpPort); + return nullptr; + } + // Get an id and make new worker item + auto workerListItem = MWorkerListItem::create(_sequenceId++, udpAddress, tcpAddress, _central); + _ipMap.insert(std::make_pair(udpAddress, workerListItem)); + _wIdMap.insert(std::make_pair(workerListItem->getId(), workerListItem)); + LOGS(_log, LOG_LVL_INFO, "Added worker " << *workerListItem); + _flagListChange(); + + return workerListItem; +} + +bool MWorkerList::sendListTo(uint64_t msgId, std::string const& ip, short port, + std::string const& ourHostName, short ourPort) { + NetworkAddress address(ip, port); + StringElement workerList; + { + std::lock_guard lockStatList(_statListMtx); + { + std::lock_guard lockMap(_mapMtx); + if (_wListChanged || _stateListData == nullptr) { + _wListChanged = false; + /// At this time, all workers should easily fit in a single message. + /// TODO send multiple messages (if needed) with each having the address and + /// range of 100 workers. + /// This version is useful for testing. _stateListData becomes a vector. + proto::LdrMastWorkerList protoList; + protoList.set_workercount(_wIdMap.size()); + for (auto const& item : _wIdMap ) { + proto::WorkerListItem* protoItem = protoList.add_worker(); + MWorkerListItem::Ptr const& wListItem = item.second; + protoItem->set_wid(wListItem->getId()); + } + protoList.SerializeToString(&(workerList.element)); + LoaderMsg workerListMsg(LoaderMsg::MAST_WORKER_LIST, msgId, ourHostName, ourPort); + _stateListData = std::make_shared(); + workerListMsg.appendToData(*_stateListData); + workerList.appendToData(*_stateListData); + } + } + try { + _central->sendBufferTo(ip, port, *_stateListData); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "MWorkerList::sendListTo boost system_error=" << e.what() << + " msgId=" << msgId << " ip=" << ip << " port=" << port << + " ourName=" << ourHostName << " ourPort=" << ourPort); + } + } + + // See if this worker is know. 
+ MWorkerListItem::Ptr workerItem; + { + // See if this is a worker in our map + std::lock_guard lock(_mapMtx); + auto iter = _ipMap.find(address); + if (iter != _ipMap.end()) { + workerItem = iter->second; + } + } + if (workerItem != nullptr) { + workerItem->sendListToWorkerInfoReceived(); + } + return true; +} + + + + +std::pair, std::vector> +MWorkerList::getActiveInactiveWorkerLists() { + std::vector active; + std::vector inactive; + std::lock_guard lck(_mapMtx); + for(auto const& elem : _wIdMap) { + auto item = elem.second; + if (item->isActive()) { + active.push_back(item); + } else { + inactive.push_back(item); + } + } + auto pair = std::make_pair(active, inactive); + return pair; +} + + +// must lock _mapMtx before calling this function +void MWorkerList::_flagListChange() { + _wListChanged = true; + // On the Master, flag each worker in the list that it needs to send an updated list to it's worker. + for (auto const& elem : _wIdMap) { + auto const& item = elem.second; + item->flagNeedToSendList(); + } +} + + +std::string MWorkerList::dump() const { + std::stringstream os; + os << "MWorkerList:\n"; + { + std::lock_guard lck(_mapMtx); + for (auto elem:_wIdMap) { + os << " " << *elem.second << "\n"; + } + os << "MWorkerList ip:\n"; + for (auto elem:_ipMap) { + os << " " << *elem.second << "\n"; + } + } + return os.str(); +} + + +void MWorkerListItem::addDoListItems(Central *central) { + LOGS(_log, LOG_LVL_DEBUG, "MWorkerListItem::addDoListItems"); + std::lock_guard lck(_doListItemsMtx); + if (_sendListToWorker == nullptr) { + _sendListToWorker = std::make_shared(getThis(), _central); + _central->addDoListItem(_sendListToWorker); + } + if (_reqWorkerKeyInfo == nullptr) { + _reqWorkerKeyInfo = std::make_shared(getThis(), _central); + _central->addDoListItem(_reqWorkerKeyInfo); + } +} + + +void MWorkerListItem::flagNeedToSendList() { + auto slw = _sendListToWorker; + if (slw != nullptr) { slw->setNeedInfo(); } +} + + +void 
MWorkerListItem::sendListToWorkerInfoReceived() { + auto slw = _sendListToWorker; + if (slw != nullptr) { + // _sendListToWorker is a tough one to tell if the worker got the info, so + // it is assumed that this worked when the list is sent. The worker + // will either ask for it or it will be sent again later. + // TODO find a reasonable way to tell that the worker got the list. + slw->infoReceived(); + } +} + + +void MWorkerListItem::setAllInclusiveRange() { + LOGS(_log, LOG_LVL_INFO, "MWorkerListItem::setAllInclusiveRange for wId=" << _wId); + std::lock_guard lck(_mtx); + _range.setAllInclusiveRange(); + _active = true; /// First worker. +} + + +void MWorkerListItem::setNeighborsInfo(NeighborsInfo const& nInfo) { + std::lock_guard lck(_mtx); + _neighborsInfo.keyCount = nInfo.keyCount; + _neighborsInfo.recentAdds = nInfo.recentAdds; + + auto old = _neighborsInfo.neighborLeft->get(); + if (old != 0 && old != nInfo.neighborLeft->get()) { + LOGS(_log, LOG_LVL_WARN, "Worker=" << _wId << + "neighborLeft changing from valid old=" << old << + " to new=" << nInfo.neighborLeft->get()); + } + if (old != nInfo.neighborLeft->get()) { + LOGS(_log, LOG_LVL_INFO, "Worker=" << _wId << + "neighborLeft=" << nInfo.neighborLeft->get()); + } + _neighborsInfo.neighborLeft->update(nInfo.neighborLeft->get()); + + old = _neighborsInfo.neighborRight->get(); + if (old != 0) { + LOGS(_log, LOG_LVL_WARN, "Worker=" << _wId << + "neighborRight changing from valid old=" << old << + " to new=" << nInfo.neighborRight->get()); + } + if (old != nInfo.neighborRight->get()) { + + LOGS(_log, LOG_LVL_INFO, "Worker=" << _wId << + "neighborRight=" << nInfo.neighborRight->get()); + } + _neighborsInfo.neighborRight->update(nInfo.neighborRight->get()); +} + + +int MWorkerListItem::getKeyCount() const { + return _neighborsInfo.keyCount; +} + + +std::ostream& MWorkerListItem::dump(std::ostream& os) const { + WorkerListItemBase::dump(os); // call base class version + os << " active=" << _active; + return os; 
+} + + +/// Set this worker's RIGHT neighbor to the worker described in 'item'. +void MWorkerListItem::setRightNeighbor(MWorkerListItem::Ptr const& item) { + // Create a one shot to send a message to the worker. + // It knows it has worked when the worker sends a message back saying it + // has the correct right neighbor. + LOGS(_log, LOG_LVL_DEBUG," MWorkerListItem::setRightNeighbor"); + + auto oneShot = SetNeighborOneShot::create(_central, + getThis(), + LoaderMsg::WORKER_RIGHT_NEIGHBOR, + item->getId(), + _neighborsInfo.neighborRight); + _central->runAndAddDoListItem(oneShot); +} + + +// TODO very similar to MWorkerListItem::setRightNeighbor, consider merging. +void MWorkerListItem::setLeftNeighbor(MWorkerListItem::Ptr const& item) { + // Create a one shot to send a message to the worker. + // It knows it has worked when the worker sends a message back saying it + // has the correct left neighbor. + LOGS(_log, LOG_LVL_DEBUG,"MWorkerListItem::setLeftNeighbor"); + + auto oneShot = SetNeighborOneShot::create(_central, + getThis(), + LoaderMsg::WORKER_LEFT_NEIGHBOR, + item->getId(), + _neighborsInfo.neighborLeft); + + _central->runAndAddDoListItem(oneShot); +} + + +util::CommandTracked::Ptr MWorkerListItem::SendListToWorker::createCommand() { + auto item = mWorkerListItem.lock(); + if (item == nullptr) { + // TODO: should mark set the removal flag for this doListItem + return nullptr; + } + + struct SendListToWorkerCmd : public util::CommandTracked { + SendListToWorkerCmd(CentralMaster *centM_, MWorkerListItem::Ptr const& tItem_) : centM(centM_), tItem(tItem_) {} + void action(util::CmdData*) override { + LOGS(_log, LOG_LVL_DEBUG, "SendListToWorkerCmd::action"); + auto udp = tItem->getUdpAddress(); + centM->getWorkerList()->sendListTo(centM->getNextMsgId(), + udp.ip, udp.port, + centM->getMasterHostName(), centM->getMasterPort()); + } + CentralMaster *centM; + MWorkerListItem::Ptr tItem; + }; + LOGS(_log, LOG_LVL_DEBUG, "SendListToWorker::createCommand"); + return 
std::make_shared(central, item); +} + + + +util::CommandTracked::Ptr MWorkerListItem::ReqWorkerKeyInfo::createCommand() { + auto item = mWorkerListItem.lock(); + if (item == nullptr) { + // TODO: should mark set the removal flag for this doListItem + return nullptr; + } + + struct ReqWorkerKeysInfoCmd : public util::CommandTracked { + ReqWorkerKeysInfoCmd(CentralMaster *centM_, MWorkerListItem::Ptr const& tItem_) : centM(centM_), tItem(tItem_) {} + void action(util::CmdData*) override { + LOGS(_log, LOG_LVL_DEBUG, "ReqWorkerKeyInfoCmd::action"); + auto udp = tItem->getUdpAddress(); + centM->reqWorkerKeysInfo(centM->getNextMsgId(), + udp.ip, udp.port, + centM->getMasterHostName(), centM->getMasterPort()); + } + CentralMaster *centM; + MWorkerListItem::Ptr tItem; + }; + LOGS(_log, LOG_LVL_DEBUG, "SendListToWorker::createCommand"); + return std::make_shared(central, item); +} + + +}}} // namespace lsst::qserv::loader + + + + + + diff --git a/core/modules/loader/MWorkerList.h b/core/modules/loader/MWorkerList.h new file mode 100644 index 0000000000..725273a118 --- /dev/null +++ b/core/modules/loader/MWorkerList.h @@ -0,0 +1,210 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. 
If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_MWORKERLIST_H +#define LSST_QSERV_LOADER_MWORKERLIST_H + +// system headers +#include +#include +#include +#include + +// Qserv headers +#include "loader/Updateable.h" +#include "loader/BufferUdp.h" +#include "loader/DoList.h" +#include "loader/KeyRange.h" +#include "loader/NetworkAddress.h" +#include "loader/WorkerListItemBase.h" + + +namespace lsst { +namespace qserv { +namespace loader { + +class CentralMaster; +class LoaderMsg; + +/// Standard information for a single worker, IP address, key range, timeouts. +class MWorkerListItem : public WorkerListItemBase { +public: + using Ptr = std::shared_ptr; + using WPtr = std::weak_ptr; + + static MWorkerListItem::Ptr create(uint32_t name, NetworkAddress const& udpAddress, + NetworkAddress const& tcpAddress, CentralMaster *central) { + return MWorkerListItem::Ptr(new MWorkerListItem(name, udpAddress, tcpAddress, central)); + } + + MWorkerListItem() = delete; + MWorkerListItem(MWorkerListItem const&) = delete; + MWorkerListItem& operator=(MWorkerListItem const&) = delete; + + virtual ~MWorkerListItem() = default; + + /// @return a properly typed shared pointer to this object. + Ptr getThis() { + Ptr ptr = std::static_pointer_cast(shared_from_this()); + return ptr; + } + + bool isActive() const { return _active; } + void setActive(bool val) { _active = val; } + + /// Add permanent items to the DoList for this worker. + /// They should only be removed if this object is being destroyed. + void addDoListItems(Central *central) override; + + void setAllInclusiveRange(); + + void setNeighborsInfo(NeighborsInfo const& nInfo); + int getKeyCount() const; + + void setRightNeighbor(MWorkerListItem::Ptr const& item); + void setLeftNeighbor(MWorkerListItem::Ptr const& item); + + void flagNeedToSendList(); + + /// Check if the worker actually received the list of workers that + /// was sent. Currently, it assumes the worker got the list. 
+ /// If the worker didn't get the list, it will ask for it after not + /// receiving updates. + void sendListToWorkerInfoReceived(); + + std::ostream& dump(std::ostream& os) const override; +private: + MWorkerListItem(uint32_t wId, + NetworkAddress const& udpAddress, + NetworkAddress const& tcpAddress, + CentralMaster* central) + : WorkerListItemBase(wId), _central(central) { + setUdpAddress(udpAddress); + setTcpAddress(tcpAddress); + } + + TimeOut _lastContact{std::chrono::minutes(10)}; ///< Last time information was received from this worker + NeighborsInfo _neighborsInfo; ///< information used to set neighbors. + + std::atomic _active{false}; ///< true when worker has been given a valid range, or a neighbor. + + CentralMaster* _central; + + // Occasionally send a list of all workers to the worker represented by this object. + struct SendListToWorker : public DoListItem { + SendListToWorker(MWorkerListItem::Ptr const& mWorkerListItem_, CentralMaster *central_) : + mWorkerListItem(mWorkerListItem_), central(central_) {} + MWorkerListItem::WPtr mWorkerListItem; + CentralMaster *central; + util::CommandTracked::Ptr createCommand() override; + }; + + // This is a DoListItem to try to keep the worker up to date about what + // other workers are in the system. + // Once _sendListToWorker has been set, it should be valid until this + // instance is being destroyed. + DoListItem::Ptr _sendListToWorker; + + // Occasionally ask this worker for information about its list of keys, if it hasn't + // been heard from. 
+ struct ReqWorkerKeyInfo : public DoListItem { + ReqWorkerKeyInfo(MWorkerListItem::Ptr const& mWorkerListItem_, CentralMaster *central_) : + mWorkerListItem(mWorkerListItem_), central(central_) {} + MWorkerListItem::WPtr mWorkerListItem; + CentralMaster *central; + util::CommandTracked::Ptr createCommand() override; + }; + DoListItem::Ptr _reqWorkerKeyInfo; + std::mutex _doListItemsMtx; ///< protects _sendListToWorker +}; + + + + +class MWorkerList : public DoListItem { +public: + using Ptr = std::shared_ptr; + + MWorkerList(CentralMaster* central) : _central(central) {} // MUST be created as shared pointer. + MWorkerList() = delete; + MWorkerList(MWorkerList const&) = delete; + MWorkerList& operator=(MWorkerList const&) = delete; + + virtual ~MWorkerList() = default; + + ///// Master only ////////////////////// + // Returns pointer to new item if an item was created. + MWorkerListItem::Ptr addWorker(std::string const& ip, int udpPort, int tcpPort); + + /// Returns true of message could be parsed and a send will be attempted. + /// It sends a list of worker ids. The worker then asks for each id individually + /// to get ips, ports, and ranges. + bool sendListTo(uint64_t msgId, std::string const& ip, short port, + std::string const& outHostName, short ourPort); + + util::CommandTracked::Ptr createCommand() override; + util::CommandTracked::Ptr createCommandMaster(CentralMaster* centralM); + + ////////////////////////////////////////// + /// Nearly the same on Worker and Master + size_t getIdMapSize() { + std::lock_guard lck(_mapMtx); + return _wIdMap.size(); + } + + MWorkerListItem::Ptr getWorkerWithId(uint32_t id) { + std::lock_guard lck(_mapMtx); + auto iter = _wIdMap.find(id); + if (iter == _wIdMap.end()) { return nullptr; } + return iter->second; + } + + /// @Return 2 lists. One of active workers, one of inactive workers. Both lists are copies. 
+ std::pair, std::vector> + getActiveInactiveWorkerLists(); + + std::string dump() const; + +protected: + void _flagListChange(); + + CentralMaster* _central; + std::map _wIdMap; + std::map _ipMap; + bool _wListChanged{false}; ///< true if the list has changed + BufferUdp::Ptr _stateListData; ///< message object for sendListTo(...) + + /// Protects _stateListData. It must be locked before _wIdMap and never + /// locked in conjunction with _ipMap. + std::mutex _statListMtx; + uint32_t _totalNumberOfWorkers{0}; ///< total number of workers according to the master. + mutable std::mutex _mapMtx; ///< protects _wIdMap, _ipMap, _wListChanged + + std::atomic _sequenceId{1}; ///< Source of ids for workers. 0 is invalid name. + +}; + + +}}} // namespace lsst::qserv::loader + +#endif // LSST_QSERV_LOADER_MWORKERLIST_H diff --git a/core/modules/loader/MasterConfig.cc b/core/modules/loader/MasterConfig.cc new file mode 100644 index 0000000000..3cbd18fb1e --- /dev/null +++ b/core/modules/loader/MasterConfig.cc @@ -0,0 +1,65 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "loader/MasterConfig.h" + +// System headers + +// LSST headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "util/ConfigStore.h" +#include "util/ConfigStoreError.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.Config"); +} + + +namespace lsst { +namespace qserv { +namespace loader { + + +MasterConfig::MasterConfig(util::ConfigStore const& configStore) { + try { + setFromConfig(configStore); + } catch (util::ConfigStoreError const& e) { + throw ConfigErr(ERR_LOC, std::string("MasterConfig ") + e.what()); + } +} + + +std::ostream& MasterConfig::dump(std::ostream &os) const { + os << "(MasterConfig(" << header << "):"; + ConfigBase::dump(os); + os << ")"; + return os; +} + + +}}} // namespace lsst::qserv::loader + + diff --git a/core/modules/loader/MasterConfig.h b/core/modules/loader/MasterConfig.h new file mode 100644 index 0000000000..c1b46610f2 --- /dev/null +++ b/core/modules/loader/MasterConfig.h @@ -0,0 +1,78 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ *
+ */
+#ifndef LSST_QSERV_LOADER_MASTERCONFIG_H
+#define LSST_QSERV_LOADER_MASTERCONFIG_H
+
+// Qserv headers
+#include "loader/ConfigBase.h"
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+/// A class for reading the configuration file for the master, which consists of
+/// a collection of key-value pairs, and providing access functions for those values.
+/// Every value is registered under the "master" header (see 'header' below).
+class MasterConfig : public ConfigBase {
+public:
+    /// Build a util::ConfigStore from 'configFileName' and delegate to the
+    /// private constructor. Constructor can throw ConfigErr.
+    explicit MasterConfig(std::string const& configFileName)
+        : MasterConfig(util::ConfigStore(configFileName)) {}
+
+    MasterConfig() = delete;
+    MasterConfig(MasterConfig const&) = delete;
+    MasterConfig& operator=(MasterConfig const&) = delete;
+
+    /// @return the integer value of the "portUdp" element.
+    int getMasterPort() const { return _portUdp->getInt(); }
+    /// @return the integer value of the "threadPoolSize" element.
+    int getThreadPoolSize() const { return _threadPoolSize->getInt(); }
+    /// @return the integer value of the "loopSleepTime" element.
+    int getLoopSleepTime() const { return _loopSleepTime->getInt(); }
+    /// @return the integer value of the "maxKeysPerWorker" element.
+    int getMaxKeysPerWorker() const { return _maxKeysPerWorker->getInt(); }
+    /// @return the integer value of the "iOThreads" element.
+    int getIOThreads() const { return _iOThreads->getInt(); }
+
+
+    /// Write a description of this configuration to 'os' and return 'os'.
+    std::ostream& dump(std::ostream &os) const override;
+
+    // 'header' is declared before the ConfigElement members below because
+    // their initializers read it; members are initialized in declaration order.
+    std::string const header{"master"}; ///< Header for values
+private:
+    /// Loads the elements from 'configStore'; reached only through the public
+    /// file-name constructor.
+    MasterConfig(util::ConfigStore const& configStore);
+
+    // NOTE(review): in the create() calls below, the trailing true/false is
+    // presumably a "required" flag and "5" a default value - confirm against
+    // ConfigElement::create.
+
+    /// UDP port for the master - usually 9875
+    ConfigElement::Ptr _portUdp{ConfigElement::create(cfgList, header, "portUdp", ConfigElement::INT, true)};
+    /// Maximum average keys per worker before activating a new worker.
+    ConfigElement::Ptr _maxKeysPerWorker{
+        ConfigElement::create(cfgList, header, "maxKeysPerWorker", ConfigElement::INT, true)};
+    /// Size of the master's thread pool - 10
+    ConfigElement::Ptr _threadPoolSize{
+        ConfigElement::create(cfgList, header, "threadPoolSize", ConfigElement::INT, true)};
+    /// Time spent sleeping between checking elements in the DoList in microseconds. 0.1 seconds.
+    ConfigElement::Ptr _loopSleepTime{
+        ConfigElement::create(cfgList, header, "loopSleepTime", ConfigElement::INT, true)};
+    /// Number of IO threads the server should run.
+    ConfigElement::Ptr _iOThreads{
+        ConfigElement::create(cfgList, header, "iOThreads", ConfigElement::INT, false, "5")};
+};
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_MASTERCONFIG_H
diff --git a/core/modules/loader/MasterServer.cc b/core/modules/loader/MasterServer.cc
new file mode 100644
index 0000000000..21c4b377ff
--- /dev/null
+++ b/core/modules/loader/MasterServer.cc
@@ -0,0 +1,278 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see .
+ */ + + +// Class header +#include "loader/MasterServer.h" + +// System headers +#include + +// Third-party headers + +// qserv headers +#include "loader/CentralMaster.h" +#include "loader/LoaderMsg.h" +#include "loader/NetworkAddress.h" +#include "proto/ProtoImporter.h" +#include "proto/loader.pb.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.MasterServer"); +} + +namespace lsst { +namespace qserv { +namespace loader { + +BufferUdp::Ptr MasterServer::parseMsg(BufferUdp::Ptr const& data, + boost::asio::ip::udp::endpoint const& senderEndpoint) { + + LOGS(_log, LOG_LVL_DEBUG, "MasterServer::parseMsg sender " << senderEndpoint << + " data length=" << data->getAvailableWriteLength()); + BufferUdp::Ptr sendData; /// nullptr for empty response. + LoaderMsg inMsg; + try { + inMsg.parseFromData(*data); + } catch (LoaderMsgErr const& exc) { + std::string errMsg("MasterServer::parseMsg inMsg garbled exception "); + errMsg += exc.what(); + LOGS(_log, LOG_LVL_ERROR, errMsg); + sendData = prepareReplyMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR, errMsg); + return sendData; + } + + try { + LOGS(_log, LOG_LVL_INFO, "MasterServer::parseMsg sender " << senderEndpoint << + " kind=" << inMsg.msgKind->element << " data length=" << data->getAvailableWriteLength()); + switch (inMsg.msgKind->element) { + case LoaderMsg::MSG_RECEIVED: + // TODO: locate msg id in send messages and take appropriate action + break; + case LoaderMsg::MAST_INFO_REQ: + // TODO: sendData = masterInfoRequest(inMsg, data, senderEndpoint); + break; + case LoaderMsg::MAST_WORKER_LIST_REQ: + sendData = workerListRequest(inMsg, data, senderEndpoint); + break; + case LoaderMsg::MAST_WORKER_INFO_REQ: + // Request information about a specific worker via MAST_WORKER_INFO + sendData = workerInfoRequest(inMsg, data, senderEndpoint); + break; + case LoaderMsg::MAST_WORKER_ADD_REQ: + sendData = workerAddRequest(inMsg, data, senderEndpoint); + break; + 
case LoaderMsg::WORKER_KEYS_INFO: + sendData = workerKeysInfo(inMsg, data, senderEndpoint); + break; + // following not expected by master + case LoaderMsg::MAST_INFO: + case LoaderMsg::MAST_WORKER_LIST: + case LoaderMsg::MAST_WORKER_INFO: + case LoaderMsg::KEY_INSERT_REQ: + case LoaderMsg::KEY_LOOKUP_REQ: + case LoaderMsg::KEY_LOOKUP: + /// TODO add msg unexpected by master response. + break; + default: + ++_errCount; + LOGS(_log, LOG_LVL_ERROR, "unknownMsgKind errCount=" << _errCount << " inMsg=" << inMsg); + sendData = prepareReplyMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR, + "unknownMsgKind"); + } + } catch (LoaderMsgErr const& exc) { + ++_errCount; + std::string errMsg("MasterServer::parseMsg inMsg garbled exception "); + errMsg += exc.what(); + LOGS(_log, LOG_LVL_ERROR, errMsg); + // Send error back to the server in inMsg + auto reply = prepareReplyMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR, errMsg); + sendBufferTo(inMsg.senderHost->element, inMsg.senderPort->element, *reply); + return nullptr; + } + + return sendData; +} + + +BufferUdp::Ptr MasterServer::prepareReplyMsg(boost::asio::ip::udp::endpoint const& senderEndpoint, + LoaderMsg const& inMsg, int status, std::string const& msgTxt) { + + if (status != LoaderMsg::STATUS_SUCCESS) { + LOGS(_log,LOG_LVL_WARN, "Error response Original from " << senderEndpoint << + " msg=" << msgTxt << " inMsg=" << inMsg.getStringVal()); + } + + LoaderMsg outMsg(LoaderMsg::MSG_RECEIVED, inMsg.msgId->element, getOurHostName(), getOurPort()); + + // create the proto buffer + proto::LdrMsgReceived protoBuf; + protoBuf.set_originalid(inMsg.msgId->element); + protoBuf.set_originalkind(inMsg.msgKind->element); + protoBuf.set_status(LoaderMsg::STATUS_PARSE_ERR); + protoBuf.set_errmsg(msgTxt); + protoBuf.set_dataentries(0); + + StringElement respBuf; + protoBuf.SerializeToString(&(respBuf.element)); + + auto sendData = std::make_shared(1000); // this message should be fairly small. 
+ outMsg.appendToData(*sendData); + respBuf.appendToData(*sendData); + return sendData; +} + + +BufferUdp::Ptr MasterServer::workerAddRequest(LoaderMsg const& inMsg, BufferUdp::Ptr const& data, + boost::asio::ip::udp::endpoint const& senderEndpoint) { + + /// Message contains the network address of a worker to add to our list. + int tcpPort = 0; + auto addReq = NetworkAddress::create(data, tcpPort, "MasterServer::workerAddRequest"); + if (addReq == nullptr) { + return prepareReplyMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR, + "STATUS_PARSE_ERR parse error workerAddRequest "); + } + + // Once the worker has been added, its name will be sent to all other workers. + _centralMaster->addWorker(addReq->ip, addReq->port, tcpPort); + + LOGS(_log, LOG_LVL_INFO, "Adding worker ip=" << addReq->ip << + " udp=" << addReq->port << " tcp=" << tcpPort); + + return nullptr; +} + + +BufferUdp::Ptr MasterServer::workerListRequest(LoaderMsg const& inMsg, BufferUdp::Ptr const& data, + boost::asio::ip::udp::endpoint const& senderEndpoint) { + std::string funcName("MasterServer::workerListRequest"); + LOGS(_log, LOG_LVL_DEBUG, funcName); + + int tcpPort = 0; // only needed for create parameter + auto addr = NetworkAddress::create(data, tcpPort, funcName); + if (addr == nullptr) { + std::string errStr("STATUS_PARSE_ERR parse error in " + funcName); + LOGS(_log, LOG_LVL_ERROR, errStr); + return prepareReplyMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR, errStr); + } + + // TODO: put this in a separate thread. 
+ auto workerList = _centralMaster->getWorkerList(); + workerList->sendListTo(inMsg.msgId->element, addr->ip, addr->port, getOurHostName(), getOurPort()); + LOGS(_log, LOG_LVL_INFO, funcName << " done sendListTo "); + + return nullptr; +} + + +BufferUdp::Ptr MasterServer::workerKeysInfo(LoaderMsg const& inMsg, BufferUdp::Ptr const& data, + boost::asio::ip::udp::endpoint const& senderEndpoint) { + + std::string funcName("MasterServer::workerKeysInfo"); + LOGS(_log, LOG_LVL_DEBUG, funcName); + + try { + uint32_t name; + NeighborsInfo nInfo; + KeyRange strRange; + ProtoHelper::workerKeysInfoExtractor(*data, name, nInfo, strRange); + LOGS(_log, LOG_LVL_INFO, funcName << " name=" << name << " keyCount=" << nInfo.keyCount << + " recentAdds=" << nInfo.recentAdds << " range=" << strRange); + // TODO move to separate thread. + _centralMaster->updateWorkerInfo(name, nInfo, strRange); + } catch (LoaderMsgErr &msgErr) { + LOGS(_log, LOG_LVL_ERROR, msgErr.what()); + return prepareReplyMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR, msgErr.what()); + } + return nullptr; +} + + +BufferUdp::Ptr MasterServer::workerInfoRequest(LoaderMsg const& inMsg, BufferUdp::Ptr const& data, + boost::asio::ip::udp::endpoint const& senderEndpoint) { + // TODO HIGH Wrap this up in a command and put it on a queue. + try { + std::string const funcName("MasterServer::workerInfoRequest"); + int tcpPort = 0; + NetworkAddress::UPtr requestorAddr = NetworkAddress::create(data, tcpPort, funcName); + if (requestorAddr == nullptr) { + throw LoaderMsgErr(ERR_LOC, "requestorAddr nullptr"); + } + + auto protoItem = StringElement::protoParse(*data); + if (protoItem == nullptr) { + throw LoaderMsgErr(ERR_LOC, "protoItem nullptr"); + } + + auto workerId = protoItem->wid(); + LOGS(_log, LOG_LVL_INFO, funcName << " Master got wId=" << workerId); + + /// Find the worker name in the map. 
+ auto workerItem = _centralMaster->getWorkerWithId(protoItem->wid()); + if (workerItem == nullptr) { + /// TODO construct message for invalid worker + return nullptr; + } + + /// Return worker's name, netaddress, and range in MAST_WORKER_INFO msg + proto::WorkerListItem protoWorker; + proto::LdrNetAddress* protoAddr = protoWorker.mutable_address(); + proto::WorkerRange* protoRange = protoWorker.mutable_range(); + protoWorker.set_wid(workerItem->getId()); + auto udp = workerItem->getUdpAddress(); + protoAddr->set_ip(udp.ip); + protoAddr->set_udpport(udp.port); + protoAddr->set_tcpport(workerItem->getTcpAddress().port); + auto range = workerItem->getRangeString(); + LOGS(_log, LOG_LVL_INFO, funcName << " workerInfoRequest range = " << range); + range.loadProtoRange(*protoRange); + StringElement seItem(protoWorker.SerializeAsString()); + + LoaderMsg masterWorkerInfoMsg(LoaderMsg::MAST_WORKER_INFO, _centralMaster->getNextMsgId(), + _centralMaster->getMasterHostName(), _centralMaster->getMasterPort()); + + BufferUdp sendBuf; + masterWorkerInfoMsg.appendToData(sendBuf); + seItem.appendToData(sendBuf); + + // Send the response to the worker that asked for it. + try { + _centralMaster->sendBufferTo(requestorAddr->ip, requestorAddr->port, sendBuf); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "MasterServer::workerInfoRequest boost system_error=" << e.what() << + " inMsg=" << inMsg); + } + } catch (LoaderMsgErr const& msgErr) { + LOGS(_log, LOG_LVL_ERROR, msgErr.what()); + return prepareReplyMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR, msgErr.what()); + } + return nullptr; +} + +}}} // namespace lsst:qserv::loader + + diff --git a/core/modules/loader/MasterServer.h b/core/modules/loader/MasterServer.h new file mode 100644 index 0000000000..9e3f8246c6 --- /dev/null +++ b/core/modules/loader/MasterServer.h @@ -0,0 +1,81 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. 
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see .
+ *
+ */
+#ifndef LSST_QSERV_LOADER_MASTERSERVER_H
+#define LSST_QSERV_LOADER_MASTERSERVER_H
+
+// system headers
+// NOTE(review): the two header names below are missing from this text
+// (angle-bracket content was stripped); restore them from the original file.
+#include
+#include
+
+// third party headers
+#include "boost/asio.hpp"
+
+// Qserv headers
+#include "loader/ServerUdpBase.h"
+#include "loader/MWorkerList.h"
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+class LoaderMsg;
+class CentralMaster;
+
+/// UDP server for the master. parseMsg() decodes each incoming message and
+/// hands it to one of the worker* handlers below, which act on the
+/// CentralMaster supplied at construction.
+class MasterServer : public ServerUdpBase {
+public:
+    /// 'centralMaster' is stored as a raw pointer; the defaulted destructor
+    /// does not delete it.
+    MasterServer(boost::asio::io_service& io_service, std::string const& host, int port, CentralMaster* centralMaster)
+        : ServerUdpBase(io_service, host, port), _centralMaster(centralMaster) {}
+
+    ~MasterServer() override = default;
+
+    /// Parse the message in 'data' and dispatch it by message kind.
+    /// @return a reply buffer, or nullptr when nothing is to be sent back.
+    BufferUdp::Ptr parseMsg(BufferUdp::Ptr const& data,
+                            boost::asio::ip::udp::endpoint const& endpoint) override;
+
+
+    /// Handler for a request to add the worker whose address is in 'data'.
+    BufferUdp::Ptr workerAddRequest(LoaderMsg const& inMsg, BufferUdp::Ptr const& data,
+                                    boost::asio::ip::udp::endpoint const& senderEndpoint);
+
+    /// Handler for a request for the list of workers.
+    BufferUdp::Ptr workerListRequest(LoaderMsg const& inMsg, BufferUdp::Ptr const& data,
+                                     boost::asio::ip::udp::endpoint const& senderEndpoint);
+
+    /// Handler for a request for information about one worker.
+    BufferUdp::Ptr workerInfoRequest(LoaderMsg const& inMsg, BufferUdp::Ptr const& data,
+                                     boost::asio::ip::udp::endpoint const& senderEndpoint);
+
+    /// Handler for a worker reporting its key information.
+    BufferUdp::Ptr workerKeysInfo(LoaderMsg const& inMsg, BufferUdp::Ptr const& data,
+                                  boost::asio::ip::udp::endpoint const& senderEndpoint);
+
+    // Replies should only be sent on errors and always be sent to the server port. A reply
+    // that gets lost in transmission is going to be a nuisance, requiring extra timeouts.
+    BufferUdp::Ptr prepareReplyMsg(boost::asio::ip::udp::endpoint const& senderEndpoint,
+                                   LoaderMsg const& inMsg,
+                                   int status, std::string const& msgTxt); // TODO shows up in both MasterServer and WorkerServer
+
+private:
+    CentralMaster* _centralMaster; ///< Not owned; target of all handler actions.
+};
+
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_MASTERSERVER_H
diff --git a/core/modules/loader/MsgElement.cc b/core/modules/loader/MsgElement.cc
new file mode 100644
index 0000000000..6184af9684
--- /dev/null
+++ b/core/modules/loader/MsgElement.cc
@@ -0,0 +1,183 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see .
+ */ + + +// Class header +#include "loader/MsgElement.h" + +// System headers +#include + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.MsgElement"); +} + +namespace lsst { +namespace qserv { +namespace loader { + + +bool MsgElement::retrieveType(BufferUdp &data, char& elemType) { + return data.retrieve(&elemType, sizeof(elemType)); +} + + +MsgElement::Ptr MsgElement::retrieve(BufferUdp& data, std::string const& note, bool throwOnMissing) { + char elemT; + if (not retrieveType(data, elemT)) { + LOGS(_log, LOG_LVL_INFO, note << "no type retrieved "); + return nullptr; // the data probably has not been read from the socket yet. + } + MsgElement::Ptr msgElem = create(elemT); + if (msgElem != nullptr && not msgElem->retrieveFromData(data)) { + if (throwOnMissing) { + // For UDP, No good way to recover from missing data from a know type. + throw LoaderMsgErr(ERR_LOC, note + "static retrieve, incomplete data for type=" + + std::to_string((int)elemT) + " data:" + data.dumpStr()); + } + // For TCP, data can arrive later. + return nullptr; + + } + return msgElem; +} + + +MsgElement::Ptr MsgElement::create(char elementType) { + switch (elementType) { + case STRING_ELEM: + return std::make_shared(); + case UINT16_ELEM: + return std::make_shared(); + case UINT32_ELEM: + return std::make_shared(); + case UINT64_ELEM: + return std::make_shared(); + case NOTHING: + // Fallthrough + default: + throw LoaderMsgErr(ERR_LOC, "MsgElement::create Unexpected type " + + std::to_string(elementType)); + } +} + + +// Returns data pointer after inserted string. +bool StringElement::appendToData(BufferUdp& data) { + auto len = element.length(); + auto sz = sizeof(S_LEN_TYPE); + auto totalLength = len + sz + 1; // string, length of string, data type. 
+ if (not data.isAppendSafe(totalLength)) { + LOGS(_log, LOG_LVL_INFO, "StringElement append makes data too long total=" << totalLength << + " data.writeLen=" << data.getAvailableWriteLength() << + " max=" << data.getMaxLength()); + return false; + } + + // Insert type + _appendType(data); + + // Insert the length + S_LEN_TYPE lenLT = len; + S_LEN_TYPE netLen = htonl(lenLT); + + data.append(&netLen, sz); + + // Insert the string + if (not data.append(element.data(), len)) { + throw LoaderMsgErr(ERR_LOC, "StringElement append unexpectedly failed element=" + element + + " data=" + data.dumpStr()); + } + return true; + +} + + +bool StringElement::retrieveFromData(BufferUdp& data) { + // Get the length. + S_LEN_TYPE netLen; + if (not data.retrieve(&netLen, sizeof(S_LEN_TYPE))) { + LOGS(_log, LOG_LVL_WARN, "retrieveFromData failed to retrieve length"); + return false; + } + S_LEN_TYPE len = ntohl(netLen); + + // Get the string. + bool res = data.retrieveString(element, len); + return res; +} + + +bool StringElement::compare(StringElement* other, std::ostream& os) { + bool equal = true; + os << "compare "; + if (other == nullptr) { + os << "other is nullptr"; + return false; + } + + os << "len("; + if (element.length() == other->element.length()) { + os << "eq " << element.length(); + } else { + os << "!! " << element.length() << "<>" << other->element.length(); + equal = false; + } + os << ")"; + + auto iterT = element.begin(); + auto endT = element.end(); + auto iterO = other->element.begin(); + auto endO = other->element.end(); + int pos = 0; + int errCount = 0; + for (;iterT != endT && iterO != endO; iterT++, iterO++) { + if (*iterT != *iterO) { + os << "\n !! 
pos=" << pos << " T=" << std::hex << (int)*iterT; + os << " O=" << std::hex << (int)*iterO; + if (++errCount > 5) { + os << "\n stopping after 5 errors"; + break; + } + } + ++pos; + } + os << "\n pos=" << pos; + if (iterT != endT) { + os << "\n this did not reach the end."; + equal = false; + } + if (iterO != endO) { + os << "\n other did not reach the end."; + equal = false; + } + + os << "\n equal=" << equal; + return equal; +} + + +}}} // namespace lsst::qserv::loader diff --git a/core/modules/loader/MsgElement.h b/core/modules/loader/MsgElement.h new file mode 100644 index 0000000000..5ab059e946 --- /dev/null +++ b/core/modules/loader/MsgElement.h @@ -0,0 +1,309 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_MSGELEMENT_H +#define LSST_QSERV_LOADER_MSGELEMENT_H + +// system headers +#include +#include +#include +#include + +// Qserv headers +#include "loader/BufferUdp.h" +#include "proto/ProtoImporter.h" +#include "util/Issue.h" + +#define MAX_MSG_STRING_LENGTH 5000 + +namespace lsst { +namespace qserv { +namespace loader { + +/// Class for throwing communication/parsing exceptions. 
+class LoaderMsgErr : public util::Issue { +public: + LoaderMsgErr(util::Issue::Context const& ctx, std::string const& message) : + util::Issue(ctx, message) {} +}; + +/// Base class for message elements. It include methods for appending or retrieving +/// the different types of MsgElements from BufferUdp objects. +/// Parsing and communication errors may throw LoaderMsgErr. +class MsgElement { +public: + using Ptr = std::shared_ptr; + enum ElementType { + NOTHING = 0, + STRING_ELEM = 1, + UINT16_ELEM = 2, + UINT32_ELEM = 3, + UINT64_ELEM = 4 + }; + + explicit MsgElement(char elementType) : _elementType(elementType) {} + MsgElement() = delete; + MsgElement(MsgElement const&) = delete; + MsgElement& operator=(MsgElement const&) = delete; + virtual ~MsgElement() = default; + + /// This method appends the data in the MsgElement to 'data'. + /// Pointers in 'data are updated appropriately. + virtual bool appendToData(BufferUdp& data)=0; + + /// This method retrieves a single MsgElement from 'data'. Pointers + /// in 'data' are moved to where the next MsgElement should be located. + /// @return true if a MsgElement could be read safely, otherwise + /// it returns false. + virtual bool retrieveFromData(BufferUdp &data)=0; + + /// Return the TRANSMITTED size of the element. For StringElement, this is not know until + /// the string has been constructed. Numeric elements have constant size. + virtual size_t transmitSize() const =0; + + /// Create the correct MsgElement child class for 'elemenetType' + static MsgElement::Ptr create(char elementType); + + /// Retrieve the type of the element in 'data' and put the type in 'elemType'. + /// The value in 'elemType' is only valid if the method returns true. Pointers + /// in 'data' are moved appropriately. + /// @return - True if a type could be retrieved from 'data'. False otherwise. + static bool retrieveType(BufferUdp &data, char& elemType); + + /// Retrieve a MsgElement from 'data' and return it. 
Pointers in 'data' + /// are updated appropriately. + /// if 'throwOnMissing' is true it will throw an error on missing data + /// if the type is known. For UDP messages, it should be all or nothing, + /// so a type without data indicates an error. This is not the case with TCP. + /// @return a pointer to the MsgElement retrieved or nullptr if + /// no MsgElement could be retrieved. + static MsgElement::Ptr retrieve(BufferUdp& data, std::string const& note, bool throwOnMissing=true); + + /// @return True if 'a' and 'b' are equivalent. False otherwise. + static bool equal(MsgElement* a, MsgElement* b) { + if (a == b) return true; + if (a == nullptr || b == nullptr) return false; + if (a->_elementType != b->_elementType) return false; + if (a->_elementType == NOTHING) return true; + return a->equal(b); + } + + /// @return the type of this MsgElement. + char getElementType() const { return _elementType; } + + /// @return a string that is a good representation of what is in the MsgElement. + static std::string getStringVal(MsgElement::Ptr const& msgElem) { + if (msgElem == nullptr) return std::string("nullptr"); + return msgElem->getStringVal(); + } + + /// @return a string that is a good representation of what is in the MsgElement. + virtual std::string getStringVal()=0; + + /// @return true if this MsgElement is equivalent to 'other' + virtual bool equal(MsgElement* other)=0; + + /// @return the size of the base class data members in bytes. + size_t sizeOfBase() const { return sizeof(_elementType); } + +protected: + /// Append _elementType to 'data' advancing pointers appropriately. + /// @return true if _elementType was successfully appended. + bool _appendType(BufferUdp &data) const { + return data.append(&_elementType, sizeof(_elementType)); + } + + /// Get the type from from 'data'. + /// @return a pointer to the byte immediately after the type information in 'data'. 
+ const char* _retrieveType(const char* data) { + _elementType = *data; + const char* ptr = data + 1; + return ptr; + } + +private: + char _elementType{NOTHING}; ///< The type of this MsgElement. +}; + + +/// Generic numeric type for network transmission. The class provides +/// big<->little endian conversion for network transfers as well as +/// definitions for the virtual functions in MsgElement. +template ::value, T>::type> +class NumElement : public MsgElement { +public: + explicit NumElement(char myType) : MsgElement(myType) {} + NumElement(char myType, T element_) : MsgElement(myType), element(element_) {} + NumElement() = delete; + NumElement(NumElement const&) = delete; + NumElement& operator=(NumElement const&) = delete; + + bool appendToData(BufferUdp &data) override { + if (_appendType(data)) { + T netElem = changeEndianessOnLittleEndianOnly(element); + return data.append(&netElem, _sizeT); + } + return false; + } + + bool retrieveFromData(BufferUdp &data) override { + T netElem; + if (data.retrieve(&netElem, sizeof(T))) { + element = changeEndianessOnLittleEndianOnly(netElem); + return true; + } + return false; + } + + size_t transmitSize() const override { + return _sizeT + sizeOfBase(); + } + + std::string getStringVal() override { return std::to_string(element); } + + T element{0}; ///< The actual numeric value of this MsgElement. + + /// This function will change endianess only on little endian machines. + /// It is effectively a no-op on big endian machines. 
+ T changeEndianessOnLittleEndianOnly(T const& in) { + uint8_t data[_sizeT]; + memcpy(&data, &in, _sizeT); + T res = 0; + int shift = 0; + int pos = _sizeT -1; + for (size_t j=0; j < _sizeT; ++j) { + res |= static_cast(data[pos]) << shift; + shift += 8; + --pos; + } + return res; + } + + bool equal(MsgElement* other) override { + NumElement* ptr = dynamic_cast*>(other); + if (ptr == nullptr) return false; + return (element == ptr->element); + } + +private: + static const size_t _sizeT{sizeof(T)}; ///< Size of the numeric type in bytes. +}; + + +class UInt16Element : public NumElement { +public: + using Ptr = std::shared_ptr; + static const int MYTYPE = UINT16_ELEM; + + UInt16Element() : NumElement(MYTYPE) {} + explicit UInt16Element(uint16_t element_) : NumElement(MYTYPE, element_) {} + UInt16Element(UInt16Element const&) = delete; + UInt16Element& operator=(UInt16Element const&) = delete; +}; + + +class UInt32Element : public NumElement { +public: + using Ptr = std::shared_ptr; + static const int MYTYPE = UINT32_ELEM; + + UInt32Element() : NumElement(MYTYPE) {} + explicit UInt32Element(uint32_t element_) : NumElement(MYTYPE, element_) {} + UInt32Element(UInt32Element const&) = delete; + UInt32Element& operator=(UInt32Element const&) = delete; +}; + + +class UInt64Element : public NumElement { +public: + using Ptr = std::shared_ptr; + static const int MYTYPE = UINT64_ELEM; + + UInt64Element() : NumElement(MYTYPE) {} + explicit UInt64Element(uint64_t element_) : NumElement(MYTYPE, element_) {} + UInt64Element(UInt64Element const&) = delete; + UInt64Element& operator=(UInt64Element const&) = delete; +}; + + +// Using uint32_t (LEN_TYPE) for length in all cases. +class StringElement : public MsgElement { +public: + using Ptr = std::shared_ptr; + using UPtr = std::unique_ptr; + static const int MYTYPE = STRING_ELEM; + typedef uint32_t S_LEN_TYPE; // If this type changes, so must associated htonl and ntohl calls. 
+ + StringElement(std::string const& element_) : MsgElement(MYTYPE), element(element_) {} + StringElement() : MsgElement(MYTYPE) {} + StringElement(StringElement const&) = delete; + StringElement& operator=(StringElement const&) = delete; + + ~StringElement() override = default; + + bool appendToData(BufferUdp& data) override; + bool retrieveFromData(BufferUdp& data) override; + std::string getStringVal() override { return element; } + + std::string element; + + bool equal(MsgElement* other) override { + StringElement* ptr = dynamic_cast(other); + if (ptr == nullptr) { return false; } + return (ptr->element == ptr->element); + } + + /// @return true if this element is equal to other. + /// 'os' is filled with a description of the comparison. + bool compare(StringElement* other, std::ostream& os); + + /// The size of StringElement changes! + size_t transmitSize() const override { + // char in string, variable to transmit string length, size of base class. + return element.size()+ sizeof(S_LEN_TYPE) + sizeOfBase(); + } + + template + std::unique_ptr protoParse() { + std::unique_ptr protoItem(new T()); + bool success = proto::ProtoImporter::setMsgFrom(*protoItem, element.data(), element.length()); + if (not success) { + return nullptr; + } + return protoItem; + } + + /// This function is only usable if it is know that all data has been read from the socket. + /// This the case with UDP, and boost asio async reads that return after X bytes read. 
+ template + static std::unique_ptr protoParse(BufferUdp& data) { + StringElement::Ptr itemData = std::dynamic_pointer_cast(MsgElement::retrieve(data, "protoParse")); + if (itemData == nullptr) { return nullptr; } + return itemData->protoParse(); + } +}; + +}}} // namespace lsst::qserv::loader + +#endif // LSST_QSERV_LOADER_MSGELEMENT_H diff --git a/core/modules/loader/Neighbor.cc b/core/modules/loader/Neighbor.cc new file mode 100644 index 0000000000..455d50d0de --- /dev/null +++ b/core/modules/loader/Neighbor.cc @@ -0,0 +1,56 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ * + */ + +// Class header +#include "loader/Neighbor.h" + +// System headers + +// Third-party headers + +// Qserv headers + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.Neighbor"); +} + +namespace lsst { +namespace qserv { +namespace loader { + +void Neighbor::setId(uint32_t id) { + std::lock_guard lck(_nMtx); + if (_id != id) { + LOGS(_log, LOG_LVL_INFO, getTypeStr() << "Neighbor changing id from(" << _id <<") to(" << id << ")"); + _established = false; + _addressTcp.reset(new NetworkAddress("", -1)); + } + _id = id; +} + + +}}} // namespace lsst::qserv::loader diff --git a/core/modules/loader/Neighbor.h b/core/modules/loader/Neighbor.h new file mode 100644 index 0000000000..aa7909d528 --- /dev/null +++ b/core/modules/loader/Neighbor.h @@ -0,0 +1,127 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ * + */ +#ifndef LSST_QSERV_LOADER_NEIGHBOR_H +#define LSST_QSERV_LOADER_NEIGHBOR_H + +// system headers +#include +#include + +// third party headers +#include "boost/asio.hpp" + +// Qserv headers +#include "loader/Central.h" + + +namespace lsst { +namespace qserv { +namespace loader { + + +/// Class to describe one of a worker's neighbors. +class Neighbor { +public: + enum Type { + LEFT = 1, + RIGHT = 2 + }; + + Neighbor() = delete; + explicit Neighbor(Type t) : _type(t) {} + + std::string getTypeStr() { return _type == LEFT ? "LEFT" : "RIGHT"; } + + void setAddressTcp(std::string const& hostName, int port) { + std::lock_guard lck(_nMtx); + _addressTcp.reset(new NetworkAddress(hostName, port)); + } + + void setAddressTcp(NetworkAddress const& addr) { + std::lock_guard lck(_nMtx); + _addressTcp.reset(new NetworkAddress(addr)); + } + + NetworkAddress getAddressTcp() { + std::lock_guard lck(_nMtx); + return *_addressTcp; + } + + void setAddressUdp(std::string const& hostName, int port) { + std::lock_guard lck(_nMtx); + _addressUdp.reset(new NetworkAddress(hostName, port)); + } + + void setAddressUdp(NetworkAddress const& addr) { + std::lock_guard lck(_nMtx); + _addressUdp.reset(new NetworkAddress(addr)); + } + + NetworkAddress getAddressUdp() { + std::lock_guard lck(_nMtx); + return *_addressUdp; + } + + void setId(uint32_t id); + uint32_t getId() const { return _id; } + + void setEstablished(bool val) { + std::lock_guard lck(_nMtx); + _established = val; + } + + void setKeyCount(int count) { + std::lock_guard lck(_nMtx); + _keyCount = count; + } + + void setRange(KeyRange const& range) { + std::lock_guard lck(_nMtx); + _strRange = range; + } + + void getKeyData(int& keyCount, KeyRange& range) { + std::lock_guard lck(_nMtx); + keyCount = _keyCount; + range = _strRange; + } + + + bool getEstablished() const { return _established; } + +private: + NetworkAddress::UPtr _addressTcp{new NetworkAddress("", -1)}; + NetworkAddress::UPtr _addressUdp{new NetworkAddress("", 
-1)}; + uint32_t _id{0}; ///< Id of neighbor, 0 means no neighbor. + bool _established{false}; + std::mutex _nMtx; + Type _type; + int _keyCount{0}; + KeyRange _strRange; +}; + +}}} // namespace lsst::qserv::loader + + +#endif // LSST_QSERV_LOADER_NEIGHBOR_H diff --git a/core/modules/loader/NetworkAddress.cc b/core/modules/loader/NetworkAddress.cc new file mode 100644 index 0000000000..db31494545 --- /dev/null +++ b/core/modules/loader/NetworkAddress.cc @@ -0,0 +1,78 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + + +// Class header +#include "loader/NetworkAddress.h" + +// System headers +#include + +// Third-party headers + +// Qserv headers +#include "loader/LoaderMsg.h" +#include "loader/LoaderMsg.h" +#include "proto/ProtoImporter.h" +#include "proto/loader.pb.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.NetworkAddress"); +} + +namespace lsst { +namespace qserv { +namespace loader { + + +NetworkAddress::UPtr NetworkAddress::create(BufferUdp::Ptr const& bufData, int& tcpPort, std::string const& note) { + + StringElement::Ptr data = std::dynamic_pointer_cast(MsgElement::retrieve(*bufData, "NetworkAddress::create")); + + if (data == nullptr) { + LOGS(_log, LOG_LVL_WARN, "NetworkAddress::create data==nullptr " + note); + return nullptr; + } + + auto addr = data->protoParse(); + if (addr == nullptr) { + LOGS(_log, LOG_LVL_WARN, "NetworkAddress::create STATUS_PARSE_ERR in " + note); + } + + UPtr netAddr(new NetworkAddress(addr->ip(), addr->udpport())); + tcpPort = addr->tcpport(); + return netAddr; +} + + +std::ostream& operator<<(std::ostream& os, NetworkAddress const& adr) { + os << "ip(" << adr.ip << ":" << adr.port << ")"; + return os; +} + +}}} // namespace lsst::qserv::loader + + diff --git a/core/modules/loader/NetworkAddress.h b/core/modules/loader/NetworkAddress.h new file mode 100644 index 0000000000..ad656ed8d0 --- /dev/null +++ b/core/modules/loader/NetworkAddress.h @@ -0,0 +1,114 @@ + +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_NETWORKADDRESS_H +#define LSST_QSERV_LOADER_NETWORKADDRESS_H + +// system headers +#include +#include + +// Qserv headers +#include "util/ThreadPool.h" +#include "loader/BufferUdp.h" + +namespace lsst { +namespace qserv { +namespace loader { + +class StringElement; + +/// Comparable network addresses. +/// The member variables are immutable as these will be used as keys in std::map. +struct NetworkAddress { + using Ptr = std::shared_ptr; + using UPtr = std::unique_ptr; + + const std::string ip; + const int port; + + NetworkAddress(std::string const& ip_, int port_) : ip(ip_), port(port_) {} + NetworkAddress() = delete; + NetworkAddress(NetworkAddress const&) = default; + + static UPtr create(BufferUdp::Ptr const& bufData, int& tcpPort, std::string const& note); + + bool operator==(NetworkAddress const& other) const { + return (port == other.port && ip == other.ip); + } + + bool operator!=(NetworkAddress const& other) const { + return !(*this == other); + } + + bool operator<(NetworkAddress const& other) const { + auto compRes = ip.compare(other.ip); + if (compRes < 0) { return true; } + if (compRes > 0) { return false; } + return port < other.port; + } + + bool operator>(NetworkAddress const& other) const { + return (other < *this); + } + + friend std::ostream& operator<<(std::ostream& os, NetworkAddress const& adr); +}; + + +/// This class is used to create latched NetworkAddress's. These are addresses +/// that will never change after they have been set and are thread safe. 
+class NetworkAddressLatch { +public: + NetworkAddressLatch() = default; + NetworkAddressLatch(NetworkAddressLatch const&) = delete; + NetworkAddressLatch& operator=(NetworkAddressLatch const&) = delete; + + ~NetworkAddressLatch() = default; + + NetworkAddress getAddress() const { + if (_valid) return *_address; + return NetworkAddress("", 0); + } + + /// Set the address to 'addr'. This can only be done once, + /// so 'addr' needs to be correct. + /// @return true if the address was set to 'addr' and 'addr' was valid. + bool setAddress(NetworkAddress const& addr) { + if (addr.ip.empty()) return false; + if (_valid) return false; + _address.reset(new NetworkAddress(addr)); + _valid = true; // must be set after address is set. + return true; + } + +private: + std::atomic _valid{false}; ///< Indicates the _address is valid for use when true. + NetworkAddress::UPtr _address{new NetworkAddress("",0)}; ///< empty string indicates address invalid. +}; + +}}} // namespace lsst::qserv::loader + + +#endif // LSST_QSERV_LOADER_NETWORKADDRESS_H diff --git a/core/modules/loader/SConscript b/core/modules/loader/SConscript new file mode 100644 index 0000000000..c2c1f93e1f --- /dev/null +++ b/core/modules/loader/SConscript @@ -0,0 +1,31 @@ +# -*- python -*- +Import('env') +Import('standardModule') + +import os.path + +# Harvest special binary products - files starting with the package's name +# followed by underscore: +# +# qserv-.cc +# + +bin_cc_files = {} +path = "./" +for f in env.Glob(os.path.join(path, "app*.cc"), source=True, strings=True): + print(f) + bin_cc_files[f] = [ + "loader", + "qserv_common", + "util", + "protobuf", + "boost_filesystem", + "boost_system", + "log", + "log4cxx"] + +# Initiate the standard sequence of actions for this module by excluding +# the above discovered binary sources + +standardModule(env, bin_cc_files=bin_cc_files, unit_tests="") + diff --git a/core/modules/loader/ServerTcpBase.cc b/core/modules/loader/ServerTcpBase.cc new file mode 100644 
index 0000000000..0d5b996284 --- /dev/null +++ b/core/modules/loader/ServerTcpBase.cc @@ -0,0 +1,648 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + + +// Class header +#include "loader/ServerTcpBase.h" + +// System headers +#include +#include + +// Third-party headers + + +// qserv headers +#include "loader/CentralWorker.h" +#include "loader/LoaderMsg.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.ServerTcpBase"); + +const int testNewNodeName = 73; // TODO Get rid of this, possibly make NodeName member of ServerTCPBase +unsigned int testNewNodeValuePairCount = 81; +const int testOldNodeName = 42; // TODO Get rid of this, possibly make NodeName member of ServerTCPBase +unsigned int testOldNodeKeyCount = 1231; +} + +namespace lsst { +namespace qserv { +namespace loader { + + +void ServerTcpBase::_startAccept() { + TcpBaseConnection::Ptr newConnection = + TcpBaseConnection::create(_acceptor.get_executor().context(), this); + + auto handleAcceptFunc = [this, newConnection](const boost::system::error_code& error) { + if (!error) { + _connections.insert(newConnection); + newConnection->start(); + } + 
_startAccept(); + }; + _acceptor.async_accept(newConnection->socket(), handleAcceptFunc); +} + + +bool ServerTcpBase::writeData(AsioTcp::socket& socket, BufferUdp& data) { + while (data.getBytesLeftToRead() > 0) { + // Read cursor advances (manually in this case) as data is read from the buffer. + auto res = boost::asio::write(socket, + boost::asio::buffer(data.getReadCursor(), data.getBytesLeftToRead())); + data.advanceReadCursor(res); + } + return true; +} + + +uint32_t ServerTcpBase::getOurName() { + return (_centralWorker == nullptr) ? 0 : _centralWorker->getOurId(); +} + + +bool ServerTcpBase::testConnect() { + try + { + LOGS(_log, LOG_LVL_INFO, "ServerTcpBase::testConnect 1"); + boost::asio::io_context io_context; + + AsioTcp::resolver resolver(io_context); + AsioTcp::resolver::results_type endpoints = resolver.resolve("127.0.0.1", std::to_string(_port)); + + AsioTcp::socket socket(io_context); + boost::asio::connect(socket, endpoints); + + + // Get name from server + BufferUdp data(500); + auto msgElem = data.readFromSocket(socket, "ServerTcpBase::testConnect"); + // First element should be UInt32Element with the other worker's name + UInt32Element::Ptr nghName = std::dynamic_pointer_cast(msgElem); + if (nghName == nullptr) { + throw LoaderMsgErr(ERR_LOC, "testConnect() first element wasn't correct type " + + msgElem->getStringVal()); + } + + LOGS(_log, LOG_LVL_INFO, "server name=" << nghName->element); + + data.reset(); + UInt32Element kind(LoaderMsg::TEST); + kind.appendToData(data); + UInt32Element bytes(1234); // dummy value + bytes.appendToData(data); + writeData(socket, data); + + // send back our name and left neighbor message. 
+ data.reset(); + UInt32Element imRightKind(LoaderMsg::IM_YOUR_R_NEIGHBOR); + imRightKind.appendToData(data); + UInt32Element ourName(testNewNodeName); + ourName.appendToData(data); + UInt64Element valuePairCount(testNewNodeValuePairCount); + valuePairCount.appendToData(data); + writeData(socket, data); + + // Get back left neighbor information + auto msgKind = std::dynamic_pointer_cast( + data.readFromSocket(socket, "testConnect 2 kind")); + auto msgLNName = std::dynamic_pointer_cast( + data.readFromSocket(socket, "testConnect 2 LNName")); + auto msgLKeyCount = std::dynamic_pointer_cast( + data.readFromSocket(socket, "testConnect 2 LKeyCount")); + if (msgKind == nullptr || msgLNName == nullptr || msgLKeyCount == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "ServerTcpBase::testConnect 2 - nullptr" << + " msgKind=" << (msgKind ? "ok" : "null") << + " msgLNName=" << (msgLNName ? "ok" : "null") << + " msgLKeyCount=" << (msgLKeyCount ? "ok" : "null")); + return false; + } + + if (msgKind->element != LoaderMsg::IM_YOUR_L_NEIGHBOR || + msgLNName->element != testOldNodeName || + msgLKeyCount->element != testOldNodeKeyCount) { + LOGS(_log, LOG_LVL_ERROR, "ServerTcpBase::testConnect 2 - incorrect data" << + " Kind=" << msgKind->element << + " LNName=" << msgLNName->element << + " LKeyCount=" << msgLKeyCount->element); + return false; + } + LOGS(_log, LOG_LVL_INFO, "ServerTcpBase::testConnect 2 - ok data" << + " Kind=" << msgKind->element << + " LNName=" << msgLNName->element << + " LKeyCount=" << msgLKeyCount->element); + + data.reset(); + UInt32Element verified(LoaderMsg::NEIGHBOR_VERIFIED); + verified.appendToData(data); + writeData(socket, data); + + boost::system::error_code ec; + socket.shutdown(boost::asio::ip::tcp::socket::shutdown_both, ec); + if (ec) { + LOGS(_log, LOG_LVL_ERROR, "ServerTcpBase::testConnect shutdown ec=" << ec.message()); + return false; + } + // socket.close(); socket should close when it falls out of scope. 
+ } + catch (std::exception const& e) { + std::cerr << e.what() << std::endl; + return false; + } + + return true; +} + + +void TcpBaseConnection::start() { + uint32_t ourName = _serverTcpBase->getOurName(); + UInt32Element name(ourName); + name.appendToData(_buf); + auto self = shared_from_this(); + boost::asio::async_write(_socket, boost::asio::buffer(_buf.getReadCursor(), _buf.getBytesLeftToRead()), + [self](boost::system::error_code const& error, size_t bytesTransferred) { + self->_readKind(error, bytesTransferred); + } + ); +} + + +void TcpBaseConnection::shutdown() { + boost::system::error_code ec; + _socket.shutdown(boost::asio::ip::tcp::socket::shutdown_both, ec); + _socket.close(); +} + + +void TcpBaseConnection::_freeConnect() { + auto centralW = _serverTcpBase->getCentralWorker(); + if (centralW != nullptr) { + centralW->cancelShiftsWithLeftNeighbor(); + } + _serverTcpBase->freeConnection(shared_from_this()); +} + + +/// Find out what KIND of message is coming in. +void TcpBaseConnection::_readKind(boost::system::error_code const&, size_t /*bytes_transferred*/) { + _buf.reset(); + + UInt32Element elem; + size_t const bytes = 2*elem.transmitSize(); // uint32 for kind + uint32 for length of message + + if (bytes > _buf.getAvailableWriteLength()) { + LOGS(_log, LOG_LVL_ERROR, "_readKind Buffer would have overflowed"); + _freeConnect(); + return; + } + + LOGS(_log, LOG_LVL_DEBUG, "TcpBaseConnection::_readKind _recvKind reset _buf=" << _buf.dumpStr()); + auto self = shared_from_this(); + boost::asio::async_read(_socket, boost::asio::buffer(_buf.getWriteCursor(), bytes), + boost::asio::transfer_at_least(bytes), + [self](const boost::system::error_code& ec, size_t bytesTrans) { + self->_recvKind(ec, bytesTrans); + } + ); +} + + +void TcpBaseConnection::_recvKind(const boost::system::error_code& ec, size_t bytesTrans) { + if (ec) { + LOGS(_log, LOG_LVL_ERROR, "_recvKind ec=" << ec); + _freeConnect(); + return; + } + // Fix the buffer with the information given. 
+ _buf.advanceWriteCursor(bytesTrans); + auto msgElem = MsgElement::retrieve(_buf, "1TcpBaseConnection::_recvKind"); + auto msgKind = std::dynamic_pointer_cast(msgElem); + if (msgKind == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "_recvKind unexpected type of msg"); + _freeConnect(); + return; + } + msgElem = MsgElement::retrieve(_buf, "2TcpBaseConnection::_recvKind"); + auto msgBytes = std::dynamic_pointer_cast(msgElem); + if (msgBytes == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "_recvKind missing bytes"); + _freeConnect(); + return; + } + LOGS(_log, LOG_LVL_INFO, "_recvKind kind=" << msgKind->element << " bytes=" << msgBytes->element); + switch (msgKind->element) { + case LoaderMsg::IM_YOUR_L_NEIGHBOR: + LOGS(_log, LOG_LVL_INFO, "_recvKind IM_YOUR_L_NEIGHBOR"); + _handleImYourLNeighbor(msgBytes->element); + break; + case LoaderMsg::SHIFT_TO_RIGHT: + LOGS(_log, LOG_LVL_INFO, "_recvKind SHIFT_TO_RIGHT our left neighbor is shifting to us"); + _handleShiftToRight(msgBytes->element); + break; + case LoaderMsg::SHIFT_FROM_RIGHT: + LOGS(_log, LOG_LVL_INFO, + "_recvKind SHIFT_FROM_RIGHT our left neighbor needs keys shifted from this"); + _handleShiftFromRight(msgBytes->element); + break; + case LoaderMsg::TEST: + LOGS(_log, LOG_LVL_INFO, "_recvKind TEST"); + _handleTest(); + break; + default: + LOGS(_log, LOG_LVL_ERROR, "_recvKind unexpected kind=" << msgKind->element); + _freeConnect(); + } +} + + +void TcpBaseConnection::_handleTest() { + _buf.reset(); + + UInt32Element kind; + UInt32Element rNName; + UInt64Element valuePairCount; + size_t bytes = kind.transmitSize() + rNName.transmitSize() + valuePairCount.transmitSize(); + + if (bytes > _buf.getAvailableWriteLength()) { + LOGS(_log, LOG_LVL_ERROR, "_handleTest Buffer would have overflowed"); + _freeConnect(); + return; + } + auto self = shared_from_this(); + boost::asio::async_read(_socket, boost::asio::buffer(_buf.getWriteCursor(), bytes), + boost::asio::transfer_at_least(bytes), + [self](const boost::system::error_code& 
ec, size_t bytesTrans) { + self->_handleTest2(ec, bytesTrans); + } + ); +} + + +void TcpBaseConnection::_handleTest2(const boost::system::error_code& ec, size_t bytesTrans) { + if (ec) { + LOGS(_log, LOG_LVL_ERROR, "_recvKind ec=" << ec); + _freeConnect(); + return; + } + // Fix the buffer with the information given. + _buf.advanceWriteCursor(bytesTrans); + auto msgElem = MsgElement::retrieve(_buf, "_handleTest2_a"); + auto msgKind = std::dynamic_pointer_cast(msgElem); + msgElem = MsgElement::retrieve(_buf, "_handleTest2_b"); + auto msgName = std::dynamic_pointer_cast(msgElem); + msgElem = MsgElement::retrieve(_buf, " _handleTest2_c"); + auto msgKeys = std::dynamic_pointer_cast(msgElem); + + // TODO move most of this to CentralWorker + // test that this is the neighbor that was expected. + if (msgKind->element != LoaderMsg::IM_YOUR_R_NEIGHBOR || + msgName->element != testNewNodeName || + msgKeys->element != testNewNodeValuePairCount) { + LOGS(_log, LOG_LVL_ERROR, "_handleTest2 unexpected element or name" << + " kind=" << msgKind->element << " msgName=" << msgName->element << + " keys=" << msgKeys->element); + _freeConnect(); + return; + } else { + LOGS(_log, LOG_LVL_INFO, "_handleTest2 kind=" << msgKind->element << " msgName=" + << msgName->element << " keys=" << msgKeys->element); + } + + // send im_left_neighbor message, how many elements we have. If it had zero elements, an element will be sent + // so that new neighbor gets a range. 
+ _buf.reset(); + // build the protobuffer + msgKind = std::make_shared(LoaderMsg::IM_YOUR_L_NEIGHBOR); + msgKind->appendToData(_buf); + UInt32Element ourName(testOldNodeName); + ourName.appendToData(_buf); + UInt64Element keyCount(testOldNodeKeyCount); + keyCount.appendToData(_buf); + auto self = shared_from_this(); + boost::asio::async_write(_socket, boost::asio::buffer(_buf.getReadCursor(), _buf.getBytesLeftToRead()), + [self](const boost::system::error_code& ec, size_t bytesTrans) { + self->_handleTest2b(ec, bytesTrans); + } + ); +} + + +void TcpBaseConnection::_handleTest2b(const boost::system::error_code& ec, size_t bytesTrans) { + UInt32Element kind; + size_t bytes = kind.transmitSize(); + _buf.reset(); + auto self = shared_from_this(); + boost::asio::async_read(_socket, boost::asio::buffer(_buf.getWriteCursor(), bytes), + boost::asio::transfer_at_least(bytes), + [self](const boost::system::error_code& ec, size_t bytesTrans) { + self->_handleTest2c(ec, bytesTrans); + } + ); +} + + +void TcpBaseConnection::_handleTest2c(const boost::system::error_code& ec, size_t bytesTrans) { + if (ec) { + LOGS(_log, LOG_LVL_ERROR, "_recvKind ec=" << ec); + _freeConnect(); + return; + } + // Fix the buffer with the information given. + _buf.advanceWriteCursor(bytesTrans); + auto msgElem = MsgElement::retrieve(_buf, "_handleTest2c"); + if (msgElem == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "_handleTest2b Kind nullptr error"); + _freeConnect(); + return; + } + auto msgKind = std::dynamic_pointer_cast(msgElem); + if (msgKind != nullptr && msgKind->element != LoaderMsg::NEIGHBOR_VERIFIED) { + LOGS(_log, LOG_LVL_ERROR, "_handleTest2b NEIGHBOR_VERIFIED error" << + " kind=" << msgKind->element); + _freeConnect(); + return; + } + LOGS(_log, LOG_LVL_INFO, "TcpBaseConnection::_handleTest SUCCESS"); + _freeConnect(); // Close the connection at the end of the test. 
+} + + +void TcpBaseConnection::_handleImYourLNeighbor(uint32_t bytesInMsg) { + // Need to figure out the difference between bytes read and bytes in _buf + if (bytesInMsg > _buf.getAvailableWriteLength()) { + LOGS(_log, LOG_LVL_ERROR, "_handleImYourLNeighbor Buffer would have overflowed"); + _freeConnect(); + return; + } + LOGS(_log, LOG_LVL_INFO, "_handleImYourLNeighbor bytes=" << bytesInMsg << + " buf=" << _buf.dumpStr(false)); + auto self = shared_from_this(); + boost::asio::async_read(_socket, boost::asio::buffer(_buf.getWriteCursor(), bytesInMsg), + boost::asio::transfer_at_least(bytesInMsg), + [self](boost::system::error_code const& ec, size_t bytesTrans) { + self->_handleImYourLNeighbor1(ec, bytesTrans); + } + ); +} + +void TcpBaseConnection::_handleImYourLNeighbor1(boost::system::error_code const& ec, size_t bytesTrans) { + std::string const funcName = "_handleImYourLNeighbor1"; + if (ec) { + LOGS(_log, LOG_LVL_ERROR, funcName << " ec=" << ec); + _freeConnect(); + return; + } + // Fix the buffer with the information given. + _buf.advanceWriteCursor(bytesTrans); + LOGS(_log, LOG_LVL_INFO, funcName << " bytes=" << bytesTrans << " _buf" << _buf.dumpStr(false)); + try { + // TODO move as much of this to CentralWorker as possible + // Parse left neighbor's key and range information. 
+ LOGS(_log, LOG_LVL_INFO, funcName << " parsing bytes=" << bytesTrans << + " _buf" << _buf.dumpStr(false)); + auto protoItem = StringElement::protoParse(_buf); + if (protoItem == nullptr) { + throw LoaderMsgErr(ERR_LOC, "protoItem nullptr"); + } + NeighborsInfo nInfo; + auto workerName = protoItem->wid(); + nInfo.keyCount = protoItem->mapsize(); + nInfo.recentAdds = protoItem->recentadds(); + proto::WorkerRange protoRange = protoItem->range(); + LOGS(_log, LOG_LVL_INFO, funcName << " WorkerKeysInfo name=" << workerName << + " keyCount=" << nInfo.keyCount << " recentAdds=" << nInfo.recentAdds); + bool valid = protoRange.valid(); + KeyRange leftRange; + KeyRange newLeftRange; + if (valid) { + CompositeKey minKey(protoRange.minint(), protoRange.minstr()); + CompositeKey maxKey(protoRange.maxint(), protoRange.maxstr()); + bool unlimited = protoRange.maxunlimited(); + leftRange.setMinMax(minKey, maxKey, unlimited); + LOGS(_log, LOG_LVL_INFO, funcName << " leftRange=" << leftRange); + newLeftRange = _serverTcpBase->getCentralWorker()->updateRangeWithLeftData(leftRange); + } + proto::Neighbor protoLeftNeigh = protoItem->left(); + nInfo.neighborLeft->update(protoLeftNeigh.wid()); // Not really useful in this case. + proto::Neighbor protoRightNeigh = protoItem->right(); + nInfo.neighborRight->update(protoRightNeigh.wid()); // This should be our name + if (nInfo.neighborRight->get() != _serverTcpBase->getOurName()) { + LOGS(_log, LOG_LVL_ERROR, "Our (" << _serverTcpBase->getOurName() << + ") left neighbor does not have our name as its right neighbor" ); + } + + _serverTcpBase->getCentralWorker()->setNeighborInfoLeft(workerName, nInfo.keyCount, newLeftRange); + + // Need to send our range and key count back to left neighbor so it + // can figure out what to do with its range. 
+ _buf.reset(); + StringElement strWKI; + std::unique_ptr protoWKI = + _serverTcpBase->getCentralWorker()->workerKeysInfoBuilder(); + protoWKI->SerializeToString(&(strWKI.element)); + UInt32Element bytesInMsg(strWKI.transmitSize()); + // Send the number of bytes in the message so TCP client knows how many bytes to read. + bytesInMsg.appendToData(_buf); + strWKI.appendToData(_buf); + ServerTcpBase::writeData(_socket, _buf); + LOGS(_log, LOG_LVL_INFO, funcName << " done"); + } catch (LoaderMsgErr const& ex) { + LOGS(_log, LOG_LVL_ERROR, funcName << " Buffer failed " << ex.what()); + _freeConnect(); + return; + } catch (boost::system::system_error const& ex) { + LOGS(_log, LOG_LVL_ERROR, funcName << " write failed " << ex.what()); + _freeConnect(); + return; + } + boost::system::error_code ecode; + _readKind(ecode, 0); // get next message TODO cleaner way to make this call? +} + + +// Our left neighbor is shifting key value pairs to this. +void TcpBaseConnection::_handleShiftToRight(uint32_t bytesInMsg) { + // Need to figure out the difference between bytes read and bytes in _buf + if (bytesInMsg > _buf.getAvailableWriteLength()) { + LOGS(_log, LOG_LVL_ERROR, "_handleShiftToRight Buffer would have overflowed bytes=" << bytesInMsg); + _freeConnect(); + return; + } + LOGS(_log, LOG_LVL_INFO, " _handleShiftToRight bytes=" << bytesInMsg << " buf=" << _buf.dumpStr(false)); + auto self = shared_from_this(); + boost::asio::async_read(_socket, boost::asio::buffer(_buf.getWriteCursor(), bytesInMsg), + boost::asio::transfer_at_least(bytesInMsg), + [self](boost::system::error_code const& ec, size_t bytesTrans) { + self->_handleShiftToRight1(ec, bytesTrans); + } + ); +} + + +void TcpBaseConnection::_handleShiftToRight1(boost::system::error_code const& ec, size_t bytesTrans) { + std::string const funcName = "_handleShiftToRight1"; + if (ec) { + LOGS(_log, LOG_LVL_ERROR, funcName << " ec=" << ec); + _freeConnect(); + return; + } + // Fix the buffer with the information given. 
+ _buf.advanceWriteCursor(bytesTrans); + LOGS(_log, LOG_LVL_INFO, funcName << " bytes=" << bytesTrans << " _buf" << _buf.dumpStr(false)); + try { + // TODO move as much of this to CentralWorker as possible + LOGS(_log, LOG_LVL_INFO, funcName << " parsing bytes=" << bytesTrans << + " _buf" << _buf.dumpStr(false)); + auto protoKeyList = StringElement::protoParse(_buf); + if (protoKeyList == nullptr) { + throw LoaderMsgErr(ERR_LOC, "protoKeyList nullptr"); + } + // Extract key pairs from the protobuffer + int keyCount = protoKeyList->keycount(); // TODO delete keycount from KeyList + int sz = protoKeyList->keypair_size(); + if (keyCount != sz) { + LOGS(_log, LOG_LVL_WARN, funcName << " keyCount(" << keyCount << ") != sz(" << sz << ")"); + } + std::vector keyList; + for (int j=0; j < sz; ++j) { + proto::KeyInfo const& protoKI = protoKeyList->keypair(j); + ChunkSubchunk chSub(protoKI.chunk(), protoKI.subchunk()); + CompositeKey key(protoKI.keyint(), protoKI.keystr()); + keyList.push_back(std::make_pair(key, chSub)); + } + + // Now that the proto buffer was read without error, insert into map and adjust our range. + _serverTcpBase->getCentralWorker()->insertKeys(keyList, true); + + // Send the SHIFT_TO_RIGHT_KEYS_RECEIVED response back. + _buf.reset(); + UInt32Element elem(LoaderMsg::SHIFT_TO_RIGHT_RECEIVED); + elem.appendToData(_buf); + ServerTcpBase::writeData(_socket, _buf); + LOGS(_log, LOG_LVL_INFO, funcName << " done dumpKeys " << + _serverTcpBase->getCentralWorker()->dumpKeysStr(2)); + } catch (LoaderMsgErr const& ex) { + LOGS(_log, LOG_LVL_ERROR, funcName << " keyShift failed " << ex.what()); + _freeConnect(); + return; + } catch (boost::system::system_error const& ex) { + LOGS(_log, LOG_LVL_ERROR, funcName << " keyShift write failed " << ex.what()); + _freeConnect(); + return; + } + boost::system::error_code ecode; + _readKind(ecode, 0); // get next message TODO cleaner way to make this call? 
+} + +// Our left neighbor wants this node to shift key value pairs to it. +void TcpBaseConnection::_handleShiftFromRight(uint32_t bytesInMsg) { + std::string const funcName("TcpBaseConnection::_handleShiftFromRight"); + // Need to figure out the difference between bytes read and bytes in _buf + if (bytesInMsg > _buf.getAvailableWriteLength()) { + LOGS(_log, LOG_LVL_ERROR, funcName << " Buffer would have overflowed bytes=" << bytesInMsg); + _freeConnect(); + return; + } + LOGS(_log, LOG_LVL_INFO, funcName << " bytes=" << bytesInMsg << " buf=" << _buf.dumpStr(false)); + auto self = shared_from_this(); + boost::asio::async_read(_socket, boost::asio::buffer(_buf.getWriteCursor(), bytesInMsg), + boost::asio::transfer_at_least(bytesInMsg), + [self](boost::system::error_code const& ec, size_t bytesTrans) { + self->_handleShiftFromRight1(ec, bytesTrans); + } + ); +} + + +void TcpBaseConnection::_handleShiftFromRight1(boost::system::error_code const& ec, size_t bytesTrans) { + std::string const fName = "_handleShiftFromRight1"; + if (ec) { + LOGS(_log, LOG_LVL_ERROR, fName << " ec=" << ec); + _freeConnect(); + return; + } + // Fix the buffer with the information given. 
+ _buf.advanceWriteCursor(bytesTrans); + LOGS(_log, LOG_LVL_INFO, fName << " bytes=" << bytesTrans << " _buf" << _buf.dumpStr(false)); + try { + // TODO move as much of this to CentralWorker as possible + LOGS(_log, LOG_LVL_INFO, fName << " parsing bytes=" << bytesTrans << " _buf" << _buf.dumpStr(false)); + auto protoKeyShiftReq = StringElement::protoParse(_buf); + if (protoKeyShiftReq == nullptr) { + throw LoaderMsgErr(ERR_LOC, " KeyShiftRequest parse failure "); + } + // Extract keysToShift from the protobuffer + int keyShiftReq = protoKeyShiftReq->keystoshift(); + LOGS(_log, LOG_LVL_INFO, fName << " keystoshift=" << keyShiftReq); + if (keyShiftReq < 1) { + throw LoaderMsgErr(ERR_LOC, " KeyShiftRequest for < 1 key"); + } + // Build and send the KeyList message back (send smallest keys to right node) + StringElement::UPtr keyList = _serverTcpBase->getCentralWorker()->buildKeyList(keyShiftReq); + auto keyListTransmitSz = keyList->transmitSize(); + BufferUdp data(keyListTransmitSz); + if (data.getMaxLength() > TcpBaseConnection::getMaxBufSize()) { + std::string errMsg = fName + " SHIFT_FROM_RIGHT FAILED message too big sz=" + + std::to_string(data.getMaxLength()) + + " max=" + std::to_string(TcpBaseConnection::getMaxBufSize()); + LOGS(_log, LOG_LVL_ERROR, errMsg); + // This will keep getting thrown and never work, but at least it will show up + // in the logs. + // TODO create new exception, catch it and halve the number of keys to shift ??? + throw LoaderMsgErr(ERR_LOC, errMsg); + } + keyList->appendToData(data); + ServerTcpBase::writeData(_socket, data); + + // Wait for the SHIFT_FROM_RIGHT_KEYS_RECEIVED response back. 
+ _buf.reset(); + auto msgElem = _buf.readFromSocket(_socket, fName +" waiting for SHIFT_FROM_RIGHT_KEYS_RECEIVED"); + UInt32Element::Ptr received = std::dynamic_pointer_cast(msgElem); + if (received == nullptr || received->element != LoaderMsg::SHIFT_FROM_RIGHT_RECEIVED) { + LOGS(_log, LOG_LVL_INFO, fName << " did not get SHIFT_FROM_RIGHT_RECEIVED"); + throw LoaderMsgErr(ERR_LOC, " receive failure"); + } + _serverTcpBase->getCentralWorker()->finishShiftFromRight(); + LOGS(_log, LOG_LVL_INFO, fName << " done dumpKeys " << + _serverTcpBase->getCentralWorker()->dumpKeysStr(2)); + } catch (LoaderMsgErr const& ex) { + LOGS(_log, LOG_LVL_ERROR, fName << " keyShift failed " << ex.what()); + _freeConnect(); + return; + } catch (boost::system::system_error const& ex) { + LOGS(_log, LOG_LVL_ERROR, fName << " keyShift write failed " << ex.what()); + _freeConnect(); + return; + } + boost::system::error_code ecode; + _readKind(ecode, 0); // get next message TODO cleaner way to make this call? +} + + +}}} // namespace lsst::qserrv::loader + + + diff --git a/core/modules/loader/ServerTcpBase.h b/core/modules/loader/ServerTcpBase.h new file mode 100644 index 0000000000..30fcbe3b9e --- /dev/null +++ b/core/modules/loader/ServerTcpBase.h @@ -0,0 +1,172 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ *
+ */
+#ifndef LSST_QSERV_LOADER_SERVERTCPBASE_H
+#define LSST_QSERV_LOADER_SERVERTCPBASE_H
+
+// system headers
+// NOTE(review): the header names were missing from this text; these are the standard
+// headers the declarations below need -- confirm against the repository.
+#include <cstdint>
+#include <memory>
+#include <set>
+#include <thread>
+#include <vector>
+
+// third party headers
+#include "boost/asio.hpp"
+
+// Qserv headers
+#include "loader/BufferUdp.h"
+
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+typedef boost::asio::ip::tcp AsioTcp;
+
+class CentralWorker;
+class ServerTcpBase;
+
+/// Class to maintain a worker's TCP connection using boost::asio.
+/// Instances are only created through create(), so they are always owned by a
+/// shared_ptr and the asynchronous handlers can keep them alive.
+/// TODO: Rename as this has become specialized for a worker.
+class TcpBaseConnection : public std::enable_shared_from_this<TcpBaseConnection> {
+public:
+    typedef std::shared_ptr<TcpBaseConnection> Ptr;
+
+    /// @return a new connection whose socket is bound to 'io_context' and which
+    ///         refers back to 'tcpBase'.
+    static Ptr create(boost::asio::io_context& io_context, ServerTcpBase* tcpBase) {
+        // The constructor is private, so std::make_shared cannot be used here.
+        return Ptr(new TcpBaseConnection(io_context, tcpBase));
+    }
+
+    ~TcpBaseConnection() { shutdown(); }
+
+    AsioTcp::socket& socket() {
+        return _socket;
+    }
+
+    void start();
+    void shutdown();
+
+    /// @return the maximum size of _buf.
+    static size_t getMaxBufSize() { return BufferUdp::MAX_MSG_SIZE_TCP; }
+
+private:
+    TcpBaseConnection(boost::asio::io_context& io_context, ServerTcpBase* tcpBase) :
+        _socket(io_context), _serverTcpBase(tcpBase) {}
+
+    void _readKind(const boost::system::error_code&, size_t /*bytes_transferred*/);
+    void _recvKind(const boost::system::error_code&, size_t bytesTrans);
+
+    /// Free the connection and cancel shifts from this server.
+    void _freeConnect();
+
+    AsioTcp::socket _socket;
+    ServerTcpBase* _serverTcpBase; // _serverTcpBase controls this class' lifetime.
+    BufferUdp _buf{BufferUdp::MAX_MSG_SIZE_TCP};
+
+    /// Handle the series of messages where another worker is claiming to be our left neighbor.
+    void _handleImYourLNeighbor(uint32_t bytes);
+    void _handleImYourLNeighbor1(boost::system::error_code const& ec, size_t bytesTrans);
+    void _handleImYourLNeighbor2(boost::system::error_code const& ec, size_t bytesTrans);
+
+    /// Handle the series of messages for shifting to our right neighbor.
+    void _handleShiftToRight(uint32_t bytes);
+    void _handleShiftToRight1(boost::system::error_code const& ec, size_t bytesTrans);
+
+    /// Handle the series of messages for shifting from our right neighbor.
+    void _handleShiftFromRight(uint32_t bytesInMsg);
+    void _handleShiftFromRight1(boost::system::error_code const& ec, size_t bytesTrans);
+
+    /// Handle TCP functionality test messages.
+    void _handleTest();
+    void _handleTest2(boost::system::error_code const& ec, size_t bytesTrans);
+    void _handleTest2b(boost::system::error_code const& ec, size_t bytesTrans);
+    void _handleTest2c(boost::system::error_code const& ec, size_t bytesTrans);
+};
+
+
+/// This class maintains the TCP server using boost::asio for a worker.
+/// Constructing it opens an IPv4 acceptor on 'port' and starts accepting.
+/// TODO: Rename as this has become specialized for a worker.
+class ServerTcpBase {
+public:
+    typedef std::shared_ptr<ServerTcpBase> Ptr;
+
+    /// Server without an associated CentralWorker (getCentralWorker() returns nullptr).
+    ServerTcpBase(boost::asio::io_context& io_context, int port) :
+        ServerTcpBase(io_context, port, nullptr) {}
+
+    /// Server whose connections can reach 'cw' through getCentralWorker().
+    ServerTcpBase(boost::asio::io_context& io_context, int port, CentralWorker* cw) :
+        _io_context(io_context),
+        _acceptor(io_context, AsioTcp::endpoint(AsioTcp::v4(), port)), _port(port),
+        _centralWorker(cw) {
+        _startAccept();
+    }
+
+    // Holds a reference, an acceptor and running threads: copying makes no sense.
+    ServerTcpBase(ServerTcpBase const&) = delete;
+    ServerTcpBase& operator=(ServerTcpBase const&) = delete;
+
+    ~ServerTcpBase() {
+        // Stop the io_context first so the run() call in every thread returns,
+        // otherwise the joins below would block.
+        _io_context.stop();
+        for (std::thread& t : _threads) {
+            t.join();
+        }
+        // The server is expected to live until program termination.
+        // If a connection is doing something, and this is called, what happens?
+        // Check _connections empty before deleting?
+        for (auto&& conn:_connections) {
+            conn->shutdown();
+        }
+        _connections.clear();
+    }
+
+    /// Start one more thread running the io_context; it is joined in the destructor.
+    void runThread() {
+        _threads.emplace_back([this]() { _io_context.run(); });
+    }
+
+    bool testConnect();
+
+    /// Drop the server's reference to 'conn'.
+    // NOTE(review): _connections is modified here without a lock -- confirm that all
+    // callers run on a single thread or add synchronization.
+    void freeConnection(TcpBaseConnection::Ptr const& conn) {
+        _connections.erase(conn);
+    }
+
+    uint32_t getOurName();
+
+    CentralWorker* getCentralWorker() const { return _centralWorker; }
+
+    static bool writeData(AsioTcp::socket& socket, BufferUdp& data);
+
+private:
+    void _startAccept();
+
+    boost::asio::io_context& _io_context;
+    AsioTcp::acceptor _acceptor;
+    int _port;
+    std::vector<std::thread> _threads;
+    std::set<TcpBaseConnection::Ptr> _connections;
+
+    CentralWorker* _centralWorker{nullptr}; // not too thrilled with this
+};
+
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_SERVERTCPBASE_H
diff --git a/core/modules/loader/ServerUdpBase.cc b/core/modules/loader/ServerUdpBase.cc
new file mode 100644
index 0000000000..0eab1316e8
--- /dev/null
+++ b/core/modules/loader/ServerUdpBase.cc
@@ -0,0 +1,149 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ */
+
+
+// Class header
+#include "loader/ServerUdpBase.h"
+
+// System headers
+// NOTE(review): the header names were missing from this text; these cover the standard
+// names used below -- confirm against the repository.
+#include <memory>
+#include <mutex>
+#include <string>
+
+// Third-party headers
+
+// LSST headers
+#include "lsst/log/Log.h"
+
+namespace {
+LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.ServerUdpBase");
+}
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+// Message ids start at 1; getNextMsgId() hands them out as uint64_t.
+std::atomic<uint64_t> ServerUdpBase::_msgIdSeq{1};
+
+
+/// Open a UDP/IPv4 socket on 'port' and start waiting for the first datagram.
+/// 'host' is only stored so it can be reported by getOurHostName().
+ServerUdpBase::ServerUdpBase(boost::asio::io_service& io_service, std::string const& host, int port)
+    : _ioService(io_service),
+      _socket(io_service, boost::asio::ip::udp::endpoint(boost::asio::ip::udp::v4(), port)),
+      _hostName(host), _port(port) {
+    _receivePrepare(); // Prime the server for an incoming message.
+}
+
+
+/// Completion handler for async_receive_from.
+/// A usable datagram is handed to parseMsg(); if that returns a buffer it is sent
+/// back to the sender, otherwise the socket is re-armed straight away.
+void ServerUdpBase::_receiveCallback(boost::system::error_code const& error, size_t bytesRecvd) {
+    if (error || bytesRecvd == 0) {
+        // Nothing usable arrived. Say which of the two cases it was instead of always
+        // claiming the message was empty, then re-arm the socket with a fresh buffer.
+        LOGS(_log, LOG_LVL_ERROR, "ServerUdpBase::_receiveCallback ignoring message, code=" << error <<
+             " bytes=" << bytesRecvd);
+        _receivePrepare();
+        return;
+    }
+
+    _data->advanceWriteCursor(bytesRecvd); // _data needs to know the valid portion of the buffer.
+    LOGS(_log, LOG_LVL_DEBUG, "rCb received(" << bytesRecvd << "):" <<
+         ", code=" << error << ", from endpoint=" << _senderEndpoint);
+
+    _sendData = parseMsg(_data, _senderEndpoint);
+    if (_sendData != nullptr) {
+        _sendResponse();
+    } else {
+        _receivePrepare();
+    }
+}
+
+
+/// Asynchronously send the unread part of _sendData to the endpoint the last
+/// datagram came from; _sendCallback runs on completion.
+void ServerUdpBase::_sendResponse() {
+    _socket.async_send_to(boost::asio::buffer(_sendData->getReadCursor(), _sendData->getBytesLeftToRead()),
+                          _senderEndpoint,
+                          [this](boost::system::error_code const& ec, std::size_t bytes_transferred) {
+                              _sendCallback(ec, bytes_transferred);
+                          }
+    );
+}
+
+
+/// Synchronously send the unread part of 'sendBuf' to hostName:port.
+/// Resolved endpoints are cached in _resolvMap.
+/// @throws boost::system::system_error if resolving or sending fails (logged, then rethrown).
+void ServerUdpBase::sendBufferTo(std::string const& hostName, int port, BufferUdp& sendBuf) {
+    using namespace boost::asio;
+    LOGS(_log, LOG_LVL_DEBUG, "ServerUdpBase::sendBufferTo hostName=" << hostName << " port=" << port);
+    try {
+        NetworkAddress addr(hostName, port);
+        ip::udp::endpoint dest;
+        // NOTE(review): _resolvMap is read and written here without a lock; if this can
+        // be called from several threads it needs its own mutex (_resolveMtx is taken
+        // inside resolve(), so it cannot simply be held across that call).
+        auto iter = _resolvMap.find(addr);
+        if (iter == _resolvMap.end()) {
+            dest = resolve(hostName, port); // may throw boost::system::system_error
+            _resolvMap[addr] = dest;
+        } else {
+            // TODO if the entry is old, call resolv to freshen.
+            dest = iter->second;
+        }
+        _socket.send_to(buffer(sendBuf.getReadCursor(), sendBuf.getBytesLeftToRead()), dest);
+    } catch (boost::system::system_error const& e) {
+        LOGS(_log, LOG_LVL_ERROR, "ServerUdpBase::sendBufferTo boost system_error=" << e.what() <<
+             " host=" << hostName << " port=" << port << " buf=" << sendBuf);
+        throw;
+    }
+}
+
+
+/// This function, and its derived children, should return quickly. Handing 'data' off to another thread
+/// for handling.
+/// The base implementation is an echo server: it returns the buffer it was given.
+BufferUdp::Ptr ServerUdpBase::parseMsg(BufferUdp::Ptr const& data,
+                                       boost::asio::ip::udp::endpoint const& senderEndpoint) {
+    // echo server, so send back what we got
+    BufferUdp::Ptr sendData = data;
+    LOGS(_log, LOG_LVL_INFO, "pM dump(" << sendData->dumpStr() << ") from endpoint " << senderEndpoint);
+    return sendData;
+}
+
+
+/// Completion handler for async_send_to. Whatever the outcome, the socket is
+/// re-armed for the next datagram.
+void ServerUdpBase::_sendCallback(const boost::system::error_code& error, size_t bytes_sent) {
+    if (error) {
+        // The error code used to be dropped silently.
+        LOGS(_log, LOG_LVL_ERROR, " _sendCallback failed code=" << error << " bytes_sent=" << bytes_sent);
+    } else {
+        LOGS(_log, LOG_LVL_INFO, " _sendCallback bytes_sent=" << bytes_sent);
+    }
+    _receivePrepare();
+}
+
+/// Allocate a fresh receive buffer and queue an asynchronous receive into it.
+void ServerUdpBase::_receivePrepare() {
+    _data = std::make_shared<BufferUdp>(); // New buffer for next response, the old buffer
+                                           // may still be in use elsewhere.
+    _socket.async_receive_from(boost::asio::buffer(_data->getWriteCursor(),
+                                                   _data->getAvailableWriteLength()), _senderEndpoint,
+                               [this](boost::system::error_code const& ec, std::size_t bytes_transferred) {
+                                   _receiveCallback(ec, bytes_transferred);
+                               }
+    );
+}
+
+
+/// Resolve hostName:port to a UDP/IPv4 endpoint. Calls are serialized on _resolveMtx.
+/// @throws boost::system::system_error if nothing can be resolved.
+boost::asio::ip::udp::endpoint ServerUdpBase::resolve(std::string const& hostName, int port) {
+    std::lock_guard<std::mutex> lg(_resolveMtx);
+    using namespace boost::asio;
+    // Resolver returns an iterator. This uses the first item only.
+    // Failure to resolve anything throws a boost::system::error.
+    // There's a 5 second timeout, which is extremely painful and frequent.
+    ip::udp::endpoint dest =
+            *_resolver.resolve(ip::udp::v4(), hostName, std::to_string(port)).begin();
+    return dest;
+}
+
+
+}}} // namespace lsst::qserv::loader
diff --git a/core/modules/loader/ServerUdpBase.h b/core/modules/loader/ServerUdpBase.h
new file mode 100644
index 0000000000..149fd50930
--- /dev/null
+++ b/core/modules/loader/ServerUdpBase.h
@@ -0,0 +1,108 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 LSST Corporation.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ *
+ */
+#ifndef LSST_QSERV_LOADER_SERVERUDPBASE_H
+#define LSST_QSERV_LOADER_SERVERUDPBASE_H
+
+// system headers
+// NOTE(review): the header names were missing from this text; these are the standard
+// headers the class below needs -- confirm against the repository.
+#include <atomic>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+
+// third party headers
+#include "boost/asio.hpp"
+
+// Qserv headers
+#include "loader/BufferUdp.h"
+#include "loader/NetworkAddress.h"
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+
+/// This class provides a basic UDP server. Derived classes can identify messages
+/// and take appropriate action.
+class ServerUdpBase {
+public:
+    using Ptr = std::shared_ptr<ServerUdpBase>;
+
+    // This constructor can throw boost::system::system_error
+    ServerUdpBase(boost::asio::io_service& io_service, std::string const& host, int port);
+
+    ServerUdpBase() = delete;
+    ServerUdpBase(ServerUdpBase const&) = delete;
+    ServerUdpBase& operator=(ServerUdpBase const&) = delete;
+
+    virtual ~ServerUdpBase() = default;
+
+    /// Called for every datagram received.
+    /// @return a buffer to send back to 'endpoint', or nullptr to send nothing.
+    virtual BufferUdp::Ptr parseMsg(BufferUdp::Ptr const& data,
+                                    boost::asio::ip::udp::endpoint const& endpoint);
+
+    /// @return the next message id; the counter is shared by all instances.
+    uint64_t getNextMsgId() { return _msgIdSeq++; }
+    std::string getOurHostName() const { return _hostName; }
+    int getOurPort() const { return _port; }
+    uint32_t getErrCount() const { return _errCount; }
+
+    /// This waits for the message to be sent before returning.
+    /// throws boost::system::system_error on failure.
+    void sendBufferTo(std::string const& host, int port, BufferUdp& sendBuf);
+
+    /// This throws boost::system::system_error on failure.
+    boost::asio::ip::udp::endpoint resolve(std::string const& hostName, int port);
+
+protected:
+    std::atomic<uint32_t> _errCount{0};
+
+private:
+    void _receivePrepare(); ///< Give the io_service our callback for receiving.
+    void _receiveCallback(const boost::system::error_code& error, size_t bytes_recvd);
+    void _sendCallback(const boost::system::error_code& error, size_t bytes_sent);
+    void _sendResponse(); ///< Send the contents of _sendData as a response;
+
+    static std::atomic<uint64_t> _msgIdSeq; ///< Counter for unique message ids from this server.
+    boost::asio::io_service& _ioService;
+    boost::asio::ip::udp::socket _socket;
+    boost::asio::ip::udp::endpoint _senderEndpoint;
+
+    BufferUdp::Ptr _data; ///< data buffer for receiving
+    BufferUdp::Ptr _sendData; ///< data buffer for sending.
+    std::string _hostName;
+    int _port;
+
+    /// Cache of already resolved ip destinations, keyed by host name and port.
+    // NOTE(review): no mutex in this class guards the map -- confirm whether
+    // sendBufferTo() can be called from more than one thread.
+    // TODO: add occasional checks to see if addresses changed
+    std::map<NetworkAddress, boost::asio::ip::udp::endpoint> _resolvMap;
+
+    /// Items for resolving UDP addresses
+    /// There appear to be concurrency issues even with
+    /// separate io_contexts, so re-using existing objects.
+    boost::asio::io_context _ioContext;
+    boost::asio::ip::udp::resolver _resolver{_ioContext};
+    std::mutex _resolveMtx; ///< protects _ioContext, _resolver
+};
+
+
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_SERVERUDPBASE_H
diff --git a/core/modules/loader/Updateable.h b/core/modules/loader/Updateable.h
new file mode 100644
index 0000000000..80f7171a3b
--- /dev/null
+++ b/core/modules/loader/Updateable.h
@@ -0,0 +1,100 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 LSST Corporation.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ *
+ */
+#ifndef LSST_QSERV_LOADER_UPDATEABLE_H
+#define LSST_QSERV_LOADER_UPDATEABLE_H
+
+// system headers
+// NOTE(review): the header names were missing from this text; <list> and <memory> are
+// what the classes in this file use -- confirm against the repository.
+#include <list>
+#include <memory>
+
+// Qserv headers
+
+
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+/// The purpose of the classes here is to help detect when a desired state has been achieved.
+/// These objects do not know their desired state.
+/// The entities that do know the desired state
+/// are found on the DoList objects. An alternative approach would be to have these objects
+/// store the target state, and send messages until their actual values match the target value.
+
+
+/// A class to allow an object to be notified when the value of an Updatable
+/// object is updated.
+template <class T>
+class UpdateNotify {
+public:
+    UpdateNotify() = default;
+    virtual ~UpdateNotify() = default;
+
+    /// Called by Updatable<T>::update() with the previous and the newly stored value.
+    virtual void updateNotify(T& oldVal, T& newVal) = 0;
+};
+
+
+/// A class that keeps a list of objects interested in the value of an object,
+/// and contacts them when its value is updated. (Updated means the value was set, maybe not changed.)
+template <class T>
+class Updatable {
+public:
+    Updatable() = default;
+    explicit Updatable(T const& val) : _value(val) {}
+    Updatable(Updatable const&) = delete;
+    Updatable& operator=(Updatable const&) = delete;
+
+    /// Store 'val' and notify every registered observer that is still alive,
+    /// even when 'val' equals the previous value.
+    void update(T const& val) {
+        T oldVal = _value;
+        _value = val;
+        _notifyAll(oldVal, _value);
+    }
+
+    /// @return a copy of the current value.
+    T get() const { return _value; }
+
+    /// Add 'un' to the observers. Only a weak reference is kept, so registering
+    /// does not extend the observer's lifetime.
+    void registerNotify(std::shared_ptr<UpdateNotify<T>> const& un) {
+        _notifyList.push_back(un);
+    }
+private:
+    /// Call updateNotify() on every live observer and drop the expired ones.
+    void _notifyAll(T& oldVal, T& newVal) {
+        auto iter = _notifyList.begin();
+        while (iter != _notifyList.end()) {
+            auto unPtr = (*iter).lock();
+            if (unPtr == nullptr) {
+                // The observer has been destroyed; erase() returns the next element.
+                iter = _notifyList.erase(iter);
+            } else {
+                unPtr->updateNotify(oldVal, newVal);
+                ++iter;
+            }
+        }
+    }
+
+    std::list<std::weak_ptr<UpdateNotify<T>>> _notifyList;
+
+    // Value-initialized so that get() on a default constructed object is well defined
+    // for built-in types.
+    T _value{};
+};
+
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_UPDATEABLE_H
diff --git a/core/modules/loader/Util.cc b/core/modules/loader/Util.cc
new file mode 100644
index 0000000000..d62b590808
--- /dev/null
+++ b/core/modules/loader/Util.cc
@@ -0,0 +1,141 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ */
+
+// Class header
+// NOTE(review): this includes WorkerConfig.h rather than loader/Util.h. Switching to
+// Util.h also requires dropping the default argument from the getOurHostName()
+// definition below, because the declaration in Util.h already specifies it.
+#include "loader/WorkerConfig.h"
+
+// System headers
+// NOTE(review): the header names were missing from this text; these cover the standard
+// and POSIX names used below -- confirm against the repository.
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <functional>
+#include <string>
+#include <vector>
+
+// Third party headers
+#include "boost/asio.hpp"
+
+// LSST headers
+#include "lsst/log/Log.h"
+
+// Qserv headers
+#include "util/ConfigStore.h"
+#include "util/ConfigStoreError.h"
+
+namespace {
+LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.Util");
+}
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+/// Split 'in' at every character for which 'func' returns true.
+/// The separators are dropped; leading, trailing and adjacent separators produce
+/// empty strings, so the result always holds at least one element.
+std::vector<std::string> split(std::string const& in, std::function<bool(char)> func) {
+    std::vector<std::string> result;
+    // special case of empty string
+    if (in.empty()) {
+        result.push_back("");
+        return result;
+    }
+
+    auto pos = in.begin();
+    while (pos != in.end()) {
+        std::string str("");
+        while (pos != in.end() && !func(*pos)) {
+            str += *pos;
+            ++pos;
+        }
+        result.push_back(str);
+        if (pos != in.end()) {
+            ++pos;
+            // Another special case. The last character was a match
+            // for func so append an empty string. Basically ensure that
+            // ".com" is distinguishable from ".com." in the input.
+            if (pos == in.end()) {
+                result.push_back("");
+            }
+        }
+    }
+    return result;
+}
+
+
+/// TODO Test to be put in unit tests
+/// @return true if split() gives the expected pieces for a plain, an empty and a
+///         separator-delimited input. Sizes are checked before any element is read.
+bool splitTest() {
+    auto isDot = [](char c) {return c == '.';};
+
+    auto out = split("www.github.com", isDot);
+    auto test = (out.size() == 3 && out[0] == "www" && out[1] == "github" && out[2] == "com");
+    if (!test) return false;
+
+    out = split("", isDot);
+    test = (out.size() == 1 && out[0] == "");
+    if (!test) return false;
+
+    out = split(".com.", isDot);
+    test = (out.size() == 3 && out[0] == "" && out[1] == "com" && out[2] == "");
+    if (!test) return false;
+    return true;
+}
+
+
+/// Look up the name of this host: local host name -> IPv4 address -> reverse lookup.
+/// @param domains number of leading dot separated components to keep; 0 keeps all.
+/// @return the (possibly shortened) name, or an empty string if any lookup fails.
+/// Not reentrant: gethostbyname, gethostbyaddr and inet_ntoa return static storage.
+std::string getOurHostName(unsigned int domains=0) {
+    std::string out("");
+    std::string const ourHost = boost::asio::ip::host_name();
+    LOGS(_log, LOG_LVL_INFO, "ourHost=" << ourHost);
+
+    // Forward lookup. gethostbyname returns nullptr on failure, which used to be
+    // dereferenced unconditionally.
+    struct hostent *host_entry = gethostbyname(ourHost.c_str());
+    if (host_entry == nullptr || host_entry->h_addrtype != AF_INET
+        || host_entry->h_addr_list[0] == nullptr) {
+        LOGS(_log, LOG_LVL_ERROR, "getOurHostName() could not resolve ourHost=" << ourHost);
+        return out;
+    }
+
+    // convert to ASCII
+    char *IPbuffer = inet_ntoa(*((struct in_addr*)host_entry->h_addr_list[0]));
+    LOGS(_log, LOG_LVL_DEBUG, "host_entry=" << host_entry << " IP=" << IPbuffer);
+    // Copy now: IPbuffer and host_entry point at static storage that later calls overwrite.
+    std::string const ourHostIp = IPbuffer;
+
+    in_addr ipv4addr;
+    if (inet_pton(AF_INET, ourHostIp.c_str(), &ipv4addr) != 1) {
+        LOGS(_log, LOG_LVL_ERROR, "getOurHostName() invalid IPv4 address " << ourHostIp);
+        return out;
+    }
+
+    // Reverse lookup to get the full name.
+    hostent *he = gethostbyaddr(&ipv4addr, sizeof ipv4addr, AF_INET);
+    if (he == nullptr) {
+        LOGS(_log, LOG_LVL_ERROR, "getOurHostName() no hostname found!");
+        return out;
+    }
+    LOGS(_log, LOG_LVL_INFO, " host name=" << he->h_name); // full name
+    if (domains == 0) {
+        out = he->h_name;
+        return out;
+    }
+
+    // Keep the first 'domains' components; split() never returns an empty vector.
+    auto splitName = split(he->h_name, [](char c) {return c == '.';});
+    out = splitName[0];
+    for(unsigned int j=1; j < domains && j < splitName.size(); ++j) {
+        out += "." + splitName.at(j);
+    }
+    return out;
+}
+
+}}} // namespace lsst::qserv::loader
+
diff --git a/core/modules/loader/Util.h b/core/modules/loader/Util.h
new file mode 100644
index 0000000000..5ea8d3abfd
--- /dev/null
+++ b/core/modules/loader/Util.h
@@ -0,0 +1,67 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2019 LSST Corporation.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ *
+ */
+#ifndef LSST_QSERV_LOADER_UTIL_H
+#define LSST_QSERV_LOADER_UTIL_H
+
+// system headers
+// NOTE(review): the header names were missing from this text; these are the ones the
+// declarations in this file need -- confirm against the repository.
+#include <functional>
+#include <string>
+#include <vector>
+
+// Qserv headers
+
+
+
+/// Header file for misc things that should probably be added to qserv/util when
+/// this code is ready to be merged to master.
+
+
+namespace lsst {
+namespace qserv {
+namespace loader {
+
+/// @return - Returns the hostname for this system, possibly including the entire domain.
+/// ** Non-reentrant - This function uses inet_ntoa, which is non-reentrant.
+/// @param domains - This indicates how much of the hostname and domain to return in the string.
+/// ex: "iworker-sts-0.iworker-svc.default.svc.cluster.local"
+/// domains=0 returns the entire hostname and domain.
+///               "iworker-sts-0.iworker-svc.default.svc.cluster.local"
+/// domains=1 returns "iworker-sts-0"
+/// domains=2 returns "iworker-sts-0.iworker-svc" (kubernetes needs at least this much)
+/// domains=3 returns "iworker-sts-0.iworker-svc.default"
+/// ...
+/// domains=11 returns "iworker-sts-0.iworker-svc.default.svc.cluster.local"
+///               (asking for more components than the name has returns the whole name)
+std::string getOurHostName(unsigned int domains=0);
+
+/// Split a string into a vector of strings based on function func.
+/// @return a vector of strings, which will never contain less than 1 string.
+/// @param in is the string to split.
+/// @param func is expected to be a lambda similar to [](char c) {return c == '.';}
+///             which would split the string on '.'.
+/// ex: auto out = split("www.github.com", [](char c) {return c == '.';});
+///     out contains "www", "github", "com"
+std::vector<std::string> split(std::string const& in, std::function<bool(char)> func);
+
+}}} // namespace lsst::qserv::loader
+
+#endif // LSST_QSERV_LOADER_UTIL_H
diff --git a/core/modules/loader/WWorkerList.cc b/core/modules/loader/WWorkerList.cc
new file mode 100644
index 0000000000..9641325672
--- /dev/null
+++ b/core/modules/loader/WWorkerList.cc
@@ -0,0 +1,382 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ */ + + +// Class header +#include "loader/WWorkerList.h" + +// System headers +#include + +// Third-party headers +#include "boost/asio.hpp" + +// qserv headers +#include "loader/CentralFollower.h" +#include "loader/LoaderMsg.h" +#include "proto/ProtoImporter.h" +#include "proto/loader.pb.h" + + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.WWorkerList"); +} + +namespace lsst { +namespace qserv { +namespace loader { + + +util::CommandTracked::Ptr WWorkerList::createCommand() { + return createCommandWorker(_central); +} + + +util::CommandTracked::Ptr WWorkerList::createCommandWorker(CentralFollower* centralF) { + // On the worker and clients, need to occasionally ask for a list of workers + // from the master and make sure each of those workers is on the doList + class MastWorkerListReqCmd : public util::CommandTracked { + public: + MastWorkerListReqCmd(CentralFollower* centralF, std::map nameMap) + : _centralF(centralF), _wIdMap(nameMap) {} + + void action(util::CmdData *data) override { + /// Request a list of all workers. + // TODO make a function for this, it's always going to be the same. + proto::LdrNetAddress protoOurAddress; + protoOurAddress.set_ip(_centralF->getHostName()); + protoOurAddress.set_udpport(_centralF->getUdpPort()); + protoOurAddress.set_tcpport(_centralF->getTcpPort()); + StringElement eOurAddress(protoOurAddress.SerializeAsString()); + + LoaderMsg workerInfoReqMsg(LoaderMsg::MAST_WORKER_LIST_REQ, _centralF->getNextMsgId(), + _centralF->getHostName(), _centralF->getUdpPort()); + BufferUdp sendBuf(1000); + workerInfoReqMsg.appendToData(sendBuf); + eOurAddress.appendToData(sendBuf); + + // Send the request to master. 
+ auto masterHost = _centralF->getMasterHostName(); + auto masterPort = _centralF->getMasterPort(); + LOGS(_log, LOG_LVL_DEBUG, "MastWorkerListReqCmd::action host=" << masterHost << + " port=" << masterPort); + try { + _centralF->sendBufferTo(masterHost, masterPort, sendBuf); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "MastWorkerListReqCmd::action boost system_error=" << e.what()); + } + + /// Go through the existing list and add any that have not been add to the doList + for (auto const& item : _wIdMap) { + item.second->addDoListItems(_centralF); + } + } + + private: + CentralFollower* _centralF; + std::map _wIdMap; + }; + + LOGS(_log, LOG_LVL_DEBUG, "WorkerList::createCommandWorker"); + return std::make_shared(centralF, _wIdMap); +} + + +bool WWorkerList::workerListReceive(BufferUdp::Ptr const& data) { + std::string const funcName("WWorkerList::workerListReceive"); + LOGS(_log, LOG_LVL_INFO, funcName << " data=" << data->dumpStr()); + // Open the data protobuffer and add it to our list. + StringElement::Ptr sData = std::dynamic_pointer_cast(MsgElement::retrieve(*data, "WWorkerList::workerListReceive")); + if (sData == nullptr) { + LOGS(_log, LOG_LVL_WARN, funcName << " Failed to parse list"); + return false; + } + auto protoList = sData->protoParse(); + if (protoList == nullptr) { + LOGS(_log, LOG_LVL_WARN, funcName << " Failed to parse list"); + return false; + } + + // TODO put this in separate thread, the part above can probably be put in a separate function in _central + int sizeChange = 0; + std::string strNames; + { + std::lock_guard lock(_mapMtx); + size_t initialSize = _wIdMap.size(); + // There may be more workers than will fit in a message. + _totalNumberOfWorkers = protoList->workercount(); + int sz = protoList->worker_size(); + + for (int j=0; j < sz; ++j) { + proto::WorkerListItem const& protoItem = protoList->worker(j); + uint32_t wId = protoItem.wid(); + // Most of the time, the worker will already be in the map. 
+ auto item = _wIdMap[wId]; + if (item == nullptr) { + item = WWorkerListItem::create(wId, _central); + _wIdMap[wId] = item; + strNames += std::to_string(wId) + ","; + item->addDoListItems(_central); + } + // TODO: Should this call updateEntry() to fill in the information for the worker? + } + sizeChange = _wIdMap.size() - initialSize; + if (sizeChange > 0) { + _flagListChange(); + } + } + infoReceived(); // This causes it to avoid asking for this info for a while. + LOGS(_log, LOG_LVL_INFO, funcName << " added " << sizeChange << " names=" << strNames); + return true; +} + +// must lock _mapMtx before calling this function +void WWorkerList::_flagListChange() { + _wListChanged = true; +} + + +bool WWorkerList::equal(WWorkerList& other) const { + std::string const funcName("WWorkerList::equal"); + // Have to lock it this way as 'other' could call it's own equal function which + // would try to lock in reverse order. + std::lock(_mapMtx, other._mapMtx); + std::lock_guard lk1(_mapMtx, std::adopt_lock); + std::lock_guard lk2(other._mapMtx, std::adopt_lock); + + if (_wIdMap.size() != other._wIdMap.size()) { + LOGS(_log, LOG_LVL_INFO, funcName << " map sizes do not match this=" << _wIdMap.size() << + " other=" << other._wIdMap.size()); + return false; + } + auto thisIter = _wIdMap.begin(); + auto otherIter = other._wIdMap.begin(); + for (;thisIter != _wIdMap.end() && otherIter != other._wIdMap.end(); + ++thisIter, ++otherIter) { + if (thisIter->first != otherIter->first) { + LOGS(_log, LOG_LVL_INFO, funcName << " map first not equal"); + return false; + } + if (not thisIter->second->equal(*(otherIter->second))) { + LOGS(_log, LOG_LVL_INFO, funcName << " map second not equal"); + return false; + } + } + return true; +} + + +std::string WWorkerList::dump() const { + std::stringstream os; + os << "WWorkerList name:\n"; + { + std::lock_guard lck(_mapMtx); + for (auto elem:_wIdMap) { + os << " " << *elem.second << "\n"; + } + + os << "WWorkerList ip:\n"; + for (auto 
elem:_ipMap) { + os << " " << *elem.second << "\n"; + } + } + return os.str(); +} + + +/// There must be a name. However, ip, port, and range may be invalid. +// TODO believe our neighbors range over the master +void WWorkerList::updateEntry(uint32_t wId, + std::string const& ip, int portUdp, int portTcp, + KeyRange& strRange) { + std::unique_lock lk(_mapMtx); + auto iter = _wIdMap.find(wId); + if (iter == _wIdMap.end()) { + // This should rarely happen, make an entry for it + auto newItem = WWorkerListItem::create(wId, _central); + auto res = _wIdMap.insert(std::make_pair(wId, newItem)); + iter = res.first; + LOGS(_log, LOG_LVL_INFO, "updateEntry created entry for name=" << wId << + " res=" << res.second); + } + WWorkerListItem::Ptr const& item = iter->second; + if (ip != "") { + if (item->getUdpAddress().ip == "" ) { + NetworkAddress nAddr(ip, portUdp); + item->setUdpAddress(nAddr); + item->setTcpAddress(NetworkAddress(ip, portTcp)); + auto res = _ipMap.insert(std::make_pair(nAddr, item)); + LOGS(_log, LOG_LVL_INFO, "updateEntry set wId=" << wId << " Udp=" << nAddr << + " res=" << res.second); + } + } + + LOGS(_log, LOG_LVL_INFO, "wId=" << wId << " updateEntry strRange=" << strRange); + if (strRange.getValid()) { + // Does the new range match the old range? + auto oldRange = item->setRangeString(strRange); + LOGS(_log, LOG_LVL_INFO, "updateEntry set name=" << wId << " range=" << strRange); + if (not oldRange.equal(strRange)) { + // Since the value changed, it needs to be removed and reinserted. + // No invalid ranges should be in the map. + if (oldRange.getValid()) { + // The old value was valid, so it is likely in the map. 
+ auto rangeIter = _rangeMap.find(oldRange); + if (rangeIter != _rangeMap.end()) { + _rangeMap.erase(rangeIter); + } + } + if (strRange.getValid()) { + _rangeMap[strRange] = item; + } + } + } +} + + +WWorkerListItem::Ptr WWorkerList::findWorkerForKey(CompositeKey const& key) { + std::string const funcName("WWorkerList::findWorkerForKey"); + std::unique_lock lk(_mapMtx); + // TODO Really could use a custom container for _rangeMap to speed this up. + for (auto const& elem : _rangeMap) { + if (elem.second->containsKey(key)) { + LOGS(_log, LOG_LVL_INFO, funcName << " key=" << elem.first << " -> " << *(elem.second)); + return elem.second; + } + } + LOGS(_log, LOG_LVL_WARN, funcName << " did not find worker for key=" << key); + return nullptr; +} + + + +void WWorkerListItem::addDoListItems(Central *central) { + std::lock_guard lck(_mtx); + if (_workerUpdateNeedsMasterData == nullptr) { + _workerUpdateNeedsMasterData.reset(new WorkerNeedsMasterData(getThis(), _central)); + central->addDoListItem(_workerUpdateNeedsMasterData); + } +} + + +bool WWorkerListItem::equal(WWorkerListItem& other) const { + std::string const funcName("WWorkerListItem::equal"); + + if (_wId != other._wId) { + LOGS(_log, LOG_LVL_INFO, funcName << " item name not equal t=" << _wId << " o=" << other._wId); + return false; + } + auto thisUdp = getUdpAddress(); + auto otherUdp = other.getUdpAddress(); + if (thisUdp != otherUdp) { + LOGS(_log, LOG_LVL_INFO, funcName << " item addr != name=" << _wId << + " t=" << thisUdp << " o=" << otherUdp); + return false; + } + + std::lock(_mtx, other._mtx); + std::lock_guard lck1(_mtx, std::adopt_lock); + std::lock_guard lck2(other._mtx, std::adopt_lock); + if (not _range.equal(other._range)) { + LOGS(_log, LOG_LVL_INFO, funcName << " item range != name=" << _wId << + " t=" << _range << " o=" << other._range); + } + return true; +} + + +util::CommandTracked::Ptr WWorkerListItem::WorkerNeedsMasterData::createCommand() { + auto item = wWorkerListItem.lock(); + if (item 
== nullptr) { + // TODO: should mark set the removal flag for this doListItem + return nullptr; + } + return item->createCommandWorkerInfoReq(central); +} + +util::CommandTracked::Ptr WWorkerListItem::createCommandWorkerInfoReq(CentralFollower* centralF) { + // Create a command to put on the pool to + // - ask the master about a server with our _name + + class WorkerReqCmd : public util::CommandTracked { + public: + WorkerReqCmd(CentralFollower* centralF, uint32_t name) : _centralF(centralF), _wId(name) {} + + void action(util::CmdData *data) override { + /// Request all information the master has for one worker. + LOGS(_log, LOG_LVL_INFO, "WWorkerListItem::createCommand::WorkerReqCmd::action " << + "ourName=" << _centralF->getOurLogId() << " req name=" << _wId); + + // TODO make a function for this, it's always going to be the same. + proto::LdrNetAddress protoOurAddress; + protoOurAddress.set_ip(_centralF->getHostName()); + protoOurAddress.set_udpport(_centralF->getUdpPort()); + protoOurAddress.set_tcpport(_centralF->getTcpPort()); + StringElement eOurAddress(protoOurAddress.SerializeAsString()); + + proto::WorkerListItem protoItem; + protoItem.set_wid(_wId); + StringElement eItem(protoItem.SerializeAsString()); + + LoaderMsg workerInfoReqMsg(LoaderMsg::MAST_WORKER_INFO_REQ, _centralF->getNextMsgId(), + _centralF->getHostName(), _centralF->getUdpPort()); + BufferUdp sendBuf(1000); + workerInfoReqMsg.appendToData(sendBuf); + eOurAddress.appendToData(sendBuf); + eItem.appendToData(sendBuf); + + // Send the request to master. 
+ auto masterHost = _centralF->getMasterHostName(); + auto masterPort = _centralF->getMasterPort(); + try { + _centralF->sendBufferTo(masterHost, masterPort, sendBuf); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "WorkerReqCmd::action boost system_error=" << e.what() << + " wId=" << _wId); + } + } + + private: + CentralFollower* _centralF; + uint32_t _wId; ///< worker id + }; + + LOGS(_log, LOG_LVL_INFO, "WWorkerListItem::createCommandWorker this=" << + centralF->getOurLogId() << " name=" << _wId); + return std::make_shared(centralF, _wId); +} + + +bool WWorkerListItem::containsKey(CompositeKey const& key) const { + std::lock_guard lck(_mtx); + return _range.isInRange(key); +} + + +}}} // namespace lsst::qserv::loader + + + diff --git a/core/modules/loader/WWorkerList.h b/core/modules/loader/WWorkerList.h new file mode 100644 index 0000000000..bcd9b2bcf9 --- /dev/null +++ b/core/modules/loader/WWorkerList.h @@ -0,0 +1,151 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ * + */ +#ifndef LSST_QSERV_LOADER_WWORKERLIST_H +#define LSST_QSERV_LOADER_WWORKERLIST_H + +// system headers +#include +#include +#include +#include + +// Qserv headers +#include "loader/BufferUdp.h" +#include "loader/DoList.h" +#include "loader/WorkerListItemBase.h" + + +namespace lsst { +namespace qserv { +namespace loader { + +class CentralFollower; +class LoaderMsg; + + +/// Standard information for a single worker, IP address, key range, timeouts. +class WWorkerListItem : public WorkerListItemBase { +public: + using Ptr = std::shared_ptr; + using WPtr = std::weak_ptr; + + static WWorkerListItem::Ptr create(uint32_t wId, CentralFollower *central) { + return WWorkerListItem::Ptr(new WWorkerListItem(wId, central)); + } + + WWorkerListItem() = delete; + WWorkerListItem(WWorkerListItem const&) = delete; + WWorkerListItem& operator=(WWorkerListItem const&) = delete; + + virtual ~WWorkerListItem() = default; + + /// @return a properly typed shared pointer to this object. + Ptr getThis() { + Ptr ptr = std::static_pointer_cast(shared_from_this()); + return ptr; + } + + void addDoListItems(Central *central) override; + + util::CommandTracked::Ptr createCommandWorkerInfoReq(CentralFollower* centralF); + + /// @return true if this item is equal to other. + bool equal(WWorkerListItem &other) const; + + /// @return true if 'key' can be found in this item's map. 
+ bool containsKey(CompositeKey const& key) const; + +private: + WWorkerListItem(uint32_t wId, CentralFollower* central) : WorkerListItemBase(wId), _central(central) {} + + CentralFollower* _central; + + struct WorkerNeedsMasterData : public DoListItem { + WorkerNeedsMasterData(WWorkerListItem::Ptr const& wWorkerListItem_, CentralFollower* central_) : + wWorkerListItem(wWorkerListItem_), central(central_) {} + WWorkerListItem::WPtr wWorkerListItem; + CentralFollower* central; + util::CommandTracked::Ptr createCommand() override; + }; + DoListItem::Ptr _workerUpdateNeedsMasterData; +}; + + + + +class WWorkerList : public DoListItem { +public: + using Ptr = std::shared_ptr; + + WWorkerList(CentralFollower* central) : _central(central) {} + WWorkerList() = delete; + WWorkerList(WWorkerList const&) = delete; + WWorkerList& operator=(WWorkerList const&) = delete; + + virtual ~WWorkerList() = default; + + /// Receive a list of workers from the master. + bool workerListReceive(BufferUdp::Ptr const& data); + + bool equal(WWorkerList& other) const; + + util::CommandTracked::Ptr createCommand() override; + util::CommandTracked::Ptr createCommandWorker(CentralFollower* centralF); + + //////////////////////////////////////////// + /// Nearly the same on Worker and Master + size_t getIdMapSize() { + std::lock_guard lck(_mapMtx); + return _wIdMap.size(); + } + WWorkerListItem::Ptr getWorkerWithId(uint32_t id) { + std::lock_guard lck(_mapMtx); + auto iter = _wIdMap.find(id); + if (iter == _wIdMap.end()) { return nullptr; } + return iter->second; + } + + void updateEntry(uint32_t wId, + std::string const& ipUdp, int portUdp, int portTcp, + KeyRange& strRange); + WWorkerListItem::Ptr findWorkerForKey(CompositeKey const& key); + + std::string dump() const; + +protected: + void _flagListChange(); + + CentralFollower* _central; + std::map _wIdMap; ///< worker id map + std::map _ipMap; + std::map _rangeMap; + bool _wListChanged{false}; ///< true if the list has changed + uint32_t 
_totalNumberOfWorkers{0}; ///< total number of workers according to the master. + mutable std::mutex _mapMtx; ///< protects _wIdMap, _ipMap, _rangeMap, _wListChanged +}; + + +}}} // namespace lsst::qserv::loader + +#endif // LSST_QSERV_LOADER_WWORKERLIST_H diff --git a/core/modules/loader/WorkerConfig.cc b/core/modules/loader/WorkerConfig.cc new file mode 100644 index 0000000000..fe275ee6de --- /dev/null +++ b/core/modules/loader/WorkerConfig.cc @@ -0,0 +1,62 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "loader/WorkerConfig.h" + +// System headers + +// LSST headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "util/ConfigStore.h" +#include "util/ConfigStoreError.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.Config"); +} + +namespace lsst { +namespace qserv { +namespace loader { + + +WorkerConfig::WorkerConfig(util::ConfigStore const& configStore) { + try { + setFromConfig(configStore); + } catch (util::ConfigStoreError const& e) { + throw ConfigErr(ERR_LOC, std::string("WorkerConfig ") + e.what()); + } +} + + +std::ostream& WorkerConfig::dump(std::ostream &os) const { + os << "(WorkerConfig(" << header << ") "; + ConfigBase::dump(os); + os << ")"; + return os; +} + +}}} // namespace lsst::qserv::css + diff --git a/core/modules/loader/WorkerConfig.h b/core/modules/loader/WorkerConfig.h new file mode 100644 index 0000000000..ca37aa521b --- /dev/null +++ b/core/modules/loader/WorkerConfig.h @@ -0,0 +1,101 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
/// A class for reading the configuration file for the worker, which consists of
/// a collection of key-value pairs, and for providing access functions for those values.
///
/// Every value is declared as a ConfigElement member created with the section name
/// 'header' ("worker") and a key name.
/// NOTE(review): the boolean passed to ConfigElement::create() presumably marks the
/// key as required, and the optional trailing string looks like a default value -
/// confirm against loader/ConfigBase.h.
class WorkerConfig : public ConfigBase {
public:
    /// Read the configuration from the file 'configFileName'.
    /// Delegates to the private ConfigStore constructor.
    explicit WorkerConfig(std::string const& configFileName)
        : WorkerConfig(util::ConfigStore(configFileName)) {}

    WorkerConfig() = delete;
    WorkerConfig(WorkerConfig const&) = delete;
    WorkerConfig& operator=(WorkerConfig const&) = delete;

    // Accessors: each returns the value of the correspondingly named element below.
    std::string getMasterHost() const { return _masterHost->getValue(); }
    int getMasterPortUdp() const { return _masterPortUdp->getInt(); }
    int getWPortUdp() const { return _wPortUdp->getInt(); }
    int getWPortTcp() const { return _wPortTcp->getInt(); }
    int getThreadPoolSize() const { return _threadPoolSize->getInt(); }
    int getRecentAddLimit() const { return _recentAddLimit->getInt(); }
    double getThresholdNeighborShift() const { return _thresholdNeighborShift->getDouble(); }
    int getMaxKeysToShift() const { return _maxKeysToShift->getInt(); }
    int getLoopSleepTime() const { return _loopSleepTime->getInt(); }
    int getIOThreads() const { return _iOThreads->getInt(); }

    /// Write a description of this configuration to 'os'.
    std::ostream& dump(std::ostream &os) const override;

    /// Section name passed to every ConfigElement created below.
    /// It is declared before the elements because their initializers use it.
    std::string const header{"worker"};
private:
    /// Load all elements from 'configStore'.
    WorkerConfig(util::ConfigStore const& configStore);

    /// Master host name
    ConfigElement::Ptr _masterHost{
        ConfigElement::create(cfgList, header, "masterHost", ConfigElement::STRING, true)};
    /// Master UDP port
    ConfigElement::Ptr _masterPortUdp{
        ConfigElement::create(cfgList, header, "masterPortUdp", ConfigElement::INT, true)};
    /// UDP port for this worker. Reasonable value - 9876
    ConfigElement::Ptr _wPortUdp{
        ConfigElement::create(cfgList, header, "wPortUdp", ConfigElement::INT, true)};
    /// TCP port for this worker. Reasonable value - 9877
    ConfigElement::Ptr _wPortTcp{
        ConfigElement::create(cfgList, header, "wPortTcp", ConfigElement::INT, true)};
    /// Size of the thread pool. Reasonable value - 10
    ConfigElement::Ptr _threadPoolSize{
        ConfigElement::create(cfgList, header, "threadPoolSize", ConfigElement::INT, true)};
    /// Time limit for a key added to the system to be considered recent.
    /// NOTE(review): the previous comment gave the unit as seconds, but its example
    /// "60000 = 1 minute" implies milliseconds - confirm the unit where the value is used.
    ConfigElement::Ptr _recentAddLimit{
        ConfigElement::create(cfgList, header, "recentAddLimit", ConfigElement::INT, true)};
    /// If a worker has this many times the number of keys as the neighbor, keys should be shifted to
    /// the neighbor. "1.10" indicates keys should be shifted if one worker has 10% or more keys
    /// than the other.
    ConfigElement::Ptr _thresholdNeighborShift{
        ConfigElement::create(cfgList, header, "thresholdNeighborShift", ConfigElement::FLOAT, true)};
    /// The maximum number of keys to shift in a single iteration. During a shift iteration,
    /// there are no new key inserts or lookups. 10000 may be a reasonable value.
    ConfigElement::Ptr _maxKeysToShift{
        ConfigElement::create(cfgList, header, "maxKeysToShift", ConfigElement::INT, true)};
    /// Time spent sleeping between checking elements in the DoList, in microseconds. 100000
    ConfigElement::Ptr _loopSleepTime{
        ConfigElement::create(cfgList, header, "loopSleepTime", ConfigElement::INT, false, "100000")};
    /// Number of IO threads the server should run.
    ConfigElement::Ptr _iOThreads{
        ConfigElement::create(cfgList, header, "iOThreads", ConfigElement::INT, false, "5")};
};
+ */ + +// Class header +#include "loader/WorkerListItemBase.h" + +// System headers +#include + +// third party headers +#include "boost/asio.hpp" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.WorkerListBase"); +} + +namespace lsst { +namespace qserv { +namespace loader { + + +KeyRange WorkerListItemBase::setRangeString(KeyRange const& strRange) { + std::lock_guard lck(_mtx); + auto oldRange = _range; + _range = strRange; + LOGS(_log, LOG_LVL_INFO, "setRangeStr name=" << _wId << " range=" << _range << + " oldRange=" << oldRange); + return oldRange; +} + + +std::ostream& WorkerListItemBase::dump(std::ostream& os) const { + os << "wId=" << _wId; + os << " UDP=" << getUdpAddress(); + os << " TCP=" << getTcpAddress(); + std::lock_guard lck(_mtx); + os << " range("<< _range << ")"; + return os; +} + + +std::ostream& operator<<(std::ostream& os, WorkerListItemBase const& item) { + return item.dump(os); +} + + +}}} // namespace lsst::qserv::loader + + diff --git a/core/modules/loader/WorkerListItemBase.h b/core/modules/loader/WorkerListItemBase.h new file mode 100644 index 0000000000..ac988c292b --- /dev/null +++ b/core/modules/loader/WorkerListItemBase.h @@ -0,0 +1,116 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_WORKERLISTBASE_H +#define LSST_QSERV_LOADER_WORKERLISTBASE_H + +// system headers +#include +#include +#include +#include + + +// Qserv headers +#include "loader/KeyRange.h" +#include "loader/NetworkAddress.h" +#include "loader/Updateable.h" + + +namespace lsst { +namespace qserv { +namespace loader { + +class Central; + +class WorkerListItemBase : public std::enable_shared_from_this { +public: + using BasePtr = std::shared_ptr; + using BaseWPtr = std::weak_ptr; + + WorkerListItemBase() = delete; + WorkerListItemBase(WorkerListItemBase const&) = delete; + WorkerListItemBase& operator=(WorkerListItemBase const&) = delete; + + virtual ~WorkerListItemBase() = default; + + /// @return return the previous range value. + KeyRange setRangeString(KeyRange const& strRange); + + /// @return the current range. + KeyRange getRangeString() const { + std::lock_guard lck(_mtx); + return _range; + } + + NetworkAddress getUdpAddress() const { return _udpAddress.getAddress(); } + + NetworkAddress getTcpAddress() const { return _tcpAddress.getAddress(); } + + /// Set the UDP address to 'addr'. This can only be done once, + /// so 'addr' needs to be correct. + /// @return true if the address was set to 'addr' + bool setUdpAddress(NetworkAddress const& addr) { return _udpAddress.setAddress(addr); } + + /// Set the TCP address to 'addr'. This can only be done once, + /// so 'addr' needs to be correct. 
+ /// @return true if the address was set to 'addr' + bool setTcpAddress(NetworkAddress const& addr) { return _tcpAddress.setAddress(addr); } + + uint32_t getId() const { return _wId; } + + virtual void addDoListItems(Central *central) = 0; + + virtual std::ostream& dump(std::ostream& os) const; + + std::string dump() const { + std::stringstream os; + dump(os); + return os.str(); + } + + friend std::ostream& operator<<(std::ostream& os, WorkerListItemBase const& item); +protected: + WorkerListItemBase(uint32_t wId) : _wId(wId) {} + WorkerListItemBase(uint32_t wId, + NetworkAddress const& udpAddress, + NetworkAddress const& tcpAddress) + : _wId(wId) { + setUdpAddress(udpAddress); + setTcpAddress(tcpAddress); + } + + uint32_t const _wId; ///< worker id, immutable. + KeyRange _range; ///< min and max range for this worker. + mutable std::mutex _mtx; ///< protects _range. Child classes may use it to protect additional members. + +private: + /// _udpAddress and _tcpAddress only have their values set to a valid value once and then + /// they remain constant. + NetworkAddressLatch _udpAddress; ///< empty string indicates address invalid. + NetworkAddressLatch _tcpAddress; ///< empty string indicates address invalid. +}; + +}}} // namespace lsst::qserv::loader + +#endif // LSST_QSERV_LOADER_WORKERLISTBASE_H diff --git a/core/modules/loader/WorkerServer.cc b/core/modules/loader/WorkerServer.cc new file mode 100644 index 0000000000..92923fbd67 --- /dev/null +++ b/core/modules/loader/WorkerServer.cc @@ -0,0 +1,181 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + + +// Class header +#include "loader/WorkerServer.h" + +// System headers +#include + +// Third-party headers + +// Qserv headers +#include "loader/CentralWorker.h" +#include "loader/LoaderMsg.h" +#include "proto/loader.pb.h" +#include "proto/ProtoImporter.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.WorkerServer"); +} + +namespace lsst { +namespace qserv { +namespace loader { + +BufferUdp::Ptr WorkerServer::parseMsg(BufferUdp::Ptr const& data, + boost::asio::ip::udp::endpoint const& senderEndpoint) { + std::string const funcName("WorkerServer::parseMsg"); + BufferUdp::Ptr sendData; /// nullptr for empty response. + LoaderMsg inMsg; + inMsg.parseFromData(*data); + LOGS(_log, LOG_LVL_INFO, funcName << " sender " << senderEndpoint << + " kind=" << inMsg.msgKind->element << + " data length=" << data->getAvailableWriteLength()); + switch (inMsg.msgKind->element) { + case LoaderMsg::MAST_INFO: + // TODO handle a message with information about the master + break; + case LoaderMsg::MAST_WORKER_LIST: + _centralWorker->getWorkerList()->workerListReceive(data); + break; + case LoaderMsg::MSG_RECEIVED: + _msgRecieved(inMsg, data, senderEndpoint); + sendData.reset(); // never send a response back for one of these, infinite loop. 
+ break; + case LoaderMsg::MAST_WORKER_INFO: + _centralWorker->workerInfoReceive(data); + break; + case LoaderMsg::KEY_INSERT_REQ: + _centralWorker->workerKeyInsertReq(inMsg, data); + break; + case LoaderMsg::KEY_LOOKUP_REQ: + _centralWorker->workerKeyInfoReq(inMsg, data); + break; + case LoaderMsg::WORKER_KEYS_INFO_REQ: + _centralWorker->workerWorkerKeysInfoReq(inMsg, data); + break; + case LoaderMsg::WORKER_RIGHT_NEIGHBOR: + _centralWorker->workerWorkerSetRightNeighbor(inMsg, data); + break; + case LoaderMsg::WORKER_LEFT_NEIGHBOR: + _centralWorker->workerWorkerSetLeftNeighbor(inMsg, data); + break; + + // Following not expected by worker + case LoaderMsg::KEY_LOOKUP: + case LoaderMsg::MAST_INFO_REQ: + case LoaderMsg::MAST_WORKER_LIST_REQ: + case LoaderMsg::MAST_WORKER_INFO_REQ: + case LoaderMsg::MAST_WORKER_ADD_REQ: + // Response for known but unexpected message. + sendData = prepareReplyMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR, "unexpected Msg Kind"); + // Fallthrough + default: + sendData = prepareReplyMsg(senderEndpoint, inMsg, LoaderMsg::STATUS_PARSE_ERR, "unknownMsgKind"); + } + + return sendData; +} + + +BufferUdp::Ptr WorkerServer::prepareReplyMsg(boost::asio::ip::udp::endpoint const& senderEndpoint, + LoaderMsg const& inMsg, + int status, std::string const& msgTxt) { + + if (status != LoaderMsg::STATUS_SUCCESS) { + LOGS(_log,LOG_LVL_WARN, "Error response Original from " << senderEndpoint << + " msg=" << msgTxt << " inMsg=" << inMsg.getStringVal()); + } + + LoaderMsg outMsg(LoaderMsg::MSG_RECEIVED, inMsg.msgId->element, getOurHostName(), getOurPort()); + + // create the proto buffer + proto::LdrMsgReceived protoBuf; + protoBuf.set_originalid(inMsg.msgId->element); + protoBuf.set_originalkind(inMsg.msgKind->element); + protoBuf.set_status(LoaderMsg::STATUS_PARSE_ERR); + protoBuf.set_errmsg(msgTxt); + protoBuf.set_dataentries(0); + + StringElement respBuf; + protoBuf.SerializeToString(&(respBuf.element)); + + auto sendData = 
std::make_shared(1000); // this message should be fairly small. + outMsg.appendToData(*sendData); + respBuf.appendToData(*sendData); + return sendData; +} + + +void WorkerServer::_msgRecieved(LoaderMsg const& inMsg, BufferUdp::Ptr const& data, + boost::asio::ip::udp::endpoint const& senderEndpoint) { + bool success = true; + // This is only really expected for parsing errors. Most responses to + // requests come in as normal messages. + StringElement::Ptr seData = std::dynamic_pointer_cast(MsgElement::retrieve(*data, "WorkerServer::_msgRecieved")); + if (seData == nullptr) { + success = false; + } + + std::unique_ptr protoBuf; + if (success) { + protoBuf = seData->protoParse(); + if (protoBuf == nullptr) { success = false; } + } + + std::stringstream os; + int status = LoaderMsg::STATUS_PARSE_ERR; + + if (success) { + auto originalId = protoBuf->originalid(); + auto originalKind = protoBuf->originalkind(); + status = protoBuf->status(); + auto errMsg = protoBuf->errmsg(); + os << " sender=" << senderEndpoint << + " id=" << originalId << " kind=" << originalKind << " status=" << status << + " msg=" << errMsg; + } else { + os << " Failed to parse MsgRecieved! sender=" << senderEndpoint; + } + + if (status != LoaderMsg::STATUS_SUCCESS) { + ++_errCount; + LOGS(_log, LOG_LVL_WARN, "MsgRecieved Message sent by this server caused error at its target" << + " errCount=" << _errCount << os.str()); + } else { + // There shouldn't be many of these, unless there's a need to time things. + LOGS(_log, LOG_LVL_INFO, "MsgRecieved " << os.str()); + } +} + +}}} // namespace lsst:qserv::loader + + + + + diff --git a/core/modules/loader/WorkerServer.h b/core/modules/loader/WorkerServer.h new file mode 100644 index 0000000000..24beef11d6 --- /dev/null +++ b/core/modules/loader/WorkerServer.h @@ -0,0 +1,71 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. 
+ * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + * + */ +#ifndef LSST_QSERV_LOADER_WORKERSERVER_H +#define LSST_QSERV_LOADER_WORKERSERVER_H + +// system headers +#include +#include + +// third party headers +#include "boost/asio.hpp" + +// Qserv headers +#include "loader/ServerUdpBase.h" +#include "loader/WWorkerList.h" + +namespace lsst { +namespace qserv { +namespace loader { + +class CentralWorker; + +class WorkerServer : public ServerUdpBase { +public: + WorkerServer(boost::asio::io_service& ioService, std::string const& host, int port, CentralWorker* centralWorker) + : ServerUdpBase(ioService, host, port), _centralWorker(centralWorker) {} + + WorkerServer() = delete; + + ~WorkerServer() override = default; + + BufferUdp::Ptr parseMsg(BufferUdp::Ptr const& data, + boost::asio::ip::udp::endpoint const& endpoint) override; + + BufferUdp::Ptr prepareReplyMsg(boost::asio::ip::udp::endpoint const& senderEndpoint, + LoaderMsg const& inMsg, + int status, std::string const& msgTxt); // TODO shows up in both MasterServer and WorkerServer + +private: + void _msgRecieved(LoaderMsg const& inMsg, BufferUdp::Ptr const& data, + boost::asio::ip::udp::endpoint const& senderEndpoint); + + + CentralWorker* _centralWorker; +}; + + +}}} // namespace lsst::qserv::loader + +#endif 
// LSST_QSERV_LOADER_WORKERSERVER_H diff --git a/core/modules/loader/appClientNum.cc b/core/modules/loader/appClientNum.cc new file mode 100644 index 0000000000..0afdc23f9a --- /dev/null +++ b/core/modules/loader/appClientNum.cc @@ -0,0 +1,308 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2019 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + + +// System headers +#include +#include + +// Third-party headers +#include "boost/lexical_cast.hpp" + +// qserv headers +#include "loader/CentralClient.h" +#include "loader/ClientConfig.h" +#include "loader/Util.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.appClient"); +} + +using namespace lsst::qserv::loader; +using boost::asio::ip::udp; + +typedef std::list KeyInfoDataList; + +/// @return true if the list is empty. 
+bool keyInsertListClean(KeyInfoDataList& kList, int& successCount, int& failedCount) {
+    // Drop every request that is finished (or null), counting each one as a
+    // success or a failure. Requests still in flight stay on the list.
+    for(auto iter=kList.begin(); iter != kList.end();) {
+        if (*iter == nullptr || (*iter)->isFinished()) {
+            KeyInfoData::Ptr const& kPtr = *iter;
+            if (kPtr == nullptr) {
+                // A null entry means the request was never created; it must be
+                // counted as a failure without being dereferenced.
+                ++failedCount;
+                LOGS(_log, LOG_LVL_WARN, "insert failed, null KeyInfoData");
+            } else if (kPtr->success) {
+                ++successCount;
+            } else {
+                ++failedCount;
+                LOGS(_log, LOG_LVL_WARN, "insert failed " << *kPtr);
+            }
+            iter = kList.erase(iter);
+        } else {
+            ++iter;
+        }
+    }
+    return kList.empty();
+}
+
+
+/// Get a repeatable value for the chunk and subchunk numbers. It's arbitrary for
+/// the test as there just needs to be some check that what was written in for
+/// the key is the same as what was read
+int calcChunkFrom(uint64_t j) {
+    return j % 10000;
+}
+int calcSubchunkFrom(uint64_t j) {
+    return j % 100;
+}
+
+
+/// Request insertion of key j, with the chunk and subchunk derived from j.
+/// @return the request object returned by CentralClient::keyInsertReq.
+KeyInfoData::Ptr clientAdd(CentralClient& central, uint64_t j) {
+    CompositeKey cKey(j);
+    int chunk = calcChunkFrom(j);
+    int subchunk = calcSubchunkFrom(j);
+    LOGS(_log, LOG_LVL_INFO, "clientAdd " << cKey);
+    return central.keyInsertReq(cKey, chunk, subchunk);
+}
+
+
+/// @return true if the list is empty.
+bool keyLookupListClean(KeyInfoDataList& kList, int& successCount, int& failedCount) {
+    // Same bookkeeping as keyInsertListClean, but a successful lookup only
+    // counts when the returned chunk/subchunk match the values derived from the key.
+    for(auto iter=kList.begin(); iter != kList.end();) {
+        if (*iter == nullptr || (*iter)->isFinished()) {
+            KeyInfoData::Ptr const& kPtr = *iter;
+            if (kPtr == nullptr) {
+                // Never dereference a null entry; count it as a failed lookup.
+                ++failedCount;
+                LOGS(_log, LOG_LVL_WARN, "lookup failed, null KeyInfoData");
+            } else if (kPtr->success) {
+                // check the values
+                uint64_t j = kPtr->key.kInt;
+                // expected chunk and subchunk values.
+                int expChunk = calcChunkFrom(j);
+                int expSubchunk = calcSubchunkFrom(j);
+                if (kPtr->chunk == expChunk && kPtr->subchunk == expSubchunk) {
+                    ++successCount;
+                } else {
+                    ++failedCount;
+                    LOGS(_log, LOG_LVL_WARN, "lookup failed, bad values, expected c=" << expChunk <<
+                         " sc=" << expSubchunk << " found=" << *kPtr);
+                }
+            } else {
+                ++failedCount;
+                LOGS(_log, LOG_LVL_WARN, "lookup failed " << *kPtr);
+            }
+            iter = kList.erase(iter);
+        } else {
+            ++iter;
+        }
+    }
+    return kList.empty();
+}
+
+
+/// Request a lookup of key j.
+/// @return the request object returned by CentralClient::keyLookupReq.
+KeyInfoData::Ptr clientAddLookup(CentralClient& central, uint64_t j) {
+    CompositeKey cKey(j);
+    return central.keyLookupReq(cKey);
+}
+
+/// @return the bits of `in` as a string of '0' and '1', most significant bit first.
+std::string bitsStr(uint64_t in) {
+    std::string str;
+    uint64_t const bits = sizeof(in) * 8;
+    for (uint64_t j=bits; j>0; --j) {
+        uint64_t const base = 1;
+        if ((base << (j - 1)) & in) {
+            str += "1";
+        } else {
+            str += "0";
+        }
+    }
+    return str;
+}
+
+
+/// @return `in` with its bit order reversed (bit 0 becomes the top bit, and so on).
+uint64_t reverseBits(uint64_t in) {
+    uint64_t out = 0;
+    uint64_t const bits = sizeof(in) * 8;
+    std::cout << "bits=" << bits << std::endl;
+    // NOTE(review): the loop body and the start of the log statement were lost
+    // when this patch text was extracted (everything between '<' and '>').
+    // They are reconstructed here from the function name and the surviving
+    // tail of the statement; confirm the log wording and level against the
+    // repository copy.
+    for (uint64_t j=0; j<bits; ++j) {
+        uint64_t const base = 1;
+        if ((base << j) & in) {
+            out |= base << ((bits - 1) - j);
+        }
+    }
+    LOGS(_log, LOG_LVL_INFO, "reverseBits " << bitsStr(in) << " -> " << bitsStr(out));
+    return out;
+}
+
+
+/// Insert the keys numStart..numEnd (argv[1], argv[2]) through a CentralClient,
+/// wait for the inserts to finish, then look every key up again and verify the
+/// returned chunk/subchunk. An optional argv[3] names the client config file.
+/// Returns 1 on bad arguments, start-up failure, or any failed insert/lookup.
+int main(int argc, char* argv[]) {
+
+    bool reverse = true; // When true, reverse bits before inserting or looking up.
+    std::string cCfgFile("core/modules/loader/config/client1.cnf");
+    if (argc < 3) {
+        LOGS(_log, LOG_LVL_ERROR, "usage: appClientNum <startingNumber> <endingNumber> [clientConfigFile]");
+        return 1;
+    }
+    uint64_t numStart = 0;
+    uint64_t numEnd = 0;
+    try {
+        numStart = boost::lexical_cast<uint64_t>(argv[1]);
+        numEnd = boost::lexical_cast<uint64_t>(argv[2]);
+    } catch (boost::bad_lexical_cast const& e) {
+        // Report a bad number instead of terminating on an uncaught exception.
+        LOGS(_log, LOG_LVL_ERROR, "start and end must be unsigned integers e=" << e.what());
+        return 1;
+    }
+    if (argc > 3) {
+        cCfgFile = argv[3];
+    }
+    LOGS(_log, LOG_LVL_INFO, "start=" << numStart << " end=" << numEnd << " cCfg=" << cCfgFile);
+    if (numEnd == 0) {
+        // With an unsigned counter the descending loop below could never
+        // terminate if the end value were 0.
+        LOGS(_log, LOG_LVL_ERROR, "end cannot equal 0");
+        return 1;
+    }
+
+
+    std::string const ourHost = getOurHostName(0);
+    LOGS(_log, LOG_LVL_INFO, "ourHost=" << ourHost);
+    boost::asio::io_service ioService;
+
+    ClientConfig cCfg(cCfgFile);
+    LOGS(_log, LOG_LVL_INFO, "ClientConfig cCfg=" << cCfg);
+    CentralClient cClient(ioService, ourHost, cCfg);
+    try {
+        cClient.start();
+    } catch (boost::system::system_error const& e) {
+        LOGS(_log, LOG_LVL_ERROR, "cClient.start() failed e=" << e.what());
+        return 1;
+    }
+
+    cClient.runServer();
+
+    KeyInfoDataList kList;
+    int successCount = 0;
+    int failedCount = 0;
+    int totalKeyCount = 0;
+
+    TimeOut::TimePoint insertBegin = TimeOut::Clock::now();
+
+    int modInsertCheck = cClient.getDoListMaxInserts()/4;
+    if (modInsertCheck < 1) modInsertCheck = 1;
+    if (numEnd >= numStart) {
+        totalKeyCount = (numEnd - numStart) + 1;
+        for (uint64_t j=numStart; j<=numEnd; ++j) {
+            uint64_t key = (reverse) ? reverseBits(j) : j;
+            kList.push_back(clientAdd(cClient, key));
+            // occasionally trim the list
+            if (j%modInsertCheck == 0) keyInsertListClean(kList, successCount, failedCount);
+        }
+    } else {
+        totalKeyCount = (numStart - numEnd) + 1;
+        for (uint64_t j=numStart; j>=numEnd; --j) {
+            uint64_t key = (reverse) ? reverseBits(j) : j;
+            kList.push_back(clientAdd(cClient, key));
+            // occasionally trim the list
+            if (j%modInsertCheck == 0) keyInsertListClean(kList, successCount, failedCount);
+        }
+    }
+
+    int count = 0;
+    // If all the requests are done, the list should be empty.
+    // Wait up to 1 second per 1000 keys. (System does a bit better than 1000keys per second.)
+    int waitForKeysCount = totalKeyCount/1000;
+    int maxWaitCount = 16; // minimum wait to allow for a few retries.
+    if (waitForKeysCount > maxWaitCount) maxWaitCount = waitForKeysCount;
+    // The limit is maxWaitCount so that the minimum wait above also applies
+    // to runs with fewer than 1000 keys.
+    while (!keyInsertListClean(kList, successCount, failedCount) && count < maxWaitCount) {
+        LOGS(_log, LOG_LVL_INFO, "waiting for inserts to finish count=" << count);
+        sleep(1);
+        ++count;
+    }
+
+
+    if (!kList.empty()) {
+        LOGS(_log, LOG_LVL_WARN, "kList not empty, size=" << kList.size());
+        std::stringstream ss;
+        for (auto kPtr:kList) {
+            ss << "elem=" << *kPtr << "\n";
+        }
+        LOGS(_log, LOG_LVL_WARN, ss.str());
+    }
+
+    if (!kList.empty() || failedCount > 0) {
+        LOGS(_log, LOG_LVL_ERROR, "FAILED to insert all elements. success=" << successCount <<
+             " failed=" << failedCount << " size=" << kList.size());
+        return 1;
+    }
+
+    LOGS(_log, LOG_LVL_INFO, "inserted all elements. success=" << successCount <<
+         " failed=" << failedCount << " size=" << kList.size());
+
+    TimeOut::TimePoint insertEnd = TimeOut::Clock::now();
+
+    // Lookup answers
+    auto nStart = numStart;
+    auto nEnd = numEnd;
+    if (nEnd < nStart) {
+        nStart = numEnd;
+        nEnd = numStart;
+    }
+    successCount = 0;
+    failedCount = 0;
+    int modLookupCheck = cClient.getDoListMaxLookups()/4;
+    if (modLookupCheck < 1) modLookupCheck = 1;
+    for (uint64_t j=nStart; j<=nEnd; ++j) {
+        uint64_t key = (reverse) ? reverseBits(j) : j;
+        kList.push_back(clientAddLookup(cClient, key));
+        // occasionally trim the list
+        if (j%modLookupCheck == 0) keyLookupListClean(kList, successCount, failedCount);
+    }
+
+    count = 0;
+    // If all the requests are done, the list should be empty.
+    // About 1 second per 1000 keys, with the same minimum wait as for inserts.
+    while (!keyLookupListClean(kList, successCount, failedCount) && count < maxWaitCount) {
+        LOGS(_log, LOG_LVL_INFO, "waiting for lookups to finish count=" << count);
+        sleep(1);
+        ++count;
+    }
+
+    if (!kList.empty()) {
+        LOGS(_log, LOG_LVL_WARN, "kList not empty, size=" << kList.size());
+        std::stringstream ss;
+        for (auto kPtr:kList) {
+            ss << "elem=" << *kPtr << "\n";
+        }
+        LOGS(_log, LOG_LVL_WARN, ss.str());
+    }
+
+    if (!kList.empty() || failedCount > 0) {
+        LOGS(_log, LOG_LVL_ERROR, "FAILED to lookup all elements. success=" << successCount <<
+             " failed=" << failedCount << " size=" << kList.size());
+        return 1;
+    }
+
+    LOGS(_log, LOG_LVL_WARN, "lookup all elements. success=" << successCount <<
+         " failed=" << failedCount << " size=" << kList.size());
+
+    TimeOut::TimePoint lookupEnd = TimeOut::Clock::now();
+
+    LOGS(_log, LOG_LVL_WARN, "DONE inserts seconds=" <<
+         std::chrono::duration_cast<std::chrono::seconds>(insertEnd - insertBegin).count());
+    LOGS(_log, LOG_LVL_WARN, "DONE lookups seconds=" <<
+         std::chrono::duration_cast<std::chrono::seconds>(lookupEnd - insertEnd).count());
+    ioService.stop();
+    LOGS(_log, LOG_LVL_WARN, "client DONE");
+    while(true) sleep(100); // prevent kubernetes from restarting this TODO: make this program run as a job.
+    return 0;
+}
+
+
diff --git a/core/modules/loader/appMaster.cc b/core/modules/loader/appMaster.cc
new file mode 100644
index 0000000000..f141f70b59
--- /dev/null
+++ b/core/modules/loader/appMaster.cc
@@ -0,0 +1,71 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2019 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the LSST License Statement and
+ * the GNU General Public License along with this program. If not,
+ * see <http://www.lsstcorp.org/LegalNotices/>.
+ */
+
+
+// System headers
+// NOTE(review): the target of the system include below was lost when this
+// text was extracted; restore it from the repository copy.
+#include
+
+// qserv headers
+#include "loader/CentralMaster.h"
+#include "loader/Util.h"
+
+// LSST headers
+#include "lsst/log/Log.h"
+
+namespace {
+LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.appMaster");
+}
+
+using namespace lsst::qserv::loader;
+using boost::asio::ip::udp;
+
+
+/// Start a loader master: read the master configuration (argv[1] if given,
+/// otherwise the default path below), start a CentralMaster on this host,
+/// run its server, then sleep in a loop.
+/// Returns 1 if CentralMaster::start() throws boost::system::system_error.
+int main(int argc, char* argv[]) {
+    std::string mCfgFile("core/modules/loader/config/master.cnf");
+    if (argc > 1) {
+        mCfgFile = argv[1];
+    }
+    LOGS(_log, LOG_LVL_INFO, "masterCfg=" << mCfgFile);
+
+    std::string const ourHost = getOurHostName(0);
+    LOGS(_log, LOG_LVL_INFO, "ourHost=" << ourHost);
+    boost::asio::io_service ioService;
+
+    MasterConfig mCfg(mCfgFile);
+    CentralMaster cMaster(ioService, ourHost, mCfg);
+    try {
+        cMaster.start();
+    } catch (boost::system::system_error const& e) {
+        LOGS(_log, LOG_LVL_ERROR, "cMaster.start() failed e=" << e.what());
+        return 1;
+    }
+    cMaster.runServer();
+
+    // Nothing in this function ever clears `loop`, so the statements after
+    // the while are not reached.
+    bool loop = true;
+    while(loop) {
+        sleep(10);
+    }
+    ioService.stop();
+    LOGS(_log, LOG_LVL_INFO, "master DONE");
+}
+
diff --git a/core/modules/loader/appTest.cc b/core/modules/loader/appTest.cc
new file mode 100644
index 0000000000..458721b606
--- /dev/null
+++ b/core/modules/loader/appTest.cc
@@ -0,0 +1,669 @@
+// -*- LSST-C++ -*-
+/*
+ * LSST Data Management System
+ * Copyright 2018 AURA/LSST.
+ *
+ * This product includes software developed by the
+ * LSST Project (http://www.lsst.org/).
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// System headers +#include + +// Third party headers +#include "boost/asio.hpp" + +// Qserv headers +#include "loader/CentralClient.h" +#include "loader/CentralMaster.h" +#include "loader/CentralWorker.h" +#include "loader/ClientConfig.h" +#include "loader/LoaderMsg.h" +#include "loader/MasterServer.h" +#include "loader/ServerTcpBase.h" +#include "loader/WorkerConfig.h" +#include "loader/WorkerServer.h" +#include "proto/loader.pb.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.test"); + + +void initMDC() { + LOG_MDC("LWP", std::to_string(lsst::log::lwpID())); +} + +} + +using namespace lsst::qserv::loader; +using boost::asio::ip::udp; + +struct KeyChSch { + KeyChSch(CompositeKey const& k, int c, int sc) : key(k), chunk(c), subchunk(sc) {} + CompositeKey key; + int chunk; + int subchunk; +}; + + +std::ostream& operator<<(std::ostream& os, KeyChSch const& kcs) { + os << "key=" << kcs.key << " chunk=" << kcs.chunk << " subchunk=" << kcs.subchunk; + return os; +} + + +int main(int argc, char* argv[]) { + + LOG_MDC_INIT(initMDC); + + UInt16Element num16(1 | 2 << 8); + uint16_t origin16 = num16.element; + uint16_t net16 = num16.changeEndianessOnLittleEndianOnly(num16.element); + uint16_t host16 = num16.changeEndianessOnLittleEndianOnly(net16); + 
LOGS(_log, LOG_LVL_INFO, "origin16=" << origin16 << " hex=" << std::hex << origin16);
+    LOGS(_log, LOG_LVL_INFO, "net16=" << net16 << " hex=" << std::hex << net16);
+    LOGS(_log, LOG_LVL_INFO, "host16=" << host16 << " hex=" << std::hex << host16);
+    // Converting twice must give back the original value.
+    if (host16 != origin16) {
+        LOGS(_log, LOG_LVL_ERROR, "UInt16NumElement did NOT match host=" << host16 << " orig=" << origin16);
+        exit(-1);
+    } else {
+        LOGS(_log, LOG_LVL_INFO, "UInt16NumElement match host=origin=" << host16);
+    }
+
+    // Same round-trip check for the 32 bit element; each byte holds a distinct value.
+    UInt32Element num32(1 | 2 << 8 | 3 << 16 | 4 << 24);
+    uint32_t origin32 = num32.element;
+    uint32_t net32 = num32.changeEndianessOnLittleEndianOnly(num32.element);
+    uint32_t host32 = num32.changeEndianessOnLittleEndianOnly(net32);
+    LOGS(_log, LOG_LVL_INFO, "origin32=" << origin32 << " hex=" << std::hex << origin32);
+    LOGS(_log, LOG_LVL_INFO, "net32=" << net32 << " hex=" << std::hex << net32);
+    LOGS(_log, LOG_LVL_INFO, "host32=" << host32 << " hex=" << std::hex << host32);
+    if (host32 != origin32) {
+        LOGS(_log, LOG_LVL_ERROR, "UInt32NumElement did NOT match host=" << host32 << " orig=" << origin32);
+        exit(-1);
+    } else {
+        LOGS(_log, LOG_LVL_INFO, "UInt32NumElement match host=origin=" << host32);
+    }
+
+
+    // Build a 64 bit value whose bytes are 1..8 (lowest byte first), then
+    // repeat the round-trip check.
+    uint64_t testVal = 0;
+    for (uint64_t j=0; j < 8; ++j) {
+        testVal |= (j + 1) << (8*j);
+    }
+    UInt64Element num64(testVal);
+    uint64_t origin64 = num64.element;
+    uint64_t net64 = num64.changeEndianessOnLittleEndianOnly(num64.element);
+    uint64_t host64 = num64.changeEndianessOnLittleEndianOnly(net64);
+    LOGS(_log, LOG_LVL_INFO, "origin64=" << origin64 << " hex=" << std::hex << origin64);
+    LOGS(_log, LOG_LVL_INFO, "net64=" << net64 << " hex=" << std::hex << net64);
+    LOGS(_log, LOG_LVL_INFO, "host64=" << host64 << " hex=" << std::hex << host64);
+    if (host64 != origin64) {
+        LOGS(_log, LOG_LVL_ERROR, "UInt64NumElement did NOT match host=" << host64 << " orig=" << origin64);
+        return -1;
+    } else {
+        LOGS(_log, LOG_LVL_INFO, "UInt64NumElement match host=origin=" << host64);
+    }
+
+
+
std::vector elements; + elements.push_back(std::make_shared("Simple")); + elements.push_back(std::make_shared("")); + elements.push_back(std::make_shared(" :lakjserhrfjb;iouha93219876$%#@#\n$%^ #$#%R@##$@@@@$kjhdghrnfgh ")); + elements.push_back(std::make_shared(25027)); + elements.push_back(std::make_shared(338999)); + elements.push_back(std::make_shared(1234567)); + elements.push_back(std::make_shared("One last string.")); + /// Add one really long string, which can happen when using this for TCP. Something + /// where the size would not fit in an uint16_t. + std::string reallyLongStr; + { + for(int j=0; j<100000; ++j) { + reallyLongStr += std::to_string(j%10); + } + } + elements.push_back(std::make_shared(reallyLongStr)); + + /// An exceptionally large buffer is needed as the sample data in 'elements' is far greater + /// than anything that should be sent in a UDP packet. + BufferUdp data(200000); + + // Write to the buffer. + try { + std::stringstream os; + for (auto& ele : elements) { + if (not ele->appendToData(data)) { + throw LoaderMsgErr(ERR_LOC, "Failed to append " + ele->getStringVal() + + " data:" + data.dumpStr()); + } + } + LOGS(_log, LOG_LVL_INFO, "data:" << data.dumpStr()); + } catch (LoaderMsgErr const& ex) { + LOGS(_log, LOG_LVL_ERROR, "Write to buffer FAILED msg=" << ex.what()); + exit(-1); + } + LOGS(_log, LOG_LVL_INFO, "Done writing to buffer."); + + std::vector outElems; + // Read from the buffer. + try { + for (auto& ele : elements) { + // check all elements + char elemType = MsgElement::NOTHING; + if (not MsgElement::retrieveType(data, elemType)) { + throw LoaderMsgErr(ERR_LOC, "Type was expected but not found!" 
+ data.dumpStr()); + } + MsgElement::Ptr outEle = MsgElement::create(elemType); + if (not outEle->retrieveFromData(data)) { + throw LoaderMsgErr(ERR_LOC, "Failed to retrieve elem=" + outEle->getStringVal() + + " data:" + data.dumpStr()); + } + if (!MsgElement::equal(ele.get(), outEle.get())) { + LOGS(_log, LOG_LVL_ERROR, + "FAILED " << ele->getStringVal() << " != " << outEle->getStringVal()); + exit(-1); + } else { + LOGS(_log, LOG_LVL_INFO, "matched " << ele->getStringVal()); + } + } + } catch (LoaderMsgErr const& ex) { + LOGS(_log, LOG_LVL_ERROR, "Read from buffer FAILED msg=" << ex.what()); + exit(-1); + } + + ////////////////////////////////////////////////////////////////////////////// + + + // test for LoaderMsg serialize and parse + LoaderMsg lMsg(LoaderMsg::MAST_INFO_REQ, 1, "127.0.0.1", 9876); + BufferUdp lBuf; + lMsg.appendToData(lBuf); + { + LoaderMsg outMsg; + outMsg.parseFromData(lBuf); + if (lMsg.msgKind->element != outMsg.msgKind->element || + lMsg.msgId->element != outMsg.msgId->element || + lMsg.senderHost->element != outMsg.senderHost->element || + lMsg.senderPort->element != outMsg.senderPort->element) { + LOGS(_log, LOG_LVL_ERROR, + "FAILED messages didn't match out:" << outMsg.getStringVal() << + " != lMsg" << lMsg.getStringVal()); + return -1; + } else { + LOGS(_log, LOG_LVL_INFO, "msgs matched " << outMsg.getStringVal()); + } + } + + + { + try { + LOGS(_log, LOG_LVL_INFO, "ServTcpBase a"); + boost::asio::io_context io_context; + LOGS(_log, LOG_LVL_INFO, "ServTcpBase b"); + ServerTcpBase server(io_context, 1041); + LOGS(_log, LOG_LVL_INFO, "ServTcpBase c"); + server.runThread(); + LOGS(_log, LOG_LVL_INFO, "ServTcpBase d"); + + server.testConnect(); + LOGS(_log, LOG_LVL_INFO, "ServTcpBase e"); + sleep(5); + } + catch (std::exception const& e) { + std::cerr << e.what() << std::endl; + } + } + + + //////////////////////////////////////////////////////////////////////////// + { + bool threw = false; + try { + MasterConfig 
masterCfg("core/modules/loader/config/masterBad.cnf"); + } catch (ConfigErr const& e) { + threw = true; + LOGS(_log, LOG_LVL_INFO, "MasterConfig masterBad threw " << e.what()); + } + if (not threw) { + LOGS(_log, LOG_LVL_ERROR, "MasterConfig masterBad.cnf should have thrown!!"); + exit(-1); + } + } + + { + bool threw = false; + try { + WorkerConfig workerCfg("core/modules/loader/config/workerBad.cnf"); + } catch (ConfigErr const& e) { + threw = true; + LOGS(_log, LOG_LVL_INFO, "WorkerConfig workerBad threw " << e.what()); + } + if (not threw) { + LOGS(_log, LOG_LVL_ERROR, "WorkerConfig workerBad.cnf should have thrown!!"); + exit(-1); + } + } + + { + bool threw = false; + try { + ClientConfig workerCfg("core/modules/loader/config/clientBad.cnf"); + } catch (ConfigErr const& e) { + threw = true; + LOGS(_log, LOG_LVL_INFO, "ClientConfig clientBad threw " << e.what()); + } + if (not threw) { + LOGS(_log, LOG_LVL_ERROR, "ClientConfig workerBad.cnf should have thrown!!"); + exit(-1); + } + } + + MasterConfig masterCfg("core/modules/loader/config/master.cnf"); + LOGS(_log, LOG_LVL_INFO, "masterCfg=" << masterCfg); + + WorkerConfig workerCfg1("core/modules/loader/config/worker1.cnf"); + LOGS(_log, LOG_LVL_INFO, "workerCfg1=" << workerCfg1); + WorkerConfig workerCfg2("core/modules/loader/config/worker2.cnf"); + LOGS(_log, LOG_LVL_INFO, "workerCfg2=" << workerCfg2); + WorkerConfig workerCfg3("core/modules/loader/config/worker3.cnf"); + LOGS(_log, LOG_LVL_INFO, "workerCfg3=" << workerCfg3); + + + /// Start a master server + std::string ourHost = "127.0.0.1"; + std::string masterIP = ourHost; // normally would be get host name + boost::asio::io_service ioServiceMaster; + + boost::asio::io_service ioServiceWorker1; + boost::asio::io_context ioContext1; + + boost::asio::io_service ioServiceWorker2; + boost::asio::io_context ioContext2; + + boost::asio::io_service ioServiceClient1A; + + boost::asio::io_service ioServiceClient1B; + + boost::asio::io_service ioServiceClient2A; + + 
+    CentralMaster cMaster(ioServiceMaster, masterIP, masterCfg);
+    try {
+        cMaster.start();
+    } catch (boost::system::system_error const& e) {
+        LOGS(_log, LOG_LVL_ERROR, "cMaster.start() failed e=" << e.what());
+        exit(-1);
+    }
+    cMaster.setMaxKeysPerWorker(4);
+    // Need to start several threads so messages aren't dropped while being processed.
+    cMaster.runServer();
+
+    /// Start worker server 1
+    CentralWorker wCentral1(ioServiceWorker1, ioContext1, ourHost, workerCfg1);
+    try {
+        wCentral1.start();
+    } catch (boost::system::system_error const& e) {
+        LOGS(_log, LOG_LVL_ERROR, "wCentral1.start() failed e=" << e.what());
+        exit(-1);
+    }
+
+    wCentral1.runServer();
+
+    /// Start worker server 2
+    CentralWorker wCentral2(ioServiceWorker2, ioContext2, ourHost, workerCfg2);
+    try {
+        wCentral2.start();
+    } catch (boost::system::system_error const& e) {
+        LOGS(_log, LOG_LVL_ERROR, "wCentral2.start() failed e=" << e.what());
+        exit(-1);
+    }
+    wCentral2.runServer();
+
+    /// Start client 1A
+    ClientConfig clientCfg1("core/modules/loader/config/client1.cnf");
+    LOGS(_log, LOG_LVL_INFO, "clientCfg1=" << clientCfg1);
+    CentralClient cCentral1A(ioServiceClient1A, ourHost, clientCfg1);
+    try {
+        cCentral1A.start();
+    } catch (boost::system::system_error const& e) {
+        LOGS(_log, LOG_LVL_ERROR, "cCentral1A.start() failed e=" << e.what());
+        exit(-1);
+    }
+    cCentral1A.runServer();
+
+
+    /// Start client 2A
+    ClientConfig clientCfg2("core/modules/loader/config/client2.cnf");
+    LOGS(_log, LOG_LVL_INFO, "clientCfg2=" << clientCfg2);
+    CentralClient cCentral2A(ioServiceClient2A, ourHost, clientCfg2);
+    try {
+        cCentral2A.start();
+    } catch (boost::system::system_error const& e) {
+        LOGS(_log, LOG_LVL_ERROR, "cCentral2A.start() failed e=" << e.what());
+        exit(-1);
+    }
+    cCentral2A.runServer();
+
+    /// Start client 1B. It gets its own io_service (ioServiceClient1B), like
+    /// every other Central object here, instead of sharing client 1A's.
+    ClientConfig clientCfg3("core/modules/loader/config/client3.cnf");
+    LOGS(_log, LOG_LVL_INFO, "clientCfg3=" << clientCfg3);
+    CentralClient cCentral1B(ioServiceClient1B, ourHost, clientCfg3);
+    try {
+        cCentral1B.start();
+    } catch
(boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "cCentral1B.start() failed e=" << e.what()); + exit(-1); + } + cCentral1B.runServer(); + + + /// Unknown message kind test. Pretending to be worker1. + { + auto originalErrCount = wCentral1.getErrCount(); + LOGS(_log, LOG_LVL_INFO, "1TSTAGE testSendBadMessage start"); + wCentral1.testSendBadMessage(); + sleep(2); // TODO handshaking instead of sleep + + if (originalErrCount == wCentral1.getErrCount()) { + LOGS(_log, LOG_LVL_ERROR, "testSendBadMessage errCount did not change " << originalErrCount); + exit(-1); + } + } + + LOGS(_log, LOG_LVL_INFO, "sleeping"); + sleep(5); // TODO change to 20 second timeout with a check every 0.1 seconds. + // The workers should agree on the worker list, and it should have 2 elements. + if (wCentral1.getWorkerList()->getIdMapSize() == 0) { + LOGS(_log, LOG_LVL_ERROR, "ERROR Worker list is empty!!!"); + exit(-1); + } + LOGS(_log, LOG_LVL_INFO, "MasterList " << cMaster.getWorkerList()->dump()); + LOGS(_log, LOG_LVL_INFO, "List1 " << wCentral1.getWorkerList()->dump()); + LOGS(_log, LOG_LVL_INFO, "List2 " << wCentral2.getWorkerList()->dump()); + if (not wCentral1.getWorkerList()->equal(*(wCentral2.getWorkerList()))) { + LOGS(_log, LOG_LVL_ERROR, "ERROR Worker lists do not match!!!"); + exit(-1); + } else { + LOGS(_log, LOG_LVL_INFO, "Worker lists match."); + } + + + /// Client + LOGS(_log, LOG_LVL_INFO, "3TSTAGE client register key A"); + KeyChSch keyA(CompositeKey("asdf_1"), 4001, 200001); + auto keyAInsert = cCentral1A.keyInsertReq(keyA.key, keyA.chunk, keyA.subchunk); + if (keyAInsert == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "ERROR failed insert keyA !!! 
" << keyA); + exit(-1); + } + + LOGS(_log, LOG_LVL_INFO, "4TSTAGE client register key B");; + KeyChSch keyB(CompositeKey("ndjes_bob"), 9871, 65008); + auto keyBInsert = cCentral1B.keyInsertReq(keyB.key, keyB.chunk, keyB.subchunk); + if (keyBInsert == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "ERROR failed insert keyB !!! " << keyB); + exit(-1); + } + + KeyChSch keyC(CompositeKey("asl_diebb"), 422001, 7373721); + + size_t arraySz = 1000; + std::vector keyList; + { + std::string bStr("a"); + for (size_t j=0; j keyListB; + { + for (size_t j=0; j<100000; ++j) { + std::string str("z"); + str += std::to_string(j); + keyListB.emplace_back(CompositeKey(str), j%10, j); + } + } + + // retrieve keys keyA and keyB + sleep(2); // need to sleep as it never gives up on inserts. + if (keyAInsert->isFinished() && keyBInsert->isFinished()) { + LOGS(_log, LOG_LVL_INFO, "both keyA and KeyB inserted."); + } else { + LOGS(_log, LOG_LVL_INFO, "\nkeyA and KeyB insert something did not finish"); + exit(-1); + } + + // Retrieve keyA and keyB + { + LOGS(_log, LOG_LVL_INFO, "5TSTAGE client retrieve keyB keyA"); + auto keyBInfo = cCentral1A.keyLookupReq(keyB.key); + auto keyAInfo = cCentral1A.keyLookupReq(keyA.key); + auto keyCInfo = cCentral1A.keyLookupReq(keyC.key); + + keyAInfo->waitComplete(); + keyBInfo->waitComplete(); + LOGS(_log, LOG_LVL_INFO, "5TSTAGE client retrieve DONE keyB keyA"); + LOGS(_log, LOG_LVL_INFO, "looked up keyA " << *keyAInfo); + LOGS(_log, LOG_LVL_INFO, "looked up keyB " << *keyBInfo); + + keyCInfo->waitComplete(); + LOGS(_log, LOG_LVL_INFO, "looked up (expect to fail) keyC " << *keyCInfo); + + if (keyAInfo->key != keyA.key || keyAInfo->chunk != keyA.chunk || keyAInfo->subchunk != keyA.subchunk || !keyAInfo->success) { + LOGS(_log, LOG_LVL_ERROR, "keyA lookup got incorrect value " << *keyAInfo); + exit(-1); + } + if (keyBInfo->key != keyB.key || keyBInfo->chunk != keyB.chunk || keyBInfo->subchunk != keyB.subchunk || !keyBInfo->success) { + LOGS(_log, LOG_LVL_ERROR, "keyB 
lookup got incorrect value " << *keyBInfo); + exit(-1); + } + if (keyCInfo->success) { + LOGS(_log, LOG_LVL_ERROR, "keyC lookup got incorrect value " << *keyCInfo); + exit(-1); + } + } + + + + // Add item to worker 2, test retrieval + { + LOGS(_log, LOG_LVL_INFO, "6TSTAGE client insert keyC lookup all keys"); + auto keyCInsert = cCentral2A.keyInsertReq(keyC.key, keyC.chunk, keyC.subchunk); + if (keyCInsert == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "ERROR failed insert keyC !!!" << keyC); + exit(-1); + } + + sleep(2); // need to sleep as it never gives up on inserts. + if (keyCInsert->isFinished()) { + LOGS(_log, LOG_LVL_INFO, "keyC inserted."); + } + + auto keyAInfo = cCentral1A.keyLookupReq(keyA.key); + LOGS(_log, LOG_LVL_INFO, "6TSTAGE waiting A"); + keyAInfo->waitComplete(); + + auto keyBInfo = cCentral2A.keyLookupReq(keyB.key); + LOGS(_log, LOG_LVL_INFO, "6TSTAGE waiting B"); + keyBInfo->waitComplete(); + + auto keyCInfo = cCentral2A.keyLookupReq(keyC.key); + LOGS(_log, LOG_LVL_INFO, "6TSTAGE waiting C"); + keyCInfo->waitComplete(); + + LOGS(_log, LOG_LVL_INFO, "6TSTAGE done waiting"); + if (keyAInfo->key != keyA.key || keyAInfo->chunk != keyA.chunk || keyAInfo->subchunk != keyA.subchunk || !keyAInfo->success) { + LOGS(_log, LOG_LVL_ERROR, "keyA lookup got incorrect value " << *keyAInfo); + exit(-1); + } + if (keyBInfo->key != keyB.key || keyBInfo->chunk != keyB.chunk || keyBInfo->subchunk != keyB.subchunk || !keyBInfo->success) { + LOGS(_log, LOG_LVL_ERROR, "keyB lookup got incorrect value " << *keyBInfo); + exit(-1); + } + if (keyCInfo->key != keyC.key || keyCInfo->chunk != keyC.chunk || keyCInfo->subchunk != keyC.subchunk || !keyCInfo->success) { + LOGS(_log, LOG_LVL_ERROR, "keyC lookup got incorrect value " << *keyCInfo); + exit(-1); + } + } + + + size_t kPos = 0; + { + LOGS(_log, LOG_LVL_INFO, "7TSTAGE insert several keys"); + std::vector keyInfoDataList; + + for (; kPos<10; ++kPos) { + auto& elem = keyList[kPos]; + auto keyInsertR = 
cCentral1A.keyInsertReq(elem.key, elem.chunk, elem.subchunk); + if (keyInsertR == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "ERROR failed insert a keyInsertR!!! " << elem); + exit(-1); + } + keyInfoDataList.push_back(keyInsertR); + } + + sleep(2); // need to sleep as it never gives up on inserts. + bool insertSuccess = true; + for(auto&& kiData : keyInfoDataList) { + if (not kiData->isFinished()) { + insertSuccess = false; + } + } + + if (insertSuccess) { + LOGS(_log, LOG_LVL_INFO, "insert success kPos=" << kPos); + } else { + LOGS(_log, LOG_LVL_ERROR, "insert failure kPos=" << kPos); + exit(-1); + } + + // The number of active servers should have increased from 1 to 2 + // TODO check number of servers + } + + + { + LOGS(_log, LOG_LVL_INFO, "8TSTAGE insert several keys"); + std::list keyInfoDataList; + + for (; kPosisFinished()) { + insertSuccess = false; + } else { + keyInfoDataList.erase(keyIter); + ++finished; + } + } + LOGS(_log, LOG_LVL_INFO, "seconds=" << seconds << " finished=" << finished << + " insertSuccess=" << insertSuccess); + } while (not insertSuccess); + + if (insertSuccess) { + LOGS(_log, LOG_LVL_INFO, "keyList insert success kPos=" << kPos << " sec=" << seconds); + } else { + LOGS(_log, LOG_LVL_ERROR, "keyList insert failure kPos=" << kPos << " sec=" << seconds); + exit(-1); + } + + // TODO check number of servers + + } + + { + LOGS(_log, LOG_LVL_INFO, "9TSTAGE insert many keys"); + std::list keyInfoDataList; + size_t pos = 0; + for (; posisFinished()) { + insertSuccess = false; + } else { + keyInfoDataList.erase(keyIter); + ++finished; + } + } + LOGS(_log, LOG_LVL_INFO, "seconds=" << seconds << " finished=" << finished << + " insertSuccess=" << insertSuccess << " " << keyInfoDataList.size()); + } while (not insertSuccess); + + if (insertSuccess) { + LOGS(_log, LOG_LVL_INFO, "keyListB insert success pos=" << pos << " sec=" << seconds); + } else { + LOGS(_log, LOG_LVL_ERROR, "keyListB insert failure pos=" << pos << " sec=" << seconds); + exit(-1); + } 
+ + // TODO check number of servers + + } + + sleep(10); + { + // Tests for client worker lists. + /// Clients should have lists of workers cCentral2A cCentral1B + WWorkerList::Ptr wList2A = cCentral2A.getWorkerList(); + LOGS(_log, LOG_LVL_INFO, " wList2A size=" << wList2A->getIdMapSize() << " dump=" << wList2A->dump()); + WWorkerList::Ptr wList1B = cCentral1B.getWorkerList(); + LOGS(_log, LOG_LVL_INFO, " wList1B size=" << wList1B->getIdMapSize() << " dump=" << wList1B->dump()); + // TODO: A new pair of clients should have matching lists at this time. + } + + //ioService.stop(); // this doesn't seem to work cleanly + // mastT.join(); + + LOGS(_log, LOG_LVL_INFO, "DONE"); + exit(0); +} diff --git a/core/modules/loader/appWorker.cc b/core/modules/loader/appWorker.cc new file mode 100644 index 0000000000..bd2300d7d2 --- /dev/null +++ b/core/modules/loader/appWorker.cc @@ -0,0 +1,73 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2019 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// System headers +#include +#include + +// qserv headers +#include "loader/CentralWorker.h" +#include "loader/Util.h" + +// LSST headers +#include "lsst/log/Log.h" + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.loader.appWorker"); +} + +using namespace lsst::qserv::loader; +using boost::asio::ip::udp; + +// Worker application entry point: builds a CentralWorker from a worker config file (argv[1], when given, replaces the default path), starts it, runs its server, then idles. Returns 1 if start() throws. +int main(int argc, char* argv[]) { + std::string wCfgFile("core/modules/loader/config/worker1.cnf"); + if (argc > 1) { + wCfgFile = argv[1]; + } + LOGS(_log, LOG_LVL_INFO, "workerCfg=" << wCfgFile); + + boost::asio::io_service ioService; + boost::asio::io_context ioContext; + + std::string ourHostName = getOurHostName(0); + LOGS(_log, LOG_LVL_INFO, "ourHostName=" << ourHostName); + + WorkerConfig wCfg(wCfgFile); + CentralWorker cWorker(ioService, ioContext, ourHostName, wCfg); + try { + cWorker.start(); + } catch (boost::system::system_error const& e) { + LOGS(_log, LOG_LVL_ERROR, "cWorker.start() failed e=" << e.what()); + return 1; // start() threw boost::system::system_error + } + cWorker.runServer(); + + bool loop = true; // never cleared in this function, so the loop below keeps the process alive until it is killed + while(loop) { + sleep(10); + } + ioService.stop(); // this doesn't seem to work cleanly (and is unreachable while loop stays true) + LOGS(_log, LOG_LVL_INFO, "worker DONE"); +} + diff --git a/core/modules/loader/config/client-k8s-a1.cnf b/core/modules/loader/config/client-k8s-a1.cnf new file mode 100644 index 0000000000..630403cd79 --- /dev/null +++ b/core/modules/loader/config/client-k8s-a1.cnf @@ -0,0 +1,33 @@ +# Client k8s-a1 +# +[client] +# Master information - for discovering worker key ranges. +masterHost = imaster-sts-0.imaster-svc +masterPortUdp = 10042 + +# Default worker - if a specific worker cannot be identified for +# a request, send the request to this worker. +defWorkerHost = iworker-sts-0.iworker-svc +defWorkerPortUdp = 10043 + +# Client's port - request answers will be sent here. +clientPortUdp = 10050 + +# Time to sleep between checking every item in the DoList +# in microseconds. +loopSleepTime = 50000 + +# Maximum number of lookups that can be on the DoList at a time.
+maxLookups = 5000 + +# Maximum number of inserts that can be on the DoList at a time. +maxInserts = 10000 + +# How long to sleep before checking if any lookups or inserts have completed. +maxRequestSleepTime = 10000 + +# Client thread pool size +threadPoolSize = 20 + +# IO threads +iOThreads = 200 diff --git a/core/modules/loader/config/client-k8s-a2.cnf b/core/modules/loader/config/client-k8s-a2.cnf new file mode 100644 index 0000000000..a61dd36199 --- /dev/null +++ b/core/modules/loader/config/client-k8s-a2.cnf @@ -0,0 +1,33 @@ +# Client k8s-a2 +# +[client] +# Master information - for discovering worker key ranges. +masterHost = imaster-sts-0.imaster-svc +masterPortUdp = 10042 + +# Default worker - if a specific worker cannot be identified for +# a request, send the request to this worker. +defWorkerHost = iworker-sts-1.iworker-svc +defWorkerPortUdp = 10043 + +# Client's port - request answers will be sent here. +clientPortUdp = 10050 + +# Time to sleep between checking every item in the DoList +# in microseconds. +loopSleepTime = 50000 + +# Maximum number of lookups that can be on the DoList at a time. +maxLookups = 5000 + +# Maximum number of inserts that can be on the DoList at a time. +maxInserts = 10000 + +# How long to sleep before checking if any lookups or inserts have completed. +maxRequestSleepTime = 10000 + +# Client thread pool size +threadPoolSize = 20 + +# IO threads +iOThreads = 200 diff --git a/core/modules/loader/config/client-k8s-a3.cnf b/core/modules/loader/config/client-k8s-a3.cnf new file mode 100644 index 0000000000..4dd5cd091e --- /dev/null +++ b/core/modules/loader/config/client-k8s-a3.cnf @@ -0,0 +1,33 @@ +# Client k8s-a3 +# +[client] +# Master information - for discovering worker key ranges. +masterHost = imaster-sts-0.imaster-svc +masterPortUdp = 10042 + +# Default worker - if a specific worker cannot be identified for +# a request, send the request to this worker. 
+defWorkerHost = iworker-sts-2.iworker-svc +defWorkerPortUdp = 10043 + +# Client's port - request answers will be sent here. +clientPortUdp = 10050 + +# Time to sleep between checking every item in the DoList +# in microseconds. +loopSleepTime = 50000 + +# Maximum number of lookups that can be on the DoList at a time. +maxLookups = 5000 + +# Maximum number of inserts that can be on the DoList at a time. +maxInserts = 10000 + +# How long to sleep before checking if any lookups or inserts have completed. +maxRequestSleepTime = 10000 + +# Client thread pool size +threadPoolSize = 20 + +# IO threads +iOThreads = 200 diff --git a/core/modules/loader/config/client1.cnf b/core/modules/loader/config/client1.cnf new file mode 100644 index 0000000000..2ecee95406 --- /dev/null +++ b/core/modules/loader/config/client1.cnf @@ -0,0 +1,27 @@ +# Client 1 +# +[client] +# Master information - for discovering worker key ranges. +masterHost = 127.0.0.1 +masterPortUdp = 10042 + +# Default worker - if a specific worker cannot be identified for +# a request, send the request to this worker. +defWorkerHost = 127.0.0.1 +defWorkerPortUdp = 10043 + +# Client's port - request answers will be sent here. +clientPortUdp = 10050 + +# Time to sleep between checking every item in the DoList +# in microseconds. +loopSleepTime = 100000 + +# Maximum number of lookups that can be on the DoList at a time. +maxLookups = 99000 + +# Maximum number of inserts that can be on the DoList at a time. +maxInserts = 99000 + +# Client thread pool size +threadPoolSize = 5 diff --git a/core/modules/loader/config/client2.cnf b/core/modules/loader/config/client2.cnf new file mode 100644 index 0000000000..ffded90fb4 --- /dev/null +++ b/core/modules/loader/config/client2.cnf @@ -0,0 +1,27 @@ +# Client 2 +# +[client] +# Master information - for discovering worker key ranges. 
+masterHost = 127.0.0.1 +masterPortUdp = 10042 + +# Default worker - if a specific worker cannot be identified for +# a request, send the request to this worker. +defWorkerHost = 127.0.0.1 +defWorkerPortUdp = 10044 + +# Client's port - request answers will be sent here. +clientPortUdp = 10051 + +# Time to sleep between checking every item in the DoList +# in microseconds. +loopSleepTime = 100000 + +# Maximum number of lookups that can be on the DoList at a time. +maxLookups = 99000 + +# Maximum number of inserts that can be on the DoList at a time. +maxInserts = 99000 + +# Client thread pool size +threadPoolSize = 5 diff --git a/core/modules/loader/config/client3.cnf b/core/modules/loader/config/client3.cnf new file mode 100644 index 0000000000..c09be0d6dd --- /dev/null +++ b/core/modules/loader/config/client3.cnf @@ -0,0 +1,27 @@ +# Client 3 +# +[client] +# Master information - for discovering worker key ranges. +masterHost = 127.0.0.1 +masterPortUdp = 10042 + +# Default worker - if a specific worker cannot be identified for +# a request, send the request to this worker. +defWorkerHost = 127.0.0.1 +defWorkerPortUdp = 10043 + +# Client's port - request answers will be sent here. +clientPortUdp = 10052 + +# Time to sleep between checking every item in the DoList +# in microseconds. +loopSleepTime = 100000 + +# Maximum number of lookups that can be on the DoList at a time. +maxLookups = 99000 + +# Maximum number of inserts that can be on the DoList at a time. 
+maxInserts = 99000 + +# Client thread pool size +threadPoolSize = 5 diff --git a/core/modules/loader/config/clientBad.cnf b/core/modules/loader/config/clientBad.cnf new file mode 100644 index 0000000000..223349eaaa --- /dev/null +++ b/core/modules/loader/config/clientBad.cnf @@ -0,0 +1,11 @@ +# Intentionally malformed for testing +# +[client] +masterHost = 127.0.0.1 +masterPortUdp = 10042 +defWorkerHost = 127.0.0.1 +defWorkerPortUdp = 10043 +clientortUdp = 10050 +loopSleepTime = 100000 +maxLookups = 99000 +maxInserts = 99000 diff --git a/core/modules/loader/config/master.cnf b/core/modules/loader/config/master.cnf new file mode 100644 index 0000000000..51d105b1eb --- /dev/null +++ b/core/modules/loader/config/master.cnf @@ -0,0 +1,13 @@ +# +# +[master] +portUdp = 10042 + +# When the average number of keys per worker is more than this +# a new worker should be activated if available. +maxKeysPerWorker = 999 + +# Size of the thread pool. +threadPoolSize = 10 +# Time to sleep in microseconds. +loopSleepTime = 100000 diff --git a/core/modules/loader/config/masterBad.cnf b/core/modules/loader/config/masterBad.cnf new file mode 100644 index 0000000000..11600fb846 --- /dev/null +++ b/core/modules/loader/config/masterBad.cnf @@ -0,0 +1,8 @@ +# This configuration file is intentionally incorrect for testing. +# +[master] +portdp = 9876 +maxKeysPerWorker = 1050 +threadPoolSize = 10 +# Time to sleep in microseconds. 
+loopSleepTime = 100000 diff --git a/core/modules/loader/config/worker-k8s-a.cnf b/core/modules/loader/config/worker-k8s-a.cnf new file mode 100644 index 0000000000..dfaf8ad04c --- /dev/null +++ b/core/modules/loader/config/worker-k8s-a.cnf @@ -0,0 +1,33 @@ +# Worker k8s-a +# +[worker] +# Master information +masterHost = imaster-sts-0.imaster-svc +masterPortUdp = 10042 + +# Worker ports +wPortUdp = 10043 +wPortTcp = 10143 + +# Worker thread pool size +threadPoolSize = 30 + +# IO threads +iOThreads = 100 + +# Period of time, in milliseconds, during which a key insert is considered recent +recentAddLimit = 60000 + +# Difference in number of keys stored between neighbors +# A value of 1.2 would cause a shift if either worker had +# 20% more keys than its neighbor. +thresholdNeighborShift = 1.05 + +# Maximum number of keys to shift in a single iteration. +# An iteration would be transfer, insert, verify range. +maxKeysToShift = 10000 + +# Time to sleep between checking every item in the DoList +# in microseconds. +loopSleepTime = 50000 + diff --git a/core/modules/loader/config/worker1.cnf b/core/modules/loader/config/worker1.cnf new file mode 100644 index 0000000000..fafa427299 --- /dev/null +++ b/core/modules/loader/config/worker1.cnf @@ -0,0 +1,30 @@ +# Worker 1 +# +[worker] +# Master information +masterHost = 127.0.0.1 +masterPortUdp = 10042 + +# Worker ports +wPortUdp = 10043 +wPortTcp = 10143 + +# Worker thread pool size +threadPoolSize = 50 + +# Period of time, in milliseconds, during which a key insert is considered recent +recentAddLimit = 60000 + +# Difference in number of keys stored between neighbors +# A value of 1.2 would cause a shift if either worker had +# 20% more keys than its neighbor. +thresholdNeighborShift = 1.05 + +# Maximum number of keys to shift in a single iteration. +# An iteration would be transfer, insert, verify range. +maxKeysToShift = 10000 + +# Time to sleep between checking every item in the DoList +# in microseconds.
+loopSleepTime = 100000 + diff --git a/core/modules/loader/config/worker2.cnf b/core/modules/loader/config/worker2.cnf new file mode 100644 index 0000000000..73d4b512e3 --- /dev/null +++ b/core/modules/loader/config/worker2.cnf @@ -0,0 +1,30 @@ +# Worker 2 +# +[worker] +# Master information. +masterHost = 127.0.0.1 +masterPortUdp = 10042 + +# Worker ports +wPortUdp = 10044 +wPortTcp = 10144 + +# Worker thread pool size +threadPoolSize = 50 + +# Period of time where a key insert is considered recent in milliseconds +recentAddLimit = 60000 + +# Difference in number of keys stored between neighbors +# A value of 1.2 would cause a shift if either worker had +# 20% more keys than its neighbor. +thresholdNeighborShift = 1.05 + +# Maximum number of keys to shift in a single iteration. +# An iteration would be transfer, insert, verify range. +maxKeysToShift = 10000 + +# Time to sleep between checking every item in the DoList +# in microseconds. +loopSleepTime = 100000 + diff --git a/core/modules/loader/config/worker3.cnf b/core/modules/loader/config/worker3.cnf new file mode 100644 index 0000000000..cc45d8d5ec --- /dev/null +++ b/core/modules/loader/config/worker3.cnf @@ -0,0 +1,29 @@ +# Worker 3 +# +[worker] +# Master information +masterHost = 127.0.0.1 +masterPortUdp = 10042 + +# Worker ports +wPortUdp = 10045 +wPortTcp = 10145 + +# Worker thread pool size +threadPoolSize = 50 + +# Period of time where a key insert is considered recent in milliseconds +recentAddLimit = 60000 + +# Difference in number of keys stored between neighbors +# A value of 1.2 would cause a shift if either worker had +# 20% more keys than its neighbor. +thresholdNeighborShift = 1.05 + +# Maximum number of keys to shift in a single iteration. +# An iteration would be transfer, insert, verify range. +maxKeysToShift = 10000 + +# Time to sleep between checking every item in the DoList +# in microseconds. 
+loopSleepTime = 100000 diff --git a/core/modules/loader/config/workerBad.cnf b/core/modules/loader/config/workerBad.cnf new file mode 100644 index 0000000000..463be4ff68 --- /dev/null +++ b/core/modules/loader/config/workerBad.cnf @@ -0,0 +1,12 @@ +# Intentionally bad config file for testing. +# +[worker] +masterHost = 127.0.0.1 +masterPortUdp = 10042 +wPortUdp = 10043 +wPortTcp = 10143 +wPoolSize = 11 +recentAdddLimit = 60000 +thresholdNeighborShift = 1000 +maxKeysToShift = 10000 +loopSleepTime = 100000 diff --git a/core/modules/loader/testLoader.cc b/core/modules/loader/testLoader.cc new file mode 100644 index 0000000000..15ec59c439 --- /dev/null +++ b/core/modules/loader/testLoader.cc @@ -0,0 +1,312 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * Copyright 2019 AURA/LSST. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// LSST headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "loader/CompositeKey.h" +#include "loader/ConfigBase.h" + +// Boost unit test header +#define BOOST_TEST_MODULE LoaderConfig +#include "boost/test/included/unit_test.hpp" + + +namespace test = boost::test_tools; +using namespace lsst::qserv::loader; + +BOOST_AUTO_TEST_SUITE(Suite) + +BOOST_AUTO_TEST_CASE(LoaderTest) { // Checks ConfigElement::verifyValueIsOfKind() on valid and invalid values, then CompositeKey comparison operators, copy construction and assignment. + + LOG_INFO("LoaderConfig test start"); + ConfigElement::CfgElementList cfgElemList; + std::string header("hdr"); + + LOG_INFO("Test valid values"); + + { + auto vString = ConfigElement::create(cfgElemList, header, "str1", ConfigElement::STRING, true); + vString->setValue("a string"); + BOOST_CHECK(vString->verifyValueIsOfKind()); + } + + { + auto vInt = ConfigElement::create(cfgElemList, header, "vInt1", ConfigElement::INT, true); + vInt->setValue("1234567890"); + BOOST_CHECK(vInt->verifyValueIsOfKind()); + } + + { + auto vInt = ConfigElement::create(cfgElemList, header, "vInt2", ConfigElement::INT, true); + vInt->setValue("0"); + BOOST_CHECK(vInt->verifyValueIsOfKind()); + } + + { + auto vInt = ConfigElement::create(cfgElemList, header, "vInt3", ConfigElement::INT, true); + vInt->setValue("-1"); + BOOST_CHECK(vInt->verifyValueIsOfKind()); + } + + { + auto vInt = ConfigElement::create(cfgElemList, header, "vInt4", ConfigElement::INT, true); + vInt->setValue("+7"); + BOOST_CHECK(vInt->verifyValueIsOfKind()); + } + + { + auto vFloat = ConfigElement::create(cfgElemList, header, "vFloat1", ConfigElement::FLOAT, true); + vFloat->setValue("1234567890.0987654321"); + BOOST_CHECK(vFloat->verifyValueIsOfKind()); + } + + { + auto vFloat = ConfigElement::create(cfgElemList, header, "vFloat2", ConfigElement::FLOAT, true); + vFloat->setValue("0"); + BOOST_CHECK(vFloat->verifyValueIsOfKind()); + } + + { + auto vFloat = ConfigElement::create(cfgElemList, header, "vFloat3", ConfigElement::FLOAT, true); + vFloat->setValue(".01"); + BOOST_CHECK(vFloat->verifyValueIsOfKind()); + } + + { +
auto vFloat = ConfigElement::create(cfgElemList, header, "vFloat4", ConfigElement::FLOAT, true); + vFloat->setValue("-.01"); + BOOST_CHECK(vFloat->verifyValueIsOfKind()); + } + + { + auto vFloat = ConfigElement::create(cfgElemList, header, "vFloat5", ConfigElement::FLOAT, true); + vFloat->setValue("-.01"); // NOTE(review): same value as the vFloat4 case above, and the key "vFloat5" is reused by the next case - confirm intent + BOOST_CHECK(vFloat->verifyValueIsOfKind()); + } + + { + auto vFloat = ConfigElement::create(cfgElemList, header, "vFloat5", ConfigElement::FLOAT, true); + vFloat->setValue("+0.01"); + BOOST_CHECK(vFloat->verifyValueIsOfKind()); + } + + { + auto vFloat = ConfigElement::create(cfgElemList, header, "vFloat6", ConfigElement::FLOAT, true); + vFloat->setValue("1.03e-2"); + BOOST_CHECK(vFloat->verifyValueIsOfKind()); + } + + LOG_INFO("Test bad values"); + /// There aren't any rules about what would be an invalid STRING + + { + auto bInt = ConfigElement::create(cfgElemList, header, "bInt1", ConfigElement::INT, true); + bInt->setValue(" 1234567890a "); + BOOST_CHECK(not bInt->verifyValueIsOfKind()); + } + + { + auto bInt = ConfigElement::create(cfgElemList, header, "bInt2", ConfigElement::INT, true); + bInt->setValue(" "); + BOOST_CHECK(not bInt->verifyValueIsOfKind()); + } + + { + auto bInt = ConfigElement::create(cfgElemList, header, "bInt3", ConfigElement::INT, true); + bInt->setValue("z"); + BOOST_CHECK(not bInt->verifyValueIsOfKind()); + } + + { + auto bInt = ConfigElement::create(cfgElemList, header, "bInt3", ConfigElement::INT, true); + bInt->setValue("-"); + BOOST_CHECK(not bInt->verifyValueIsOfKind()); + } + + { + auto bInt = ConfigElement::create(cfgElemList, header, "bInt3", ConfigElement::INT, true); + bInt->setValue("+"); + BOOST_CHECK(not bInt->verifyValueIsOfKind()); + } + + { + auto bInt = ConfigElement::create(cfgElemList, header, "bInt3", ConfigElement::INT, true); + bInt->setValue("1.7"); + BOOST_CHECK(not bInt->verifyValueIsOfKind()); + } + + { + auto bFloat = ConfigElement::create(cfgElemList, header, "bFloat1", ConfigElement::FLOAT, true); +
bFloat->setValue(" 1234567890a "); + BOOST_CHECK(not bFloat->verifyValueIsOfKind()); + } + + { + auto bFloat = ConfigElement::create(cfgElemList, header, "bFloat2", ConfigElement::FLOAT, true); + bFloat->setValue(" "); + BOOST_CHECK(not bFloat->verifyValueIsOfKind()); + } + + { + auto bFloat = ConfigElement::create(cfgElemList, header, "bFloat3", ConfigElement::FLOAT, true); + bFloat->setValue("z"); + BOOST_CHECK(not bFloat->verifyValueIsOfKind()); + } + + { + auto bFloat = ConfigElement::create(cfgElemList, header, "bFloat4", ConfigElement::FLOAT, true); + bFloat->setValue("-"); + BOOST_CHECK(not bFloat->verifyValueIsOfKind()); + } + + { + auto bFloat = ConfigElement::create(cfgElemList, header, "bFloat5", ConfigElement::FLOAT, true); + bFloat->setValue("+"); + BOOST_CHECK(not bFloat->verifyValueIsOfKind()); + } + + { + auto bFloat = ConfigElement::create(cfgElemList, header, "bFloat5", ConfigElement::FLOAT, true); + bFloat->setValue("."); + BOOST_CHECK(not bFloat->verifyValueIsOfKind()); + } + + LOGS_INFO("LoaderConfig test end"); + + + LOG_INFO("CompositeKey test start"); + + { + LOGS_INFO("Comparisons to self"); + CompositeKey a; // default-constructed object; `CompositeKey a();` would declare a function (most vexing parse), so the checks below would compare a function pointer instead of a CompositeKey + BOOST_CHECK(a == a); + BOOST_CHECK(!(a != a)); + BOOST_CHECK(!(a < a)); + BOOST_CHECK(!(a > a)); + BOOST_CHECK(a <= a); + BOOST_CHECK(a >= a); + } + + { + LOGS_INFO("Comparisons integer equal"); + CompositeKey a(9876); + CompositeKey b(9876); + BOOST_CHECK(a == b); + BOOST_CHECK(!(a != b)); + BOOST_CHECK(!(a < b)); + BOOST_CHECK(!(a > b)); + BOOST_CHECK(a <= b); + BOOST_CHECK(a >= b); + } + + { + LOGS_INFO("Comparisons integer less than"); + CompositeKey a(875); + CompositeKey b(876); + BOOST_CHECK(!(a == b)); + BOOST_CHECK( (a != b)); + BOOST_CHECK( (a < b)); + BOOST_CHECK(!(a > b)); + BOOST_CHECK( (a <= b)); + BOOST_CHECK(!(a >= b)); + } + + { + LOGS_INFO("Comparisons integer greater than"); + CompositeKey a(1000000); + CompositeKey b(30); + BOOST_CHECK(!(a == b)); + BOOST_CHECK( (a != b)); + BOOST_CHECK(!(a < b)); + BOOST_CHECK(
(a > b)); + BOOST_CHECK(!(a <= b)); + BOOST_CHECK( (a >= b)); + } + + { + LOGS_INFO("Comparisons integer greater than"); + CompositeKey a(1000000, "a"); + CompositeKey b(30, "b"); + BOOST_CHECK(!(a == b)); + BOOST_CHECK( (a != b)); + BOOST_CHECK(!(a < b)); + BOOST_CHECK( (a > b)); + BOOST_CHECK(!(a <= b)); + BOOST_CHECK( (a >= b)); + } + + { + LOGS_INFO("Comparisons string equal"); + CompositeKey a(0, "string%$testA"); + CompositeKey b(0, "string%$testA"); + BOOST_CHECK(a == b); + BOOST_CHECK(!(a != b)); + BOOST_CHECK(!(a < b)); + BOOST_CHECK(!(a > b)); + BOOST_CHECK(a <= b); + BOOST_CHECK(a >= b); + } + + { + LOGS_INFO("Comparisons string less than"); + CompositeKey a(875, "testa"); + CompositeKey b(875, "testb"); + BOOST_CHECK(!(a == b)); + BOOST_CHECK( (a != b)); + BOOST_CHECK( (a < b)); + BOOST_CHECK(!(a > b)); + BOOST_CHECK( (a <= b)); + BOOST_CHECK(!(a >= b)); + } + + { + LOGS_INFO("Comparisons string greater than"); + CompositeKey a(30, "testd"); + CompositeKey b(30, "testc"); + BOOST_CHECK(!(a == b)); + BOOST_CHECK( (a != b)); + BOOST_CHECK(!(a < b)); + BOOST_CHECK( (a > b)); + BOOST_CHECK(!(a <= b)); + BOOST_CHECK( (a >= b)); + } + + { + CompositeKey a(34568, "@#WSR$RT%fewsewer"); + CompositeKey b(a); + BOOST_CHECK(b == a); + } + + { + CompositeKey b; + CompositeKey a(98763, "AsdE$%342"); + b = a; + BOOST_CHECK(a == b); + } + + LOGS_INFO("CompositeKey test end"); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/core/modules/proto/SConscript b/core/modules/proto/SConscript index c32c4c7821..a738b309b6 100644 --- a/core/modules/proto/SConscript +++ b/core/modules/proto/SConscript @@ -7,5 +7,9 @@ env.Protoc(File("worker.proto"), PROTOC_PATH='.', PROTOC_CCOUT='.', PROTOC_PYOUT='.',) +env.Protoc(File("loader.proto"), + PROTOC_PATH='.', + PROTOC_CCOUT='.', + PROTOC_PYOUT='.',) standardModule(env, test_libs='log4cxx') diff --git a/core/modules/proto/loader.proto b/core/modules/proto/loader.proto new file mode 100644 index 0000000000..a3f98b18e2 --- /dev/null +++
b/core/modules/proto/loader.proto @@ -0,0 +1,115 @@ +/* + * LSST Data Management System + * Copyright 2018 LSST Corporation. + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +/// loader.proto +/// This defines the wire-messages used by the loader module (master, workers and clients). + +package lsst.qserv.proto; + +/// Response to any message +message LdrMsgReceived { + /// NOTE(review): originalid/originalkind presumably identify the message being acknowledged - confirm against the sender. + required uint64 originalid = 1; + required uint32 originalkind = 2; + required uint32 status = 3; + optional string errmsg = 4; + required uint32 dataentries = 5; +} + +/// Network address: IP plus UDP and TCP ports (original note: register a worker with the master). 
+message LdrNetAddress { + required string ip = 1; + required uint32 udpport = 2; + required uint32 tcpport = 3; +} + + +message WorkerRange { + required bool valid = 1; + required uint64 minint = 2; + required string minstr = 3; + required uint64 maxint = 4; + required string maxstr = 5; + required bool maxunlimited = 6; +} + +message WorkerListItem { + required uint32 wid = 1; // worker's id number + optional WorkerRange range = 2; + optional LdrNetAddress address = 3; +} + + +message LdrMastWorkerList { + required uint32 workercount = 1; + repeated WorkerListItem worker = 2; +} + + +message KeyInfo { + required string keystr = 1; + required uint64 keyint = 2; + required uint32 chunk = 3; + required uint32 subchunk = 4; + optional bool success = 5; +} + + +message KeyInfoInsert { + required LdrNetAddress requester = 1; + required KeyInfo keyinfo = 2; + required uint32 hops = 3; +} + + +message Neighbor { + required uint32 wid = 2; +} + + +// Information about a worker and the keys it holds. +message WorkerKeysInfo { + required uint32 wid = 1; + required uint32 mapsize = 2; + required uint32 recentadds = 3; + required WorkerRange range = 4; + required Neighbor left = 5; + required Neighbor right = 6; +} + + +message WorkerImNeighbor { + required uint32 wid = 1; + required uint64 keycount = 2; + required WorkerRange range = 4; +} + + +message KeyList { + required uint32 keycount = 1; // TODO this could be redundant + repeated KeyInfo keypair = 2; +} + + +message KeyShiftRequest { + required uint32 keystoshift = 1; +} + diff --git a/core/modules/qdisp/XrdSsiMocks.cc b/core/modules/qdisp/XrdSsiMocks.cc index 10c45dfa56..eeb4c1fb75 100644 --- a/core/modules/qdisp/XrdSsiMocks.cc +++ b/core/modules/qdisp/XrdSsiMocks.cc @@ -105,7 +105,7 @@ class Agent : public XrdSsiResponder, public XrdSsiStream { break; case RESP_ERRNR: _reqP->doNotRetry(); - // Fallthrough + // Fallthrough [[fallthrough]]; case RESP_ERROR: _ReplyError(); break;