From 8f163f2ef70b679e546cf0c7161aac9010d366ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicente=20Adolfo=20Bolea=20S=C3=A1nchez?= Date: Mon, 3 Apr 2017 00:52:25 +0900 Subject: [PATCH 1/8] Merge VeloxDFS v1.8.0 into VeloxMR - Added Java client API and refactored C++ client API - Parallel mapper and reducer is working - Great optimization - Separated the command line client interface from the C++ client API. - Added default options for the settings. - Append is finally fixed NOTE: The VeloxDFS parent commit is omited. --- .clang-format | 57 + .github/PULL_REQUEST_TEMPLATE.md | 17 +- .travis.yml | 8 +- Makefile.am | 67 +- README.md | 75 +- configure.ac | 3 - doc/Doxyfile | 10 +- java_compile.sh | 34 + set-env.sh | 20 + src/blocknode/block_node.cc | 97 + src/blocknode/block_node.hh | 36 + src/blocknode/block_node_router.cc | 80 + src/blocknode/block_node_router.hh | 18 + src/{nodes => blocknode}/local_io.cc | 40 +- src/{nodes => blocknode}/local_io.hh | 2 + src/cache/cache.hh | 81 - src/client/cli_driver.cc | 210 ++ src/client/cli_driver.hh | 22 + src/client/dfs.cc | 1723 ++++++++--------- src/client/dfs.hh | 90 +- src/client/model/metadata.hh | 25 + src/client/vdfs.cc | 192 +- src/client/vdfs.hh | 45 +- src/client/vmr.cc | 8 +- src/common/block.hh | 4 + src/common/blockmetadata.hh | 21 + src/common/context.cc | 12 +- src/common/dl_loader.hh | 3 +- src/common/ecfs.hh | 5 - src/common/histogram.cc | 2 +- src/common/histogram.hh | 2 +- src/common/logger.cc | 3 + src/common/logger.hh | 2 +- src/common/settings.cc | 37 + src/fileleader/directory.cc | 318 +++ src/fileleader/directory.hh | 40 + src/fileleader/file_leader.cc | 192 ++ src/fileleader/file_leader.hh | 58 + src/fileleader/file_leader_router.cc | 140 ++ src/fileleader/file_leader_router.hh | 26 + src/java/jni/velox_DFS.cc | 147 ++ src/java/jni/velox_VDFS.cc | 134 ++ src/java/jni/velox_VDFS.h | 77 + src/java/jni/velox_VeloxDFS.h | 77 + src/java/velox/VDFS.java | 35 + src/java/velox/VeloxDFS.java | 35 + 
src/java/velox/model/Metadata.java | 21 + src/mapreduce/executor.cc | 372 ++-- src/mapreduce/executor.hh | 10 +- src/mapreduce/fs/directorymr.cc | 277 ++- src/mapreduce/fs/directorymr.hh | 38 +- src/mapreduce/fs/ireader.cc | 10 +- src/mapreduce/fs/ireader.h | 2 +- src/mapreduce/fs/iwriter.cc | 4 +- src/mapreduce/job.cc | 94 - src/mapreduce/job.hh | 54 - src/mapreduce/messages/boost_impl.cc | 290 +++ src/mapreduce/messages/boost_impl.hh | 105 + src/mapreduce/messages/finish_shuffle.h | 6 +- src/{ => mapreduce}/messages/finishmap.cc | 0 src/{ => mapreduce}/messages/finishmap.hh | 4 +- src/{ => mapreduce}/messages/idatakeys.cc | 0 src/{ => mapreduce}/messages/idatakeys.hh | 2 +- src/mapreduce/messages/idatalist.hh | 4 +- src/{ => mapreduce}/messages/job.cc | 0 src/{ => mapreduce}/messages/job.hh | 2 +- src/{ => mapreduce}/messages/jobstatus.cc | 0 src/{ => mapreduce}/messages/jobstatus.hh | 2 +- src/mapreduce/messages/key_value_shuffle.h | 12 +- .../messages/nodes_shuffling.cc | 0 .../messages/nodes_shuffling.hh | 4 +- src/{ => mapreduce}/messages/task.cc | 0 src/{ => mapreduce}/messages/task.hh | 7 +- src/{ => mapreduce}/messages/taskstatus.cc | 0 src/{ => mapreduce}/messages/taskstatus.hh | 6 +- src/mapreduce/nodes/peermr.cc | 573 ------ src/mapreduce/nodes/peermr.h | 65 - src/mapreduce/nodes/remotemr.cc | 39 - src/mapreduce/nodes/remotemr.h | 18 - src/mapreduce/py_executor.cc | 22 +- src/mapreduce/py_executor.hh | 8 +- src/mapreduce/task_executor.cc | 363 ++++ src/mapreduce/task_executor.hh | 64 + src/mapreduce/task_executor_router.cc | 73 + src/mapreduce/task_executor_router.hh | 25 + src/messages/IOoperation.cc | 4 + src/messages/IOoperation.hh | 34 + src/messages/blockinfo.hh | 2 +- src/messages/blockrequest.hh | 5 +- src/messages/blockstatus.cc | 5 + src/messages/blockstatus.hh | 17 + src/messages/blockupdate.hh | 14 +- src/messages/boost_impl.cc | 340 +--- src/messages/boost_impl.hh | 104 +- src/messages/factory.cc | 3 + src/messages/factory.hh | 5 +- 
src/messages/filedescription.cc | 14 + src/messages/filedescription.hh | 11 +- src/messages/fileinfo.hh | 5 + src/messages/fileupdate.hh | 5 + src/messages/idatalist.cc | 10 - src/messages/idatalist.hh | 21 - src/messages/message.hh | 2 +- src/messages/subjob.cc | 9 - src/messages/subjob.hh | 23 - src/messages/subjobstatus.cc | 7 - src/messages/subjobstatus.hh | 17 - src/network/acceptor.cc | 31 - src/network/acceptor.hh | 33 - src/network/asyncchannel.cc | 149 +- src/network/asyncchannel.hh | 55 +- src/network/asyncnetwork.hh | 221 --- src/network/asyncnode.hh | 14 - src/network/channel.cc | 9 - src/network/channel.hh | 15 +- src/network/client_handler.cc | 118 ++ src/network/client_handler.hh | 35 + src/network/connector.cc | 47 - src/network/connector.hh | 35 - src/network/netobserver.hh | 8 +- src/network/network.hh | 22 - src/network/p2p.cc | 12 - src/network/p2p.hh | 15 - src/network/router.hh | 32 + src/network/router_decorator.cc | 14 + src/network/router_decorator.hh | 19 + src/network/server.cc | 8 +- src/network/server.hh | 2 +- src/network/server_handler.cc | 63 + src/network/server_handler.hh | 23 + src/network/simple_router.cc | 13 + src/network/simple_router.hh | 12 + src/nodes/block.hh | 4 + src/nodes/blockmetadata.hh | 21 + src/nodes/directory.cc | 402 ---- src/nodes/directory.hh | 53 - src/nodes/node.hh | 4 +- src/nodes/peerdfs.cc | 392 ---- src/nodes/peerdfs.hh | 65 - src/nodes/remotedfs.cc | 205 -- src/nodes/remotedfs.hh | 33 - src/nodes/router.cc | 36 - src/nodes/router.hh | 28 - src/targets/client.cc | 11 + src/targets/kmeans.cc | 40 +- src/targets/node_main.cc | 29 +- src/targets/node_main_mr.cc | 92 +- src/targets/veloxmr_mapreduce.cc | 1 + tests/Makefile.am | 40 +- tests/eclipse_debug.in | 135 -- tests/executor.cc | 18 - tests/integration_test.sh | 65 + tests/metadata_test.cc | 11 +- tests/nodes.cc | 29 - tests/nodes_executor.in | 8 - tests/nodes_test.cc | 14 - tests/vdfs.cc | 62 + 157 files changed, 5521 insertions(+), 4845 deletions(-) 
create mode 100644 .clang-format create mode 100644 java_compile.sh create mode 100644 set-env.sh create mode 100644 src/blocknode/block_node.cc create mode 100644 src/blocknode/block_node.hh create mode 100644 src/blocknode/block_node_router.cc create mode 100644 src/blocknode/block_node_router.hh rename src/{nodes => blocknode}/local_io.cc (69%) rename src/{nodes => blocknode}/local_io.hh (82%) delete mode 100644 src/cache/cache.hh create mode 100644 src/client/cli_driver.cc create mode 100644 src/client/cli_driver.hh create mode 100644 src/client/model/metadata.hh create mode 100644 src/common/block.hh create mode 100644 src/common/blockmetadata.hh delete mode 100644 src/common/ecfs.hh create mode 100644 src/fileleader/directory.cc create mode 100644 src/fileleader/directory.hh create mode 100644 src/fileleader/file_leader.cc create mode 100644 src/fileleader/file_leader.hh create mode 100644 src/fileleader/file_leader_router.cc create mode 100644 src/fileleader/file_leader_router.hh create mode 100644 src/java/jni/velox_DFS.cc create mode 100644 src/java/jni/velox_VDFS.cc create mode 100644 src/java/jni/velox_VDFS.h create mode 100644 src/java/jni/velox_VeloxDFS.h create mode 100644 src/java/velox/VDFS.java create mode 100644 src/java/velox/VeloxDFS.java create mode 100644 src/java/velox/model/Metadata.java delete mode 100644 src/mapreduce/job.cc delete mode 100644 src/mapreduce/job.hh create mode 100644 src/mapreduce/messages/boost_impl.cc create mode 100644 src/mapreduce/messages/boost_impl.hh rename src/{ => mapreduce}/messages/finishmap.cc (100%) rename src/{ => mapreduce}/messages/finishmap.hh (73%) rename src/{ => mapreduce}/messages/idatakeys.cc (100%) rename src/{ => mapreduce}/messages/idatakeys.hh (85%) rename src/{ => mapreduce}/messages/job.cc (100%) rename src/{ => mapreduce}/messages/job.hh (91%) rename src/{ => mapreduce}/messages/jobstatus.cc (100%) rename src/{ => mapreduce}/messages/jobstatus.hh (83%) rename src/{ => 
mapreduce}/messages/nodes_shuffling.cc (100%) rename src/{ => mapreduce}/messages/nodes_shuffling.hh (73%) rename src/{ => mapreduce}/messages/task.cc (100%) rename src/{ => mapreduce}/messages/task.hh (73%) rename src/{ => mapreduce}/messages/taskstatus.cc (100%) rename src/{ => mapreduce}/messages/taskstatus.hh (65%) delete mode 100644 src/mapreduce/nodes/peermr.cc delete mode 100644 src/mapreduce/nodes/peermr.h delete mode 100644 src/mapreduce/nodes/remotemr.cc delete mode 100644 src/mapreduce/nodes/remotemr.h create mode 100644 src/mapreduce/task_executor.cc create mode 100644 src/mapreduce/task_executor.hh create mode 100644 src/mapreduce/task_executor_router.cc create mode 100644 src/mapreduce/task_executor_router.hh create mode 100644 src/messages/IOoperation.cc create mode 100644 src/messages/IOoperation.hh create mode 100644 src/messages/blockstatus.cc create mode 100644 src/messages/blockstatus.hh delete mode 100644 src/messages/idatalist.cc delete mode 100644 src/messages/idatalist.hh delete mode 100644 src/messages/subjob.cc delete mode 100644 src/messages/subjob.hh delete mode 100644 src/messages/subjobstatus.cc delete mode 100644 src/messages/subjobstatus.hh delete mode 100644 src/network/acceptor.cc delete mode 100644 src/network/acceptor.hh delete mode 100644 src/network/asyncnetwork.hh delete mode 100644 src/network/asyncnode.hh delete mode 100644 src/network/channel.cc create mode 100644 src/network/client_handler.cc create mode 100644 src/network/client_handler.hh delete mode 100644 src/network/connector.cc delete mode 100644 src/network/connector.hh delete mode 100644 src/network/network.hh delete mode 100644 src/network/p2p.cc delete mode 100644 src/network/p2p.hh create mode 100644 src/network/router.hh create mode 100644 src/network/router_decorator.cc create mode 100644 src/network/router_decorator.hh create mode 100644 src/network/server_handler.cc create mode 100644 src/network/server_handler.hh create mode 100644 
src/network/simple_router.cc create mode 100644 src/network/simple_router.hh create mode 100644 src/nodes/block.hh create mode 100644 src/nodes/blockmetadata.hh delete mode 100644 src/nodes/directory.cc delete mode 100644 src/nodes/directory.hh delete mode 100644 src/nodes/peerdfs.cc delete mode 100644 src/nodes/peerdfs.hh delete mode 100644 src/nodes/remotedfs.cc delete mode 100644 src/nodes/remotedfs.hh delete mode 100644 src/nodes/router.cc delete mode 100644 src/nodes/router.hh create mode 100644 src/targets/client.cc delete mode 100644 tests/eclipse_debug.in delete mode 100644 tests/executor.cc create mode 100644 tests/integration_test.sh delete mode 100644 tests/nodes.cc delete mode 100644 tests/nodes_executor.in delete mode 100644 tests/nodes_test.cc create mode 100644 tests/vdfs.cc diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..651e129 --- /dev/null +++ b/.clang-format @@ -0,0 +1,57 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +ConstructorInitializerIndentWidth: 4 +AlignEscapedNewlinesLeft: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AlwaysBreakTemplateDeclarations: true +AlwaysBreakBeforeMultilineStrings: true +BreakBeforeBinaryOperators: false +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BinPackParameters: true +ColumnLimit: 80 +ConstructorInitializerAllOnOneLineOrOnePerLine: true +DerivePointerAlignment: true +ExperimentalAutoDetectBinPacking: false +IndentCaseLabels: true +IndentWrappedFunctionNames: false +IndentFunctionDeclarationAfterType: false +MaxEmptyLinesToKeep: 1 +KeepEmptyLinesAtTheStartOfBlocks: false +NamespaceIndentation: None +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakBeforeFirstCallParameter: 1 
+PenaltyBreakComment: 300 +PenaltyBreakString: 1000 +PenaltyBreakFirstLessLess: 120 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +SpacesBeforeTrailingComments: 2 +Cpp11BracedListStyle: true +Standard: Auto +IndentWidth: 2 +TabWidth: 8 +UseTab: Never +BreakBeforeBraces: Attach +SpacesInParentheses: false +SpacesInAngles: false +SpaceInEmptyParentheses: false +SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: true +SpaceBeforeAssignmentOperators: true +ContinuationIndentWidth: 4 +CommentPragmas: '^ IWYU pragma:' +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +SpaceBeforeParens: ControlStatements +DisableFormat: false +... + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index bd6a9f8..b38acf2 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,7 +1,18 @@ This PR Fixes DICL/EclipseMETA#(Which issue?) -Checklist -- [ ] Follows Google Coding Style. -- [ ] At least one reviewer approved (for non-trivial changes). +## BRIEF + +## STATUS +- [ ] Its implemented. +- [ ] It compiles. +- [ ] Its tested. +- [ ] Its refactored. --- +Make sure that you squeeze all your commits before merging to master. 
+You might want to use the following command: + + $ git rebase -i #hash key of base commit + +--- +## EXTRA diff --git a/.travis.yml b/.travis.yml index af15687..0eec613 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ -sudo: false -dist: precise +sudo: false +dist: precise language: cpp cache: @@ -35,5 +35,5 @@ script: - cd build - sh ../configure --prefix=`pwd` - make -j4 - - make install - - make -j4 check + - make install + - make -j4 check diff --git a/Makefile.am b/Makefile.am index d2e4e97..7f34827 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,7 +18,6 @@ messages_files = src/messages/boundaries.cc \ src/messages/fileupdate.cc \ src/messages/blockinfo.cc \ src/messages/blockupdate.cc \ - src/messages/task.cc \ src/messages/reply.cc \ src/messages/filerequest.cc \ src/messages/blockrequest.cc \ @@ -30,6 +29,11 @@ messages_files = src/messages/boundaries.cc \ src/messages/formatrequest.cc \ src/messages/fileexist.cc \ src/messages/metadata.cc \ + src/messages/blockstatus.cc \ + src/messages/IOoperation.cc + +mr_messages_files= src/mapreduce/messages/task.cc \ + src/mapreduce/messages/boost_impl.cc \ src/mapreduce/messages/idatainsert.cc \ src/mapreduce/messages/igroupinsert.cc \ src/mapreduce/messages/iblockinsert.cc \ @@ -42,30 +46,32 @@ messages_files = src/messages/boundaries.cc \ src/mapreduce/messages/iblockinfo.cc \ src/mapreduce/messages/key_value_shuffle.cc \ src/mapreduce/messages/finish_shuffle.cc \ - src/messages/taskstatus.cc \ - src/messages/job.cc \ - src/messages/jobstatus.cc \ - src/messages/subjob.cc \ - src/messages/subjobstatus.cc \ - src/messages/idatakeys.cc \ - src/messages/finishmap.cc \ - src/messages/nodes_shuffling.cc + src/mapreduce/messages/taskstatus.cc \ + src/mapreduce/messages/job.cc \ + src/mapreduce/messages/jobstatus.cc \ + src/mapreduce/messages/idatakeys.cc \ + src/mapreduce/messages/finishmap.cc \ + src/mapreduce/messages/nodes_shuffling.cc # libs ----- -lib_LTLIBRARIES = libvdfs.la +lib_LTLIBRARIES = 
libvdfs.la libvmr.la libvdfs_la_SOURCES = $(messages_files) \ src/client/dfs.cc \ src/client/vdfs.cc \ src/common/hash.cc src/common/settings.cc \ src/common/logger.cc src/common/histogram.cc \ src/common/context.cc src/common/dl_loader.cc \ - src/common/context_singleton.cc \ - src/client/vmr.cc \ - src/mapreduce/output_collection.cc + src/common/context_singleton.cc libvdfs_la_LDFLAGS = $(BOOST_LDFLAGS) -version-info 0:0:0 libvdfs_la_LIBADD = -lboost_system -lboost_serialization -lboost_coroutine -lboost_thread -lboost_context -ldl +libvmr_la_LDFLAGS = $(BOOST_LDFLAGS) -version-info 0:0:0 +libvmr_la_LIBADD = -lboost_system -lboost_serialization -lboost_coroutine -lboost_thread -lboost_context -ldl +libvmr_la_SOURCES = $(mr_messages_files) \ + src/client/vmr.cc \ + src/mapreduce/output_collection.cc + # Binaries ---- if BOOST_STATIC AM_LDFLAGS = -static $(BOOST_LDFLAGS) -Wl,--start-group -Wl,-Bstatic,-lboost_system,-lboost_serialization,-lboost_coroutine,-lboost_thread,-lboost_context,-Bdynamic @@ -76,37 +82,44 @@ LDADD = libvdfs.la -lboost_system -lboost_serialization -lboost_co endif eclipse_node_SOURCES = src/targets/node_main_mr.cc \ - src/network/channel.cc \ src/network/asyncchannel.cc \ - src/network/p2p.cc \ src/network/server.cc \ - src/network/acceptor.cc \ - src/network/connector.cc \ + src/network/client_handler.cc \ + src/network/server_handler.cc \ + src/network/simple_router.cc \ + src/network/router_decorator.cc \ src/nodes/machine.cc \ - src/nodes/peerdfs.cc \ - src/nodes/local_io.cc \ - src/nodes/remotedfs.cc \ - src/nodes/router.cc \ - src/nodes/directory.cc \ src/nodes/node.cc \ - src/mapreduce/nodes/peermr.cc \ - src/mapreduce/nodes/remotemr.cc \ + src/blocknode/local_io.cc \ + src/blocknode/block_node.cc \ + src/blocknode/block_node_router.cc \ + src/fileleader/directory.cc \ + src/fileleader/file_leader.cc \ + src/fileleader/file_leader_router.cc \ + src/mapreduce/task_executor.cc \ + src/mapreduce/task_executor_router.cc \ 
src/mapreduce/fs/directorymr.cc \ src/mapreduce/fs/iwriter.cc \ src/mapreduce/fs/ireader.cc \ src/mapreduce/executor.cc \ src/mapreduce/py_executor.cc -eclipse_node_LDADD = $(LDADD) -lpython2.7 -__velox_mapreduce_SOURCES = src/targets/veloxmr_mapreduce.cc +eclipse_node_LDADD = libvmr.la $(LDADD) -lpython2.7 -dfs_SOURCES = src/client/client.cc +dfs_SOURCES = src/targets/client.cc \ + src/client/cli_driver.cc + +__velox_mapreduce_SOURCES = src/targets/veloxmr_mapreduce.cc +__velox_mapreduce_LDADD = libvmr.la $(LDADD) libvdfs.la pkginclude_HEADERS = src/client/vdfs.hh \ src/client/dfs.hh \ src/client/vmr.hh \ src/mapreduce/output_collection.hh +modelincludedir = $(pkgincludedir)/model +modelinclude_HEADERS = src/client/model/metadata.hh + if COPY_SAMPLES sysconf_DATA = doc/eclipse.json endif diff --git a/README.md b/README.md index 6d940b4..bef36e4 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ [![Build Status](https://travis-ci.org/DICL/VeloxMR.svg?branch=mapreduce)](https://travis-ci.org/DICL/VeloxMR) [![Slack room](https://img.shields.io/badge/slack-join-pink.svg)](https://dicl.slack.com/messages/general/) - +[![ZenHub](https://raw.githubusercontent.com/ZenHubIO/support/master/zenhub-badge.png)](https://zenhub.com) [![Analytics](https://ga-beacon.appspot.com/UA-87474237-1/veloxmr)](https://github.com/DICL/VeloxMR) -BRIEFING +--- +VeloxMR {#mainpage} ======== VeloxMR is a MapReduce framework implemented in C++ on the top of VeloxDF @@ -16,17 +17,19 @@ USAGE ===== VeloxMR default launcher is not included in this repository, if you want to use it you can find it [here][eclipsed]. 
-The reason to not to include the launcher inside the package is to let the user to chose any launcher, options are: +The reason not to include the launcher inside the package is to let the user to choose any launcher or service managers such as: - systemd/init.d - puppet/chef/salt -Once the system is running, you can interact with EclipseDFS with the following commands: +Once the system is up and running, you can interact with VeloxDFS with the following commands: ``` - $ dfs ls|put|get|rm|format|show + $ dfs put|get|cat|ls|rm|format|pget|update|append ``` COMPILING & INSTALLING -===================== +====================== + +_Detailed information can be found in the wiki of this repository_ Compiling requirements ---------------------- @@ -41,11 +44,11 @@ For single user installation for developers $ mkdir -p local_eclipse/{tmp,sandbox} # Create a sandbox directories $ cd local_eclipse # enter in the directory - $ git clone git@github.com:DICL/EclipseDFS.git # Clone the project from github - $ cd EclipseDFS + $ git clone git@github.com:DICL/VeloxDFS.git # Clone the project from github + $ cd VeloxDFS $ sh autogen.sh # Generate configure script $ cd ../tmp # Go to building folder - $ sh ../EclipseDFS/configure --prefix=`pwd`/../sandbox # Check requirements and generate the Makefile + $ sh ../VeloxDFS/configure --prefix=`pwd`/../sandbox # Check requirements and generate the Makefile # If you get a boost error go the FAQ section of the README @@ -57,11 +60,28 @@ Now edit in your **~/.bashrc** or **~/.profile**: export PATH="/home/*..PATH/To/eclipse/..*/sandbox/bin":$PATH export LIBRARY_PATH="/home/*..PATH/To/eclipse/..*/sandbox/lib" export C_INCLUDE_PATH="/home/*..PATH/To/eclipse/..*/sandbox/include" - export MANPATH=`manpath`:/home*..PATH/To/eclipse/..*/sandbox/share/man -For the configuration refer to the manpage: - $ man eclipsefs +Default settings for VELOXDFS +----------------------------- + + "log" : { + "type" : "LOG_LOCAL6" + "name" : "ECLIPSE" + "mask" : 
"DEBUG" + }, + + "cache" : { + "numbin" : 100, + "size" : 200000, + "concurrency" : 1 + }, + + "filesystem" : { + "block" : 137438953, + "buffer" : 512, + "replica" : 1 + } FAQ --- @@ -70,27 +90,18 @@ FAQ - _Answer_ : It probably means that you do not have boost library installed in the default location, in such case you should specify the boost library location. ``` - sh ../EclipseDFS/configure --prefix ~/sandbox --with-boost=/usr/local --with-boost-libdir=/usr/local/lib + sh ../VeloxDFS/configure --prefix ~/sandbox --with-boost=/usr/local --with-boost-libdir=/usr/local/lib ``` In this example we assume that the boost headers are in `/usr/local/include` while the library files are inside `/usr/local/lib`. -AUTHOR -====== - - - __AUTHOR:__ [Vicente Adolfo Bolea Sanchez] [vicente] - - __AUTHOR:__ [MooHyeon Nam] [mh] - - __AUTHOR:__ [WonBae Kim] [wb] - - __AUTHOR:__ [KiBeom Jin] [kb] - - __AUTHOR:__ [Prof. Nam Beomseok] [nb] - - __INSTITUTION:__ [DICL laboratory] [dicl] at [UNIST] - - -[vicente]: https://github.com/vicentebolea -[ym]: https://github.com/youngmoon01 -[dicl]: http://dicl.unist.ac.kr -[mh]: https://github.com/nammh -[wb]: https://github.com/zwigul -[kb]: https://github.com/kbjin -[eclipsed]: https://github.com/DICL/eclipsed -[nb]: http://dicl.unist.ac.kr +AUTHORS +======= + + - __AUTHOR:__ [Vicente Adolfo Bolea Sanchez](http://vicentebolea.me) + - __AUTHOR:__ [MooHyeon Nam](https://github.com/nammh) + - __AUTHOR:__ [WonBae Kim](https://github.com/zwigul) + - __AUTHOR:__ [KiBeom Jin](https://github.com/kbjin) + - __AUTHOR:__ [Deukyeon Hwang](https://github.com/deukyeon) + - __AUTHOR:__ [Prof. 
Nam Beomseok](http://dicl.unist.ac.kr) + - __INSTITUTION:__ [DICL laboratory](http://dicl.unist.ac.kr) at _UNIST_ diff --git a/configure.ac b/configure.ac index f00abe4..9f45bf5 100644 --- a/configure.ac +++ b/configure.ac @@ -7,11 +7,8 @@ AM_INIT_AUTOMAKE([foreign subdir-objects]) AM_SILENT_RULES([yes]) LT_INIT([dlopen]) -#AC_CONFIG_MACRO_DIR([./m4/]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_FILES([Makefile ]) -AC_CONFIG_FILES([tests/nodes_executor], [chmod +x tests/nodes_executor]) -AC_CONFIG_FILES([tests/eclipse_debug], [chmod +x tests/eclipse_debug]) AC_CONFIG_FILES([src/client/veloxmr], [chmod +x src/client/veloxmr]) AM_CONDITIONAL([DEFAULT_CXXFLAGS], [test -z "$CXXFLAGS"]) diff --git a/doc/Doxyfile b/doc/Doxyfile index 8524d96..4481a4d 100644 --- a/doc/Doxyfile +++ b/doc/Doxyfile @@ -33,7 +33,7 @@ DOXYFILE_ENCODING = UTF-8 # title of most generated pages and in a few other places. # The default value is: My Project. -PROJECT_NAME = "EclipseDFS" +PROJECT_NAME = "VeloxDFS" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version @@ -45,7 +45,7 @@ PROJECT_NUMBER = # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. -PROJECT_BRIEF = "Distributed FS" +PROJECT_BRIEF = "Distributed file system for Big Data processing" # With the PROJECT_LOGO tag one can specify an logo or icon that is included in # the documentation. The maximum height of the logo should not exceed 55 pixels @@ -178,7 +178,7 @@ SHORT_NAMES = NO # description.) # The default value is: NO. -JAVADOC_AUTOBRIEF = NO +JAVADOC_AUTOBRIEF = YES # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If @@ -217,7 +217,7 @@ SEPARATE_MEMBER_PAGES = NO # uses this value to replace tabs by spaces in code fragments. 
# Minimum value: 1, maximum value: 16, default value: 4. -TAB_SIZE = 4 +TAB_SIZE = 2 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: @@ -755,6 +755,8 @@ WARN_LOGFILE = # Note: If this tag is empty the current directory is searched. INPUT = ../src +INPUT += ../README.md +USE_MDFILE_AS_MAINPAGE = ../README.md # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses diff --git a/java_compile.sh b/java_compile.sh new file mode 100644 index 0000000..9382117 --- /dev/null +++ b/java_compile.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +JNI_CLASSES=( VeloxDFS VDFS ) + +JAVA_SOURCE_PATH=./src/java/velox +JAVA_BUILD_PATH=./build/java + +if [ ! -d "$JAVA_BUILD_PATH" ]; then + mkdir $JAVA_BUILD_PATH +fi + +LIB_PATH=./build/lib + +JNI_PATH=$JAVA_SOURCE_PATH/../jni +JNI_SOURCE_NAMES=( velox_DFS.cc velox_VDFS.cc ) +JNI_OUTPUT_NAMES=( libdfs_jni.so libvdfs_jni.so ) + +echo "COMPILING JAVA sources in \`$JAVA_BUILD_PATH\`.."; +MODEL_SOURCES=( Metadata.java ) +javac -d $JAVA_BUILD_PATH ${MODEL_SOURCES[@]/#/$JAVA_SOURCE_PATH\/model/}; + +SOURCES=( ${JNI_CLASSES[@]/%/.java} ) +javac -d $JAVA_BUILD_PATH ${SOURCES[@]/#/$JAVA_SOURCE_PATH/}; + +echo "CREATING jni header files in \`$JNI_PATH\`.."; +javah -jni -d $JNI_PATH ${JNI_CLASSES[@]/#/velox.}; + +echo "CREATING jni libraries in \`$LIB_PATH\`.."; +i=0 +for source_file in ${JNI_SOURCE_NAMES[*]}; do + echo ${JNI_OUTPUT_NAMES[$i]} + g++ --std=c++14 -shared -o $LIB_PATH/${JNI_OUTPUT_NAMES[$i]} -fPIC -L/usr/local/lib -I/opt/java/jdk1.7.0_51/include/linux -I/opt/java/jdk1.7.0_51/include $JNI_PATH/$source_file -lc -lboost_system -lboost_serialization -lboost_coroutine -lboost_thread -lboost_context -lvdfs; + let i=i+1 +done diff --git a/set-env.sh b/set-env.sh new file mode 100644 index 0000000..e9606a8 --- /dev/null +++ b/set-env.sh @@ -0,0 +1,20 @@ +# Path for VeloxDFS 
+VELOXHOME=~/VeloxDFS +cd $VELOXHOME + +# Your Install Path +INSTALL_PATH=`pwd`/build + +export PATH="$INSTALL_PATH/bin:$PATH" +export LIBRARY_PATH="$INSTALL_PATH/lib:/usr/local/lib:$LIBRARY_PATH" +export LD_LIBRARY_PATH="$INSTALL_PATH/lib:/usr/local/lib:$LD_LIBRARY_PATH" +export CPATH="$INSTALL_PATH/include:$CPATH" +export C_INCLUDE_PATH="$INSTALL_PATH/include:$C_INCLUDE_PATH" +export CPLUS_INCLUDE_PATH="$INSTALL_PATH/include:$CPLUS_INCLUDE_PATH" +export MANPATH=$MANPATH:"$INSTALL_PATH/share/man" +export LDFLAGS="-L/usr/local/lib" +export PYTHONPATH=$INSTALL_PATH/bin +export CLASSPATH=$INSTALL_PATH/java:$CLASSPATH + +# Come back +cd - diff --git a/src/blocknode/block_node.cc b/src/blocknode/block_node.cc new file mode 100644 index 0000000..27571eb --- /dev/null +++ b/src/blocknode/block_node.cc @@ -0,0 +1,97 @@ +// includes & usings {{{ +#include "block_node.hh" + +using namespace eclipse; +using namespace eclipse::messages; +using namespace eclipse::network; +using namespace std; + +// }}} + +namespace eclipse { +// Constructor & destructor {{{ +BlockNode::BlockNode (ClientHandler* net) : Node () { + network = net; + network_size = context.settings.get("network.nodes").size(); +} + +BlockNode::~BlockNode() { } +// }}} +// replicate_message {{{ +//! @brief Compute the right and left node of the current node +//! and send its replicas of the given block +void BlockNode::replicate_message(IOoperation* m) { + vector nodes; + for (int i=1; i < 3; i++) { + if(i%2 == 1) { + nodes.push_back ((id + (i+1)/2 + network_size) % network_size); + } else { + nodes.push_back ((id - i/2 + network_size) % network_size); + } + } + network->send_and_replicate(nodes, m); +} +// }}} +// block_insert_local {{{ +//! @brief This method insert the block locally and replicated it. 
+bool BlockNode::block_insert_local(Block& block, bool replicate) { + local_io.write(block.first, block.second); + + if (replicate) { + INFO("[DFS] Saving locally BLOCK: %s", block.first.c_str()); + IOoperation io; + io.operation = messages::IOoperation::OpType::BLOCK_INSERT_REPLICA; + io.block = move(block); + replicate_message(&io); + } else { + INFO("[DFS] Saving replica locally BLOCK: %s", block.first.c_str()); + } + + return true; +} +// }}} +// block_read_local {{{ +//! @brief This method read the block locally. +bool BlockNode::block_read_local(Block& block, uint64_t off, uint64_t len, bool ignore_params) { + INFO("BLOCK REQUEST: %s", block.first.c_str()); + block.second = local_io.read(block.first, off, len, ignore_params); + return true; +} +// }}} +// block_delete_local {{{ +//! @brief This method read the block locally. +bool BlockNode::block_delete_local(Block& block, bool replicate) { + local_io.remove(block.first); + + INFO("[DFS] Removed locally BLOCK: %s", block.first.c_str()); + + if (replicate) { + IOoperation io; + io.operation = messages::IOoperation::OpType::BLOCK_DELETE_REPLICA; + io.block = move(block); + replicate_message(&io); + } + + return true; +} +// }}} +// block_update_local {{{ +bool BlockNode::block_update_local(Block& block, uint32_t pos, uint32_t len, bool replicate) { + local_io.update(block.first, block.second, pos, len); + + if (replicate) { + INFO("Block %s updated real host", block.first.c_str()); + IOoperation io; + io.operation = messages::IOoperation::OpType::BLOCK_UPDATE_REPLICA; + io.pos = pos; + io.length = len; + io.block = move(block); + replicate_message(&io); + + } else { + INFO("Block replica %s updated real host", block.first.c_str()); + } + return true; +} +// }}} +} diff --git a/src/blocknode/block_node.hh b/src/blocknode/block_node.hh new file mode 100644 index 0000000..517531d --- /dev/null +++ b/src/blocknode/block_node.hh @@ -0,0 +1,36 @@ +#pragma once + +#include "../nodes/node.hh" +#include "local_io.hh" 
+#include "../messages/IOoperation.hh" +#include + +namespace eclipse { + +using vec_str = std::vector; + +class BlockNode: public Node { + public: + BlockNode (network::ClientHandler*); + ~BlockNode (); + + //! @brief Save to disk a block and replicate. + bool block_insert_local(Block&, bool replicate = true); + + //! @brief Delete Local block + bool block_delete_local(Block&, bool replicate = true); + + //! @brief Update the content of the block. + bool block_update_local(Block& block, uint32_t pos, uint32_t len, bool replicate = true); + + //! @brief Read block from the local node. + bool block_read_local(Block& block, uint64_t off = 0, uint64_t len = 0, bool ignore_params = true); + + protected: + void replicate_message(messages::IOoperation*); + + Local_io local_io; + int network_size; +}; + +} diff --git a/src/blocknode/block_node_router.cc b/src/blocknode/block_node_router.cc new file mode 100644 index 0000000..44ee4e2 --- /dev/null +++ b/src/blocknode/block_node_router.cc @@ -0,0 +1,80 @@ +#include "block_node_router.hh" +#include "../common/context_singleton.hh" +#include "../messages/boost_impl.hh" +#include +#include + +using namespace std; +using namespace eclipse; +using namespace eclipse::messages; +namespace ph = std::placeholders; + +// Constructor {{{ +BlockNodeRouter::BlockNodeRouter (BlockNode* b_node, Router* router) : RouterDecorator(router) { + block_node = b_node; + + using namespace std::placeholders; + using std::placeholders::_1; + using std::placeholders::_2; + auto& rt = routing_table; + rt.insert({"IOoperation", bind(&BlockNodeRouter::io_operation, this, _1, _2)}); +} +// }}} +// io_operation {{{ +void BlockNodeRouter::io_operation (messages::Message* m_, Channel* tcp_connection) { + auto m = dynamic_cast (m_); + + if (m->operation == messages::IOoperation::OpType::BLOCK_INSERT) { + auto ret = block_node->block_insert_local(m->block); + Reply reply; + + if (ret) { + reply.message = "TRUE"; + + } else { + reply.message = "FALSE"; + } + 
tcp_connection->do_write(&reply); + + } else if (m->operation == messages::IOoperation::OpType::BLOCK_INSERT_REPLICA) { + block_node->block_insert_local(m->block, false); + + } else if (m->operation == messages::IOoperation::OpType::BLOCK_DELETE) { + auto ret = block_node->block_delete_local(m->block); + Reply reply; + + if (ret) { + reply.message = "TRUE"; + + } else { + reply.message = "FALSE"; + } + tcp_connection->do_write(&reply); + + } else if (m->operation == messages::IOoperation::OpType::BLOCK_DELETE_REPLICA) { + block_node->block_delete_local(m->block, false); + + } else if (m->operation == messages::IOoperation::OpType::BLOCK_REQUEST) { + block_node->block_read_local(m->block, m->pos, m->length, (m->pos == 0 && m->length == 0)); + IOoperation io_ops; + io_ops.operation = messages::IOoperation::OpType::BLOCK_TRANSFER; + io_ops.block = move(m->block); + tcp_connection->do_write(&io_ops); + + } else if (m->operation == messages::IOoperation::OpType::BLOCK_UPDATE) { + auto ret = block_node->block_update_local(m->block, m->pos, m->length); + Reply reply; + + if (ret) { + reply.message = "TRUE"; + + } else { + reply.message = "FALSE"; + } + tcp_connection->do_write(&reply); + + } else if (m->operation == messages::IOoperation::OpType::BLOCK_UPDATE_REPLICA) { + block_node->block_update_local(m->block, m->pos, m->length, false); + } +} +// }}} diff --git a/src/blocknode/block_node_router.hh b/src/blocknode/block_node_router.hh new file mode 100644 index 0000000..727cc63 --- /dev/null +++ b/src/blocknode/block_node_router.hh @@ -0,0 +1,18 @@ +#pragma once +#include "../network/router_decorator.hh" +#include "block_node.hh" + +namespace eclipse { + +class BlockNodeRouter: public RouterDecorator { + public: + BlockNodeRouter(BlockNode*, Router*); + ~BlockNodeRouter() = default; + + void io_operation(messages::Message*, Channel*); + + private: + BlockNode* block_node = nullptr; +}; + +} /* eclipse */ diff --git a/src/nodes/local_io.cc b/src/blocknode/local_io.cc 
similarity index 69% rename from src/nodes/local_io.cc rename to src/blocknode/local_io.cc index fe804c4..6b91feb 100644 --- a/src/nodes/local_io.cc +++ b/src/blocknode/local_io.cc @@ -4,6 +4,7 @@ #include #include #include +#include using namespace eclipse; using namespace std; @@ -14,10 +15,14 @@ Local_io::Local_io() { } // }}} // write {{{ +//! @brief Unbuffered write to disk void Local_io::write (std::string name, std::string& v) { string file_path = disk_path + string("/") + name; - ofstream file (file_path); - file << v; + ofstream file; + + file.rdbuf()->pubsetbuf(0, 0); //! No buffer + file.open(file_path, ios::binary); //! Binary write + file.write(v.c_str(), v.length()); file.close(); } // }}} @@ -32,18 +37,28 @@ void Local_io::update (std::string name, std::string v, uint32_t pos, uint32_t l // }}} // read {{{ std::string Local_io::read (string name) { - string file_path = disk_path + string("/") + name; + return read(name, 0, 0, true); +} + +std::string Local_io::read (string name, uint32_t off, uint32_t len) { + return read(name, off, len, false); +} + +std::string Local_io::read (string name, uint32_t off, uint32_t len, bool is_whole = false) { + ifstream in (disk_path + string("/") + name, ios::in | ios::binary | ios::ate); + uint32_t file_size = (uint32_t)in.tellg(); + in.seekg(off, ios::beg); - while(access(file_path.c_str(), F_OK) == -1); - ifstream in (file_path, ios::in | ios::binary | ios::ate); - ifstream::pos_type fileSize = in.tellg(); - in.seekg(0, ios::beg); + if(is_whole) + len = file_size; - vector bytes(fileSize); - in.read(&bytes[0], fileSize); + uint32_t readable_len = std::min(len, (file_size - off)); + + vector bytes(readable_len); + in.read(&bytes[0], readable_len); in.close(); - return string(&bytes[0], fileSize); + return string(&bytes[0], readable_len); } // }}} // read_metadata {{{ @@ -81,6 +96,11 @@ bool Local_io::format () { while ( (next_file = readdir(theFolder)) != NULL ) { sprintf(filepath, "%s/%s", fs_path.c_str(), 
next_file->d_name); + if (strncmp(basename(filepath), "..", 256) == 0 or + strncmp(basename(filepath), "...", 256) == 0 or + strncmp(basename(filepath), ".", 256) == 0) + continue; + DEBUG("FORMAT: Removing %s", filepath); if (0 != ::remove(filepath)) { INFO("FORMAT: Can't remove %s.", filepath); diff --git a/src/nodes/local_io.hh b/src/blocknode/local_io.hh similarity index 82% rename from src/nodes/local_io.hh rename to src/blocknode/local_io.hh index ef89940..f17691f 100644 --- a/src/nodes/local_io.hh +++ b/src/blocknode/local_io.hh @@ -13,6 +13,8 @@ class Local_io { void write(std::string, std::string&); void update(std::string, std::string, uint32_t, uint32_t); std::string read(std::string); + std::string read(std::string, uint32_t, uint32_t); + std::string read(std::string, uint32_t, uint32_t, bool); std::string read_metadata(); std::string pread(std::string, uint32_t, uint32_t); void remove(std::string); diff --git a/src/cache/cache.hh b/src/cache/cache.hh deleted file mode 100644 index a2fff59..0000000 --- a/src/cache/cache.hh +++ /dev/null @@ -1,81 +0,0 @@ -/* - * File: lrucache.hpp - * Author: Alexander Ponomarev - * - * Created on June 20, 2013, 5:09 PM - */ - -#ifndef _LRUCACHE_HPP_INCLUDED_ -#define _LRUCACHE_HPP_INCLUDED_ - -#include -#include -#include -#include -#include - -namespace eclipse { - -template -class lru_cache { -public: - typedef typename std::pair key_value_pair_t; - typedef typename std::list::iterator list_iterator_t; - - lru_cache(size_t max_size) : - _max_size(max_size) { - } - - void put(const key_t& key, const value_t& value) { - auto it = _cache_items_map.find(key); - if (it != _cache_items_map.end()) { - _cache_items_list.erase(it->second); - _cache_items_map.erase(it); - } - - _cache_items_list.push_front(key_value_pair_t(key, value)); - _cache_items_map[key] = _cache_items_list.begin(); - - if (_cache_items_map.size() > _max_size) { - auto last = _cache_items_list.end(); - last--; - _cache_items_map.erase(last->first); - 
_cache_items_list.pop_back(); - } - } - - const value_t& get(const key_t& key) { - auto it = _cache_items_map.find(key); - if (it == _cache_items_map.end()) { - throw std::range_error("There is no such key in cache"); - } else { - _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second); - return it->second->second; - } - } - - bool exists(const key_t& key) const { - return _cache_items_map.find(key) != _cache_items_map.end(); - } - - size_t size() const { - return _cache_items_map.size(); - } - - std::vector dump_keys () { - std::vector keys; - for (auto& p : _cache_items_map) { - keys.push_back(p.first); - } - return keys; - } - -private: - std::list _cache_items_list; - std::unordered_map _cache_items_map; - size_t _max_size; -}; - -} // namespace lru - -#endif /* _LRUCACHE_HPP_INCLUDED_ */ diff --git a/src/client/cli_driver.cc b/src/client/cli_driver.cc new file mode 100644 index 0000000..43844dd --- /dev/null +++ b/src/client/cli_driver.cc @@ -0,0 +1,210 @@ +#include "cli_driver.hh" +#include "../common/context_singleton.hh" +#include "../common/histogram.hh" +#include +#include +#include +#include + +using vec_str = std::vector; +using namespace std; +using namespace velox; + +const string help = "veloxdfs [options] \n\n" +"Commands\n" +"========\n" +"\tput\t\t\tUpload a file\n" +"\tget\t\t\tDownload a file\n" +"\trm\t\t\tRemove a file\n" +"\tcat\t\t\tDisplay a file's content\n" +"\tshow\t\t\tShow block location of a file\n" +"\tls\t\t\tList all the files\n" +"\tformat\t\t\tFormat storage and metadata\n" +"\n" +"Options\n" +"=======\n" +"\t-h, --help\t\tPrint help\n" +"\t-v, --verbose\t\tEnable debugging messages\n" +; + + +cli_driver::cli_driver() { } + +// parse_args {{{ +bool cli_driver::parse_args (int argc, char** argv) { + if (argc < 2) { + cerr << help << endl; + return true; + } + + string command = argv[1]; + + if (command == "-h" or command == "--help") { + cerr << help << endl; + return true; + } + + if (command == "ls" or 
command == "format") { + if (command == "ls") { + + bool is_human_readable = false; + if (argc > 2 && argv[2] == string("-h")) + is_human_readable = true; + + list(is_human_readable); + } else format(); + + return true; + } + + vec_str files (argv + 2, argv + argc); + + for (auto& file : files) { + if (command == "put") { + file_upload(file); + + } else if (command == "get") { + file_download(file); + + } else if (command == "cat") { + file_cat(file); + + } else if (command == "show") { + file_show(file); + + } else if (command == "remove") { + file_remove(file); + + } else { + cerr << "[ERR] Unknown operation" << endl; + cout << help << endl; + } + } + return true; +} +// }}} +// file_upload {{{ +void cli_driver::file_upload (std::string file) { + dfs.upload(file, false); +} +// }}} +// file_download {{{ +void cli_driver::file_download (std::string file) { + dfs.download(file); +} +// }}} +// file_cat {{{ +void cli_driver::file_cat (std::string file) { + // Read file, display it + cout << dfs.read_all(file) << endl; +} +// }}} +// file_remove {{{ +void cli_driver::file_remove (std::string file) { + dfs.remove(file); +} +// }}} +// file_show {{{ +void cli_driver::file_show (std::string file) { + vec_str nodes = GET_VEC_STR("network.nodes"); + Histogram boundaries(nodes.size(), 100); + boundaries.initialize(); + + model::metadata md = dfs.get_metadata(file); + cout << file << endl; + + int block_seq = 0; + for (auto block_name : md.blocks) { + uint32_t hash_key = md.hash_keys[block_seq++]; + int which_node = boundaries.get_index(hash_key); + int tmp_node; + for (int i=0; i<(int)md.replica; i++) { + if (i%2 == 1) { + tmp_node = (which_node + (i+1)/2 + nodes.size()) % nodes.size(); + } else { + tmp_node = (which_node - i/2 + nodes.size()) % nodes.size(); + } + string ip = nodes[tmp_node]; + cout << "\t- " << setw(15) << block_name << " : " << setw(15) << ip << endl; + } + } + +} +// }}} +// list {{{ +void cli_driver::list (bool human_readable) { + const uint32_t KB = 
1024; + const uint32_t MB = 1024 * 1024; + const uint64_t GB = (uint64_t) 1024 * 1024 * 1024; + const uint64_t TB = (uint64_t) 1024 * 1024 * 1024 * 1024; + const uint64_t PB = (uint64_t) 1024 * 1024 * 1024 * 1024 * 1024; + const uint32_t K = 1000; + const uint32_t M = 1000 * 1000; + const uint64_t G = (uint64_t) 1000 * 1000 * 1000; + const uint64_t T = (uint64_t) 1000 * 1000 * 1000 * 1000; + const uint64_t P = (uint64_t) 1000 * 1000 * 1000 * 1000 * 1000; + vec_str nodes = GET_VEC_STR("network.nodes"); + vector metadatas = dfs.get_metadata_all(); + + std::sort(metadatas.begin(), metadatas.end(), [] (const model::metadata& a, const model::metadata& b) { + return (a.name < b.name); + }); + cout + << setw(25) << "FileName" + << setw(14) << "Hash Key" + << setw(14) << "Size" + << setw(8) << "Blocks" + << setw(14) << "Host" + << setw(5) << "Repl" + << endl << string(80,'-') << endl; + + for (auto& md: metadatas) { + cout + << setw(25) << md.name + << setw(14) << md.hash_key; + if (human_readable) { + float hsize = 0; + int tabsize = 12; + string unit; + cout.precision(2); + if (md.size < K) { + hsize = (float)md.size; + unit = "B"; + tabsize++; + cout.precision(0); + } else if (md.size < M) { + hsize = (float)md.size / KB; + unit = "KB"; + } else if (md.size < G) { + hsize = (float)md.size / MB; + unit = "MB"; + } else if (md.size < T) { + hsize = (float)md.size / GB; + unit = "GB"; + } else if (md.size < P) { + hsize = (float)md.size / TB; + unit = "TB"; + } else { + hsize = (float)md.size / PB; + unit = "PB"; + } + cout << fixed; + cout << setw(tabsize) << hsize << unit; + } else { + cout << setw(14) << md.size; + } + + cout + << setw(8) << md.num_block + << setw(14) << nodes[md.hash_key % nodes.size()] + << setw(5) << md.replica + << endl; + } +} +// }}} +// format {{{ +void cli_driver::format () { + dfs.format(); +} +// }}} + diff --git a/src/client/cli_driver.hh b/src/client/cli_driver.hh new file mode 100644 index 0000000..78f0f24 --- /dev/null +++ 
b/src/client/cli_driver.hh @@ -0,0 +1,22 @@ +#pragma once +#include "dfs.hh" + +class cli_driver { + public: + cli_driver(); + ~cli_driver() = default; + + bool parse_args(int argc, char** argv); + + private: + void file_upload(std::string); + void file_download(std::string); + void file_cat(std::string); + void file_remove(std::string); + void file_show(std::string); + + void list(bool human_readable); + void format(); + + velox::DFS dfs; +}; diff --git a/src/client/dfs.cc b/src/client/dfs.cc index e8a9277..c49bb4b 100644 --- a/src/client/dfs.cc +++ b/src/client/dfs.cc @@ -4,32 +4,37 @@ #include "../messages/fileinfo.hh" #include "../messages/factory.hh" #include "../messages/fileinfo.hh" -#include "../messages/blockinfo.hh" -#include "../messages/blockupdate.hh" #include "../messages/fileexist.hh" +#include "../messages/filedescription.hh" #include "../messages/filerequest.hh" #include "../messages/filelist.hh" #include "../messages/blockdel.hh" -#include "../messages/filedescription.hh" #include "../messages/reply.hh" #include "../messages/blockrequest.hh" #include "../common/context.hh" #include "../common/hash.hh" #include "../common/histogram.hh" +#include "../common/block.hh" +#include "../common/blockmetadata.hh" #include "../messages/factory.hh" +#include "../messages/IOoperation.hh" +#include #include #include #include #include #include #include -#include +#include +#include +#include #include "../common/context_singleton.hh" using namespace std; using namespace eclipse; using namespace boost::archive; using namespace eclipse::messages; +using boost::asio::ip::tcp; // }}} namespace velox { @@ -40,437 +45,331 @@ enum class FILETYPE { Idata = 0x2 }; -// Constructors and misc {{{ -DFS::DFS() { } - -void DFS::load_settings() { - BLOCK_SIZE = context.settings.get("filesystem.block"); - NUM_NODES = context.settings.get>("network.nodes").size(); - replica = context.settings.get("filesystem.replica"); - port = context.settings.get ("network.ports.client"); - 
nodes = context.settings.get>("network.nodes"); -} +// Static functions {{{ +// +static unique_ptr connect(uint32_t hash_value) { + auto nodes = GET_VEC_STR("network.nodes"); + auto port = GET_INT("network.ports.client"); -unique_ptr DFS::connect(uint32_t hash_value) { auto socket = make_unique(context.io); string host = nodes[hash_value % nodes.size()]; tcp::resolver resolver(context.io); tcp::resolver::query query(host, to_string(port)); tcp::resolver::iterator it(resolver.resolve(query)); auto ep = make_unique(*it); - boost::system::error_code ec; - socket->connect(*ep, ec); - if (ec) { - cerr << "Connection was not possible : " << ec.message() << endl; - } + socket->connect(*ep); return socket; } -bool DFS::fexists(std::string filename) { +unique_ptr get_file_description +(std::function(uint32_t)> connect, std::string& fname, bool only_metadata) { + + uint32_t file_hash_key = h(fname); + auto socket = connect(file_hash_key); + + FileExist fe; + fe.name = fname; + send_message(socket.get(), &fe); + auto rep = read_reply (socket.get()); + + if (rep->message != "TRUE") { + cerr << "[ERR] " << fname << " doesn't exist." << endl; + return nullptr; + } + + FileRequest fr; + fr.name = fname; + + send_message(socket.get(), &fr); + unique_ptr fd = (read_reply (socket.get())); + socket->close(); + + return fd; +} + +unique_ptr get_file_description + (std::function(uint32_t)> connect, std::string& fname) { + + return get_file_description(connect, fname, false); +} + +static bool file_exists_local(std::string filename) { ifstream ifile(filename); return ifile; } // }}} -// put {{{ -int DFS::put(vec_str input) { - if (input.size() < 3) { - cout << "[INFO] dfs put file_1 file_2 ..." 
<< endl; +// Constructors and misc {{{ +DFS::DFS() { + BLOCK_SIZE = context.settings.get("filesystem.block"); + NUM_NODES = context.settings.get>("network.nodes").size(); + replica = context.settings.get("filesystem.replica"); + nodes = context.settings.get>("network.nodes"); +} + +// }}} +// upload {{{ +int DFS::upload(std::string file_name, bool is_binary) { + FILETYPE type = FILETYPE::Normal; + int replica = GET_INT("filesystem.replica"); + + if (is_binary) { + replica = NUM_NODES; + type = FILETYPE::App; + } + + uint32_t file_hash_key = h(file_name); + if (not file_exists_local(file_name)) { + cerr << "[ERR] " << file_name << " cannot be found in your machine." << endl; return EXIT_FAILURE; + } - } else { - vector chunk(BLOCK_SIZE); - Histogram boundaries(NUM_NODES, 0); - boundaries.initialize(); + //! Does the file exists + if (this->exists(file_name)) { + cerr << "[ERR] " << file_name << " already exists in VeloxDFS." << endl; + return EXIT_FAILURE; + } - string op = input[2]; - FILETYPE type = FILETYPE::Normal; - uint32_t i = 2; - if (op.compare("-b") == 0) { - replica = NUM_NODES; - type = FILETYPE::App; - i++; - } - for (; i < input.size(); i++) { - string file_name = input[i]; - if (!this->fexists(file_name)) { - cerr << "[ERR] " << file_name << " does not exist." << endl; - continue; - } - FileExist fe; - fe.name = file_name; - uint32_t file_hash_key = h(file_name); - auto socket = connect(file_hash_key); - send_message(socket.get(), &fe); - auto rep = read_reply (socket.get()); - - if (rep->message == "TRUE") { - cerr << "[ERR] " << file_name << " already exists." << endl; - continue; - } + //! 
Insert the file + int fd = open(file_name.c_str(), 0); - int which_server = file_hash_key % NUM_NODES; - int fd = open(file_name.c_str(), 0); - - __gnu_cxx::stdio_filebuf filebuf(fd, std::ios::in | std::ios::binary); // 1 - istream myfile(&filebuf); - uint64_t start = 0; - uint64_t end = start + BLOCK_SIZE - 1; - uint32_t block_size = 0; - unsigned int block_seq = 0; - - FileInfo file_info; - file_info.name = file_name; - file_info.hash_key = file_hash_key; - file_info.type = static_cast(type); - file_info.replica = replica; - myfile.seekg(0, myfile.end); - file_info.size = myfile.tellg(); - BlockInfo block_info; - - while (1) { - if (end < file_info.size) { - myfile.seekg(start+BLOCK_SIZE-1, myfile.beg); - while (1) { - if (myfile.peek() =='\n') { - break; - } else { - myfile.seekg(-1, myfile.cur); - end--; - } - } - } else { - end = file_info.size; - } - block_size = (uint32_t) end - start; - bzero(chunk.data(), BLOCK_SIZE); - myfile.seekg(start, myfile.beg); - block_info.content.reserve(block_size); - myfile.read(chunk.data(), block_size); - block_info.content = chunk.data(); - posix_fadvise(fd, end, block_size, POSIX_FADV_WILLNEED); - - - block_info.name = file_name + "_" + to_string(block_seq); - block_info.file_name = file_name; - block_info.hash_key = boundaries.random_within_boundaries(which_server); - block_info.seq = block_seq++; - block_info.size = block_size; - block_info.type = static_cast(FILETYPE::Normal); - block_info.replica = replica; - block_info.node = nodes[which_server]; - block_info.l_node = nodes[(which_server-1+NUM_NODES)%NUM_NODES]; - block_info.r_node = nodes[(which_server+1+NUM_NODES)%NUM_NODES]; - block_info.is_committed = 1; - - send_message(socket.get(), &block_info); - auto reply = read_reply (socket.get()); + __gnu_cxx::stdio_filebuf filebuf(fd, std::ios::in | std::ios::binary); + istream myfile(&filebuf); + FileInfo file_info; + file_info.name = file_name; + file_info.hash_key = file_hash_key; + file_info.type = static_cast(type); 
+ file_info.replica = replica; + myfile.seekg(0, ios_base::end); + file_info.size = myfile.tellg(); - if (reply->message != "OK") { - cerr << "[ERR] Failed to upload file. Details: " << reply->details << endl; - return EXIT_FAILURE; - } - if (end >= file_info.size) { - break; - } - start = end; - end = start + BLOCK_SIZE - 1; - which_server = (which_server + 1) % NUM_NODES; + //! Send file to be submitted; + auto socket = connect(file_hash_key); + send_message(socket.get(), &file_info); + + //! Get information of where to send the file + auto description = read_reply(socket.get()); + socket->close(); + + uint64_t start = 0; + uint64_t end = start + BLOCK_SIZE - 1; + uint32_t block_size = 0; + unsigned int block_seq = 0; + + //! Insert the blocks + uint32_t i = 0; + + vector blocks_metadata; + vector> slave_sockets; + vector chunk(BLOCK_SIZE); + Histogram boundaries(NUM_NODES, 100); + boundaries.initialize(); + + while (true) { + if (end < file_info.size) { + myfile.seekg(start+BLOCK_SIZE-1, ios_base::beg); + while (myfile.peek() != '\n') { + myfile.seekg(-1, ios_base::cur); + end--; } + } else { + end = file_info.size; + } + BlockMetadata metadata; + Block block; + + block_size = (uint32_t) end - start; + bzero(chunk.data(), BLOCK_SIZE); + myfile.seekg(start, myfile.beg); + block.second.reserve(block_size); + myfile.read(chunk.data(), block_size); + block.second = move(chunk.data()); + posix_fadvise(fd, end, block_size, POSIX_FADV_WILLNEED); + + //! 
Load block metadata info + int which_server = ((file_hash_key % NUM_NODES) + i) % NUM_NODES; + block.first = metadata.name = file_name + string("_") + to_string(i); + metadata.file_name = file_name; + metadata.hash_key = boundaries.random_within_boundaries(which_server); + metadata.seq = block_seq++; + metadata.size = block_size; + metadata.type = static_cast(FILETYPE::Normal); + metadata.replica = replica; + metadata.node = nodes[which_server]; + metadata.l_node = nodes[(which_server-1+NUM_NODES)%NUM_NODES]; + metadata.r_node = nodes[(which_server+1+NUM_NODES)%NUM_NODES]; + metadata.is_committed = 1; + + blocks_metadata.push_back(metadata); + + IOoperation io_ops; + io_ops.operation = eclipse::messages::IOoperation::OpType::BLOCK_INSERT; + io_ops.block = move(block); + + auto socket = connect(boundaries.get_index(metadata.hash_key)); + send_message(socket.get(), &io_ops); + + auto future = async(launch::async, [](unique_ptr socket) -> bool { + auto reply = read_reply (socket.get()); + socket->close(); - file_info.num_block = block_seq; - send_message(socket.get(), &file_info); - auto reply = read_reply (socket.get()); - close(fd); - socket->close(); + if (reply->message != "TRUE") { + cerr << "[ERR] Failed to upload block . Details: " << reply->details << endl; + return false; + } - if (reply->message != "OK") { - cerr << "[ERR] Failed to upload file. Details: " << reply->details << endl; - return EXIT_FAILURE; - } - cout << "[INFO] " << file_name << " is uploaded." 
<< endl; + return true; + }, move(socket)); + + slave_sockets.push_back(move(future)); + + if (end >= file_info.size) { + break; } + start = end; + end = start + BLOCK_SIZE - 1; + i++; + } + + for (auto& future: slave_sockets) + future.get(); + + file_info.num_block = block_seq; + file_info.blocks_metadata = blocks_metadata; + file_info.uploading = 0; + + socket = connect(file_hash_key); + send_message(socket.get(), &file_info); + auto reply = read_reply (socket.get()); + + if (reply->message != "TRUE") { + cerr << "[ERR] Failed to upload file. Details: " << reply->details << endl; + return EXIT_FAILURE; } + + socket->close(); + close(fd); + + cout << "[INFO] " << file_name << " is uploaded." << endl; return EXIT_SUCCESS; } // }}} -// load {{{ -std::string DFS::load(std::string file) { - Histogram boundaries(NUM_NODES, 0); +// download {{{ +int DFS::download(std::string file_name) { + Histogram boundaries(NUM_NODES, 100); boundaries.initialize(); - string file_name = file; + //! Does the file exists + if (not this->exists(file_name)) { + cerr << "[ERR] " << file_name << " already doesn't exists in VeloxDFS." << endl; + return EXIT_FAILURE; + } + uint32_t file_hash_key = h(file_name); auto socket = connect (file_hash_key); - FileExist fe; - fe.name = file_name; - send_message(socket.get(), &fe); - auto rep = read_reply (socket.get()); - - if (rep->message != "TRUE") { - cerr << "[ERR] " << file_name << " doesn't exist." 
<< endl; - return ""; - } FileRequest fr; fr.name = file_name; send_message(socket.get(), &fr); auto fd = read_reply (socket.get()); - - std::string output; socket->close(); - int block_seq = 0; - for (auto block_name : fd->blocks) { - uint32_t hash_key = fd->hash_keys[block_seq++]; - auto tmp_socket = connect(boundaries.get_index(hash_key)); - BlockRequest br; - br.name = block_name; - br.hash_key = hash_key; - send_message(tmp_socket.get(), &br); - auto msg = read_reply(tmp_socket.get()); - output += msg->content; - tmp_socket->close(); - } - socket->close(); - return output; -} -// }}} -// get {{{ -int DFS::get(vec_str argv) { - if (argv.size() < 3) { - cout << "[INFO] dfs get file_1 file_2 ..." << endl; - return EXIT_FAILURE; - } else { - Histogram boundaries(NUM_NODES, 0); - boundaries.initialize(); - for (uint32_t i = 2; i < argv.size(); i++) { - string file_name = argv[i]; - uint32_t file_hash_key = h(file_name); - auto socket = connect (file_hash_key); - FileExist fe; - fe.name = file_name; - send_message(socket.get(), &fe); - auto rep = read_reply (socket.get()); - - if (rep->message != "TRUE") { - cerr << "[ERR] " << file_name << " doesn't exist." << endl; - continue; - } - FileRequest fr; - fr.name = file_name; + ofstream file; + file.rdbuf()->pubsetbuf(0, 0); //! 
No buffer + file.open(file_name, ios::binary); - send_message(socket.get(), &fr); - auto fd = read_reply (socket.get()); + for (uint32_t i = 0; i < fd->blocks.size(); i++) { + IOoperation io_ops; + io_ops.operation = eclipse::messages::IOoperation::OpType::BLOCK_REQUEST; + io_ops.block.first = fd->blocks[i]; - ofstream f(file_name); - socket->close(); - int block_seq = 0; - for (auto block_name : fd->blocks) { - uint32_t hash_key = fd->hash_keys[block_seq++]; - auto tmp_socket = connect(boundaries.get_index(hash_key)); - BlockRequest br; - br.name = block_name; - br.hash_key = hash_key; - send_message(tmp_socket.get(), &br); - auto msg = read_reply(tmp_socket.get()); - f << msg->content; - tmp_socket->close(); - } + auto slave_socket = connect(boundaries.get_index(fd->hash_keys[i])); + send_message(slave_socket.get(), &io_ops); + auto msg = read_reply(slave_socket.get()); - cout << "[INFO] " << file_name << " is downloaded." << endl; - f.close(); - socket->close(); - } + file.write(msg->block.second.c_str(), msg->block.second.length()); + slave_socket->close(); } - return EXIT_SUCCESS; -} -// }}} -// cat {{{ -int DFS::cat(vec_str argv) { - if (argv.size() < 3) { - cout << "[INFO] dfs cat file_1 file_2 ..." << endl; - return EXIT_FAILURE; - } else { - Histogram boundaries(NUM_NODES, 0); - boundaries.initialize(); - for (uint32_t i = 2; i < argv.size(); i++) { - string file_name = argv[i]; - uint32_t file_hash_key = h(file_name); - auto socket = connect (file_hash_key % NUM_NODES); - FileExist fe; - fe.name = file_name; - send_message(socket.get(), &fe); - auto rep = read_reply (socket.get()); - - if (rep->message != "TRUE") { - cerr << "[ERR] " << file_name << " doesn't exist." 
<< endl; - continue; - } - FileRequest fr; - fr.name = file_name; - - send_message (socket.get(), &fr); - auto fd = read_reply (socket.get()); + file.close(); - socket->close(); - int block_seq = 0; - for (auto block_name : fd->blocks) { - uint32_t hash_key = fd->hash_keys[block_seq++]; - auto tmp_socket = connect(boundaries.get_index(hash_key)); - BlockRequest br; - br.name = block_name; - br.hash_key = hash_key; - send_message(tmp_socket.get(), &br); - auto msg = read_reply(tmp_socket.get()); - cout << msg->content; - tmp_socket->close(); - } - socket->close(); - } - } return EXIT_SUCCESS; } // }}} -// ls {{{ -int DFS::ls(vec_str argv) { - const uint32_t KB = 1024; - const uint32_t MB = 1024 * 1024; - const uint64_t GB = (uint64_t) 1024 * 1024 * 1024; - const uint64_t TB = (uint64_t) 1024 * 1024 * 1024 * 1024; - const uint64_t PB = (uint64_t) 1024 * 1024 * 1024 * 1024 * 1024; - const uint32_t K = 1000; - const uint32_t M = 1000 * 1000; - const uint64_t G = (uint64_t) 1000 * 1000 * 1000; - const uint64_t T = (uint64_t) 1000 * 1000 * 1000 * 1000; - const uint64_t P = (uint64_t) 1000 * 1000 * 1000 * 1000 * 1000; - vector total; - string op = ""; - if (argv.size() >= 3) { - op = argv[2]; - } - for (unsigned int net_id=0; net_id(socket.get()); - std::copy(file_list_reply->data.begin(), file_list_reply->data.end(), back_inserter(total)); - } +// read_all {{{ +std::string DFS::read_all(std::string file) { + Histogram boundaries(NUM_NODES, 100); + boundaries.initialize(); - std::sort(total.begin(), total.end(), [] (const FileInfo& a, const FileInfo& b) { - return (a.name < b.name); - }); - cout - << setw(25) << "FileName" - << setw(14) << "Hash Key" - << setw(14) << "Size" - << setw(8) << "Blocks" - << setw(14) << "Host" - << setw(5) << "Repl" - << endl << string(80,'-') << endl; - - for (auto& fl: total) { - cout - << setw(25) << fl.name - << setw(14) << fl.hash_key; - if (op.compare("-h") == 0) { - float hsize = 0; - int tabsize = 12; - string unit; - 
cout.precision(2); - if (fl.size < K) { - hsize = (float)fl.size; - unit = "B"; - tabsize++; - cout.precision(0); - } else if (fl.size < M) { - hsize = (float)fl.size / KB; - unit = "KB"; - } else if (fl.size < G) { - hsize = (float)fl.size / MB; - unit = "MB"; - } else if (fl.size < T) { - hsize = (float)fl.size / GB; - unit = "GB"; - } else if (fl.size < P) { - hsize = (float)fl.size / TB; - unit = "TB"; - } else { - hsize = (float)fl.size / PB; - unit = "PB"; - } - cout << fixed; - cout << setw(tabsize) << hsize << unit; - } else { - cout << setw(14) << fl.size; - } + auto fd = get_file_description( + std::bind(&connect, std::placeholders::_1), file + ); + if(fd == nullptr) return ""; + + std::string output; + int index = 0; - cout - << setw(8) << fl.num_block - << setw(14) << nodes[fl.hash_key % NUM_NODES] - << setw(5) << fl.replica - << endl; + for (auto block_name : fd->blocks) { + IOoperation io_ops; + io_ops.operation = eclipse::messages::IOoperation::OpType::BLOCK_REQUEST; + io_ops.block.first = fd->blocks[index]; + + auto slave_socket = connect(boundaries.get_index(fd->hash_keys[index])); + send_message(slave_socket.get(), &io_ops); + auto msg = read_reply(slave_socket.get()); + output += msg->block.second; + slave_socket->close(); + index++; } - return EXIT_SUCCESS; + + return output; } -// }}} -// rm {{{ -int DFS::rm(vec_str argv) { - if (argv.size() < 3) { - cout << "[INFO] dfs rm file_1 file_2 ..." 
<< endl; - return EXIT_FAILURE; - } else { - Histogram boundaries(NUM_NODES, 0); - boundaries.initialize(); +// }}} +// remove {{{ +int DFS::remove(std::string file_name) { + Histogram boundaries(NUM_NODES, 0); + boundaries.initialize(); - for (uint32_t i = 2; i < argv.size(); i++) { - string file_name = argv[i]; - uint32_t file_hash_key = h(file_name); - auto socket = connect(file_hash_key); - FileRequest fr; - fr.name = file_name; - - send_message(socket.get(), &fr); - auto fd = read_reply(socket.get()); - - unsigned int block_seq = 0; - for (auto block_name : fd->blocks) { - uint32_t block_hash_key = fd->hash_keys[block_seq]; - BlockDel bd; - bd.name = block_name; - bd.file_name = file_name; - bd.hash_key = block_hash_key; - bd.seq = block_seq++; - bd.replica = fd->replica; - send_message(socket.get(), &bd); - auto msg = read_reply(socket.get()); - if (msg->message != "OK") { - cerr << "[ERR] " << block_name << "doesn't exist." << endl; - return EXIT_FAILURE; - } - } + uint32_t file_hash_key = h(file_name); + auto socket = connect(file_hash_key); + FileRequest fr; + fr.name = file_name; - FileDel file_del; - file_del.name = file_name; - socket = connect(file_hash_key); - send_message(socket.get(), &file_del); - auto reply = read_reply(socket.get()); - if (reply->message != "OK") { - cerr << "[ERR] " << file_name << " doesn't exist." << endl; - return EXIT_FAILURE; - } - cout << "[INFO] " << file_name << " is removed." 
<< endl; + send_message(socket.get(), &fr); + auto fd = read_reply(socket.get()); + //socket->close(); + + unsigned int block_seq = 0; + for (auto block_name : fd->blocks) { + uint32_t block_hash_key = fd->hash_keys[block_seq++]; + auto tmp_socket = connect(boundaries.get_index(block_hash_key)); + IOoperation io_ops; + io_ops.operation = eclipse::messages::IOoperation::OpType::BLOCK_DELETE; + io_ops.block.first = block_name; + + send_message(tmp_socket.get(), &io_ops); + auto msg = read_reply(tmp_socket.get()); + if (msg->message != "TRUE") { + cerr << "[ERR] " << block_name << "doesn't exist." << endl; + return EXIT_FAILURE; } - return EXIT_SUCCESS; } + + FileDel file_del; + file_del.name = file_name; + //socket = connect(file_hash_key); + send_message(socket.get(), &file_del); + auto reply = read_reply(socket.get()); + if (reply->message != "OK") { + cerr << "[ERR] " << file_name << " doesn't exist." << endl; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; } // }}} // format {{{ int DFS::format() { - vector total; - for (unsigned int net_id = 0; net_id < NUM_NODES; net_id++) { FormatRequest fr; auto socket = connect(net_id); @@ -486,47 +385,8 @@ int DFS::format() { return EXIT_SUCCESS; } // }}} -// show {{{ -int DFS::show(vec_str argv) { - if (argv.size() < 3) { - cout << "usage: dfs show file_name1 file_name2 ..." 
<< endl; - return EXIT_FAILURE; - - } else { - Histogram boundaries(NUM_NODES, 0); - boundaries.initialize(); - for (uint32_t i = 2; i < argv.size(); i++) { - string file_name = argv[i]; - uint32_t file_hash_key = h(file_name); - auto socket = connect (file_hash_key); - FileRequest fr; - fr.name = file_name; - - send_message (socket.get(), &fr); - auto fd = read_reply(socket.get()); - cout << file_name << endl; - int block_seq = 0; - for (auto block_name : fd->blocks) { - uint32_t hash_key = fd->hash_keys[block_seq++]; - int which_node = boundaries.get_index(hash_key); - int tmp_node; - for (int i=0; ireplica; i++) { - if (i%2 == 1) { - tmp_node = (which_node + (i+1)/2 + nodes.size()) % nodes.size(); - } else { - tmp_node = (which_node - i/2 + nodes.size()) % nodes.size(); - } - string ip = nodes[tmp_node]; - cout << "\t- " << setw(15) << block_name << " : " << setw(15) << ip << endl; - } - } - socket->close(); - } - } - return EXIT_SUCCESS; -} -// }}} // pget {{{ +//! @deprecated int DFS::pget(vec_str argv) { string file_name = ""; if (argv.size() < 5) { @@ -539,23 +399,12 @@ int DFS::pget(vec_str argv) { file_name = argv[2]; uint64_t start_offset = stol(argv[3]); uint64_t read_byte = stol(argv[4]); - uint32_t file_hash_key = h(file_name); - auto socket = connect (file_hash_key); - FileExist fe; - fe.name = file_name; - send_message(socket.get(), &fe); - auto rep = read_reply (socket.get()); - - if (rep->message != "TRUE") { - cerr << "[ERR] " << file_name << " doesn't exist." << endl; - return EXIT_FAILURE; - } - FileRequest fr; - fr.name = file_name; - send_message(socket.get(), &fr); - auto fd = read_reply (socket.get()); - socket->close(); + auto fd = get_file_description( + std::bind(&connect, std::placeholders::_1), file_name + ); + if(fd == nullptr) return EXIT_FAILURE; + if (start_offset + read_byte > fd->size) { cerr << "[ERR] Wrong read byte." 
<< endl; return EXIT_FAILURE; @@ -620,485 +469,227 @@ int DFS::update(vec_str argv) { ori_file_name = argv[2]; string new_file_name = argv[3]; - uint64_t start_offset = stol(argv[4]); - uint32_t file_hash_key = h(ori_file_name); - auto socket = connect (file_hash_key); - FileExist fe; - fe.name = ori_file_name; - send_message(socket.get(), &fe); - auto rep = read_reply (socket.get()); - - if (rep->message != "TRUE") { - cerr << "[ERR] " << ori_file_name << " doesn't exist." << endl; - return EXIT_FAILURE; - } - FileRequest fr; - fr.name = ori_file_name; + uint32_t start_offset = stol(argv[4]); ifstream myfile(new_file_name); myfile.seekg(0, myfile.end); - uint64_t new_file_size = myfile.tellg(); + uint32_t new_file_size = myfile.tellg(); - send_message(socket.get(), &fr); - auto fd = read_reply (socket.get()); - socket->close(); - if (start_offset + new_file_size > fd->size) { - cerr << "[ERR] Wrong file size." << endl; - return EXIT_FAILURE; - } - myfile.seekg(0, myfile.beg); char *buffer = new char[new_file_size]; + + myfile.seekg(0, myfile.beg); myfile.read(buffer, new_file_size); - string sbuffer(buffer); + + uint64_t written_bytes = write(ori_file_name, buffer, start_offset, new_file_size); + int ret = (written_bytes > 0 ? EXIT_SUCCESS : EXIT_FAILURE); + delete[] buffer; myfile.close(); - int block_seq = 0; - uint64_t passed_byte = 0; - uint64_t write_byte_cnt = 0; - uint32_t ori_start_pos = 0; - uint32_t to_write_byte = new_file_size; - bool first_block = true; - bool final_block = false; - for (auto block_name : fd->blocks) { - // pass until find the block which has start_offset - if (passed_byte + fd->block_size[block_seq] < start_offset) { - passed_byte += fd->block_size[block_seq]; - block_seq++; - continue; - } else { - // If this block is the first one of updating blocks, - // start position will be start_offset - passed_byte. - // Otherwise, start position will be 0. 
- uint32_t hash_key = fd->hash_keys[block_seq]; - if (first_block) { - first_block = false; - ori_start_pos = start_offset - passed_byte; - } else { - ori_start_pos = 0; - } - // write length means the lenght which should be repliaced in THIS block. - // to_write_byte means remaining total bytes to write - // If this block is the last one, write_length should be same as to_write_byte - // Otherwise, write_length should be same as block_size - start position - uint32_t write_length = fd->block_size[block_seq] - ori_start_pos; - if (to_write_byte < write_length) { - final_block = true; - write_length = to_write_byte; - } - // send message - BlockUpdate bu; - bu.name = block_name; - bu.file_name = ori_file_name; - bu.seq = block_seq; - bu.replica = fd->replica; - bu.hash_key = hash_key; - bu.pos = ori_start_pos; - bu.len = write_length; - bu.content = sbuffer.substr(write_byte_cnt, write_length); - bu.size = fd->block_size[block_seq]; - auto tmp_socket = connect(boundaries.get_index(file_hash_key)); - send_message(tmp_socket.get(), &bu); - auto reply = read_reply (tmp_socket.get()); - tmp_socket->close(); - if (reply->message != "OK") { - cerr << "[ERR] Failed to upload file. Details: " << reply->details << endl; - return EXIT_FAILURE; - } - // calculate total write bytes and remaining write bytes - write_byte_cnt += write_length; - if (final_block) { - break; - } - to_write_byte -= write_length; - block_seq++; - } - } + return ret; } - cout << "[INFO] " << ori_file_name << " is updated." << endl; - return EXIT_SUCCESS; } // }}} // append {{{ -int DFS::append(vec_str argv) { - string ori_file_name = ""; - if (argv.size() < 4) { // argument count check - cout << "[INFO] dfs append original_file new_file1 new_file2 ..." << endl; +//! 
@todo fix implementation +int DFS::append(string file_name, string buf) { + string ori_file_name = file_name; + Histogram boundaries(NUM_NODES, 0); + boundaries.initialize(); + + uint32_t file_hash_key = h(ori_file_name); + auto socket = connect(file_hash_key); + FileExist fe; + fe.name = ori_file_name; + send_message(socket.get(), &fe); + auto rep = read_reply (socket.get()); + + if (rep->message != "TRUE") { // exist check + cerr << "[ERR] " << ori_file_name << " doesn't exist." << endl; return EXIT_FAILURE; - } else { - Histogram boundaries(NUM_NODES, 0); - boundaries.initialize(); - ori_file_name = argv[2]; + } + FileRequest fr; + fr.name = ori_file_name; + + istringstream myfile (buf); + //ifstream myfile(new_file_name); + myfile.seekg(0, myfile.end); + uint64_t new_file_size = myfile.tellg(); + if (new_file_size <= 0) { // input size check + cerr << "[ERR] " << buf << " size should be greater than 0." << endl; + return EXIT_FAILURE; + } - for (uint32_t i = 3; i < argv.size(); i++) { - string new_file_name = argv[i]; - uint32_t file_hash_key = h(ori_file_name); - auto socket = connect(file_hash_key); - FileExist fe; - fe.name = ori_file_name; - send_message(socket.get(), &fe); - auto rep = read_reply (socket.get()); - - if (rep->message != "TRUE") { // exist check - cerr << "[ERR] " << ori_file_name << " doesn't exist." << endl; - return EXIT_FAILURE; - } - FileRequest fr; - fr.name = ori_file_name; - - ifstream myfile(new_file_name); - myfile.seekg(0, myfile.end); - uint64_t new_file_size = myfile.tellg(); - if (new_file_size <= 0) { // input size check - cerr << "[ERR] " << new_file_name << " size should be greater than 0." 
<< endl; - return EXIT_FAILURE; - } + // start normal append procedure + send_message(socket.get(), &fr); + auto fd = read_reply (socket.get()); - // start normal append procedure - send_message(socket.get(), &fr); - auto fd = read_reply (socket.get()); - - int block_seq = fd->blocks.size()-1; // last block - uint32_t ori_start_pos = 0; // original file's start position (in last block) - uint64_t to_write_byte = new_file_size; - uint64_t write_byte_cnt = 0; - bool update_block = true; // 'false' for append - bool new_block = false; - uint32_t hash_key = fd->hash_keys[block_seq]; - uint64_t write_length = 0; - uint64_t start = 0; - uint64_t end = 0; - uint32_t block_size = 0; - - while (to_write_byte > 0) { // repeat until to_write_byte == 0 - if (update_block == true) { - ori_start_pos = fd->block_size[block_seq]; - if (BLOCK_SIZE - ori_start_pos > to_write_byte) { // can append within original block - myfile.seekg(start + to_write_byte, myfile.beg); - } else { // can't write whole contents in one block - myfile.seekg(start + BLOCK_SIZE - ori_start_pos - 1, myfile.beg); - new_block = true; - while (1) { - if (myfile.peek() =='\n' || myfile.tellg() == 0) { - break; - } else { - myfile.seekg(-1, myfile.cur); - } - } - if (myfile.tellg() <= 0) { - update_block = false; - } - } - } - if (update_block == true) { // update block - write_length = myfile.tellg(); - write_length -= start; - myfile.seekg(start, myfile.beg); - char *buffer = new char[write_length+1]; - bzero(buffer, write_length+1); - myfile.read(buffer, write_length); - string sbuffer(buffer); - delete[] buffer; - BlockUpdate bu; - bu.name = fd->blocks[block_seq]; - bu.file_name = ori_file_name; - bu.seq = block_seq; - bu.replica = fd->replica; - bu.hash_key = hash_key; - bu.pos = ori_start_pos; - bu.len = write_length; - bu.content = sbuffer; - bu.size = ori_start_pos + write_length; - auto tmp_socket = connect(boundaries.get_index(file_hash_key)); - send_message(tmp_socket.get(), &bu); - auto reply = 
read_reply (tmp_socket.get()); - tmp_socket->close(); - if (reply->message != "OK") { - cerr << "[ERR] Failed to upload file. Details: " << reply->details << endl; - return EXIT_FAILURE; - } - // calculate total write bytes and remaining write bytes - to_write_byte -= write_length; - write_byte_cnt += write_length; - start += write_length; - if (new_block == true) { - update_block = false; - } - } else { // append block - // make new block - block_seq++; - int which_server = ((file_hash_key % NUM_NODES) + block_seq) % NUM_NODES; - start = write_byte_cnt; - end = start + BLOCK_SIZE -1; - BlockInfo block_info; - - if (end < to_write_byte) { - // not final block - myfile.seekg(end, myfile.beg); - while (1) { - if (myfile.peek() =='\n') { - break; - } else { - myfile.seekg(-1, myfile.cur); - end--; - } - } - } else { - end = start + to_write_byte; - } - myfile.seekg(start, myfile.beg); - block_size = (uint32_t) end - start; - write_length = block_size; - char *buffer = new char[block_size+1]; - bzero(buffer, block_size+1); - myfile.read(buffer, block_size); - string sbuffer(buffer); - delete[] buffer; - myfile.seekg(start, myfile.beg); - block_info.content = sbuffer; - - block_info.name = ori_file_name + "_" + to_string(block_seq); - block_info.file_name = ori_file_name; - block_info.hash_key = boundaries.random_within_boundaries(which_server); - block_info.seq = block_seq; - block_info.size = block_size; - block_info.type = static_cast(FILETYPE::Normal); - block_info.replica = replica; - block_info.node = nodes[which_server]; - block_info.l_node = nodes[(which_server-1+NUM_NODES)%NUM_NODES]; - block_info.r_node = nodes[(which_server+1+NUM_NODES)%NUM_NODES]; - block_info.is_committed = 1; - - send_message(socket.get(), &block_info); - auto reply = read_reply (socket.get()); - - if (reply->message != "OK") { - cerr << "[ERR] Failed to upload file. 
Details: " << reply->details << endl; - return EXIT_FAILURE; - } - to_write_byte -= write_length; - write_byte_cnt += write_length; - if (to_write_byte == 0) { + int block_seq = fd->blocks.size()-1; // last block + uint32_t ori_start_pos = 0; // original file's start position (in last block) + uint64_t to_write_byte = new_file_size; + uint64_t write_byte_cnt = 0; + bool update_block = true; // 'false' for append + bool new_block = false; + uint32_t hash_key = fd->hash_keys[block_seq]; + uint64_t write_length = 0; + uint64_t start = 0; + uint64_t end = 0; + uint32_t block_size = 0; + vector blocks_metadata; + + while (to_write_byte > 0) { // repeat until to_write_byte == 0 + BlockMetadata metadata; + Block block; + IOoperation io_ops; + + if (update_block == true) { + ori_start_pos = fd->block_size[block_seq]; + if (BLOCK_SIZE - ori_start_pos > to_write_byte) { // can append within original block + myfile.seekg(start + to_write_byte, myfile.beg); + } else { // can't write whole bufs in one block + myfile.seekg(start + BLOCK_SIZE - ori_start_pos - 1, myfile.beg); + new_block = true; + while (1) { + if (myfile.peek() =='\n' || myfile.tellg() == 0) { break; + } else { + myfile.seekg(-1, myfile.cur); } - start = end; - end = start + BLOCK_SIZE - 1; - which_server = (which_server + 1) % NUM_NODES; + } + if (myfile.tellg() <= 0) { + update_block = false; } } - FileUpdate fu; - fu.name = ori_file_name; - fu.num_block = block_seq+1; - fu.size = fd->size + new_file_size; - send_message(socket.get(), &fu); - auto reply = read_reply (socket.get()); - myfile.close(); - socket->close(); - - if (reply->message != "OK") { - cerr << "[ERR] Failed to append file. Details: " << reply->details << endl; - return EXIT_FAILURE; - } - cout << "[INFO] " << argv[i] << " is appended." 
<< endl; } - } - return EXIT_SUCCESS; -} -// }}} -// push_back {{{ -int DFS::push_back(vec_str argv) { - string ori_file_name = ""; - if (argv.size() < 4) { // argument count check - cout << "[INFO] dfs append original_file new_file1 new_file2 ..." << endl; - return EXIT_FAILURE; - } else { - Histogram boundaries(NUM_NODES, 0); - boundaries.initialize(); - ori_file_name = argv[2]; - - for (uint32_t i = 3; i < argv.size(); i++) { - string content= argv[i]; - uint32_t file_hash_key = h(ori_file_name); - auto socket = connect(file_hash_key); - FileExist fe; - fe.name = ori_file_name; - send_message(socket.get(), &fe); - auto rep = read_reply (socket.get()); - - if (rep->message != "TRUE") { // exist check - cerr << "[ERR] " << ori_file_name << " doesn't exist." << endl; - return EXIT_FAILURE; - } - FileRequest fr; - fr.name = ori_file_name; - - istringstream myfile (content); - //ifstream myfile(new_file_name); - myfile.seekg(0, myfile.end); - uint64_t new_file_size = myfile.tellg(); - if (new_file_size <= 0) { // input size check - cerr << "[ERR] " << content << " size should be greater than 0." 
<< endl; + if (update_block == true) { // update block + write_length = myfile.tellg(); + write_length -= start; + myfile.seekg(start, myfile.beg); + char *buffer = new char[write_length+1]; + bzero(buffer, write_length+1); + myfile.read(buffer, write_length); + string sbuffer(buffer); + delete[] buffer; + + metadata.name = fd->blocks[block_seq]; + metadata.file_name = ori_file_name; + metadata.seq = block_seq; + metadata.replica = fd->replica; + metadata.hash_key = hash_key; + metadata.size = ori_start_pos + write_length; + metadata.l_node = "0"; + metadata.r_node = "0"; + metadata.is_committed = 1; + + blocks_metadata.push_back(metadata); + + block.first = metadata.name; + block.second = move(sbuffer); + + io_ops.operation = eclipse::messages::IOoperation::OpType::BLOCK_UPDATE; + io_ops.block = move(block); + io_ops.pos = ori_start_pos; + io_ops.length = write_length; + + auto block_server = connect(boundaries.get_index(metadata.hash_key)); + send_message(block_server.get(), &io_ops); + auto reply = read_reply (block_server.get()); + block_server->close(); + if (reply->message != "TRUE") { + cerr << "[ERR] Failed to upload file. 
Details: " << reply->details << endl; return EXIT_FAILURE; - } + } - // start normal append procedure - send_message(socket.get(), &fr); - auto fd = read_reply (socket.get()); - - int block_seq = fd->blocks.size()-1; // last block - uint32_t ori_start_pos = 0; // original file's start position (in last block) - uint64_t to_write_byte = new_file_size; - uint64_t write_byte_cnt = 0; - bool update_block = true; // 'false' for append - bool new_block = false; - uint32_t hash_key = fd->hash_keys[block_seq]; - uint64_t write_length = 0; - uint64_t start = 0; - uint64_t end = 0; - uint32_t block_size = 0; - - while (to_write_byte > 0) { // repeat until to_write_byte == 0 - if (update_block == true) { - ori_start_pos = fd->block_size[block_seq]; - if (BLOCK_SIZE - ori_start_pos > to_write_byte) { // can append within original block - myfile.seekg(start + to_write_byte, myfile.beg); - } else { // can't write whole contents in one block - myfile.seekg(start + BLOCK_SIZE - ori_start_pos - 1, myfile.beg); - new_block = true; - while (1) { - if (myfile.peek() =='\n' || myfile.tellg() == 0) { - break; - } else { - myfile.seekg(-1, myfile.cur); - } - } - if (myfile.tellg() <= 0) { - update_block = false; - } - } - } - if (update_block == true) { // update block - write_length = myfile.tellg(); - write_length -= start; - myfile.seekg(start, myfile.beg); - char *buffer = new char[write_length+1]; - bzero(buffer, write_length+1); - myfile.read(buffer, write_length); - string sbuffer(buffer); - delete[] buffer; - BlockUpdate bu; - bu.name = fd->blocks[block_seq]; - bu.file_name = ori_file_name; - bu.seq = block_seq; - bu.replica = fd->replica; - bu.hash_key = hash_key; - bu.pos = ori_start_pos; - bu.len = write_length; - bu.size = ori_start_pos + write_length; - bu.content = sbuffer; - bu.is_header = true; - - send_message(socket.get(), &bu); - auto reply = read_reply (socket.get()); - if (reply->message != "OK") { - cerr << "[ERR] Failed to upload file. 
Details: " << reply->details << endl; - return EXIT_FAILURE; - } - - //bu.is_header = false; - - //auto tmp_socket = connect(boundaries.get_index(hash_key)); - //send_message(tmp_socket.get(), &bu); - //reply = read_reply (tmp_socket.get()); - //tmp_socket->close(); - //if (reply->message != "OK") { - // cerr << "[ERR] Failed to upload file. Details: " << reply->details << endl; - // return EXIT_FAILURE; - //} - // calculate total write bytes and remaining write bytes - to_write_byte -= write_length; - write_byte_cnt += write_length; - start += write_length; - if (new_block == true) { - update_block = false; - } - } else { // append block - // make new block - block_seq++; - int which_server = ((file_hash_key % NUM_NODES) + block_seq) % NUM_NODES; - start = write_byte_cnt; - end = start + BLOCK_SIZE -1; - BlockInfo block_info; - - if (end < to_write_byte) { - // not final block - myfile.seekg(end, myfile.beg); - while (1) { - if (myfile.peek() =='\n') { - break; - } else { - myfile.seekg(-1, myfile.cur); - end--; - } - } - } else { - end = start + to_write_byte; - } - myfile.seekg(start, myfile.beg); - block_size = (uint32_t) end - start; - write_length = block_size; - char *buffer = new char[block_size+1]; - bzero(buffer, block_size+1); - myfile.read(buffer, block_size); - string sbuffer(buffer); - delete[] buffer; - myfile.seekg(start, myfile.beg); - block_info.content = sbuffer; - - block_info.name = ori_file_name + "_" + to_string(block_seq); - block_info.file_name = ori_file_name; - block_info.hash_key = boundaries.random_within_boundaries(which_server); - block_info.seq = block_seq; - block_info.size = block_size; - block_info.type = static_cast(FILETYPE::Normal); - block_info.replica = replica; - block_info.node = nodes[which_server]; - block_info.l_node = nodes[(which_server-1+NUM_NODES)%NUM_NODES]; - block_info.r_node = nodes[(which_server+1+NUM_NODES)%NUM_NODES]; - block_info.is_committed = 1; - - send_message(socket.get(), &block_info); - auto reply = 
read_reply (socket.get()); - - if (reply->message != "OK") { - cerr << "[ERR] Failed to upload file. Details: " << reply->details << endl; - return EXIT_FAILURE; - } - to_write_byte -= write_length; - write_byte_cnt += write_length; - if (to_write_byte == 0) { + // calculate total write bytes and remaining write bytes + to_write_byte -= write_length; + write_byte_cnt += write_length; + start += write_length; + if (new_block == true) { + update_block = false; + } + } else { // append block + // make new block + block_seq++; + int which_server = ((file_hash_key % NUM_NODES) + block_seq) % NUM_NODES; + start = write_byte_cnt; + end = start + BLOCK_SIZE -1; + + if (end < to_write_byte) { + // not final block + myfile.seekg(end, myfile.beg); + while (1) { + if (myfile.peek() =='\n') { break; + } else { + myfile.seekg(-1, myfile.cur); + end--; } - start = end; - end = start + BLOCK_SIZE - 1; - which_server = (which_server + 1) % NUM_NODES; } + } else { + end = start + to_write_byte; } - FileUpdate fu; - fu.name = ori_file_name; - fu.num_block = block_seq+1; - fu.size = fd->size + new_file_size; - send_message(socket.get(), &fu); - auto reply = read_reply (socket.get()); - //myfile.close(); - socket->close(); + myfile.seekg(start, myfile.beg); + block_size = (uint32_t) end - start; + write_length = block_size; + char *buffer = new char[block_size+1]; + bzero(buffer, block_size+1); + myfile.read(buffer, block_size); + string sbuffer(buffer); + delete[] buffer; + myfile.seekg(start, myfile.beg); + + metadata.name = ori_file_name + "_" + to_string(block_seq); + metadata.file_name = ori_file_name; + metadata.hash_key = boundaries.random_within_boundaries(which_server); + metadata.seq = block_seq; + metadata.size = block_size; + + metadata.replica = replica; + metadata.node = nodes[which_server]; + metadata.l_node = nodes[(which_server-1+NUM_NODES)%NUM_NODES]; + metadata.r_node = nodes[(which_server+1+NUM_NODES)%NUM_NODES]; + metadata.is_committed = 1; + + 
blocks_metadata.push_back(metadata); + + IOoperation io_ops; + io_ops.operation = eclipse::messages::IOoperation::OpType::BLOCK_INSERT; + io_ops.block.first = metadata.name; + io_ops.block.second = move(sbuffer); + + auto block_server = connect(boundaries.get_index(metadata.hash_key)); + send_message(block_server.get(), &io_ops); + auto reply = read_reply (block_server.get()); + block_server->close(); if (reply->message != "OK") { - cerr << "[ERR] Failed to append file. Details: " << reply->details << endl; + cerr << "[ERR] Failed to upload file. Details: " << reply->details << endl; return EXIT_FAILURE; + } + to_write_byte -= write_length; + write_byte_cnt += write_length; + if (to_write_byte == 0) { + break; } + start = end; + end = start + BLOCK_SIZE - 1; + which_server = (which_server + 1) % NUM_NODES; } } + FileUpdate fu; + fu.name = ori_file_name; + fu.num_block = block_seq+1; + fu.size = fd->size + new_file_size; + fu.blocks_metadata = blocks_metadata; + + send_message(socket.get(), &fu); + auto reply = read_reply (socket.get()); + socket->close(); + + if (reply->message != "OK") { + cerr << "[ERR] Failed to append file. 
Details: " << reply->details << endl; + return EXIT_FAILURE; + } return EXIT_SUCCESS; } /// }}} @@ -1116,46 +707,370 @@ bool DFS::exists(std::string name) { } // }}} // touch {{{ -bool DFS::touch(std::string name) { - if (exists(name)) +bool DFS::touch(std::string file_name) { + if (exists(file_name)) return false; - Histogram boundaries(NUM_NODES, 0); + Histogram boundaries(NUM_NODES, 100); boundaries.initialize(); - int which_server = h(name) % NUM_NODES; - - BlockInfo block_info; - block_info.name = name + "_0"; - block_info.file_name = name; - block_info.hash_key = boundaries.random_within_boundaries(which_server); - block_info.seq = 0; - block_info.size = 0; - block_info.type = static_cast(FILETYPE::Normal); - block_info.replica = replica; - block_info.node = nodes[which_server]; - block_info.l_node = nodes[(which_server-1+NUM_NODES)%NUM_NODES]; - block_info.r_node = nodes[(which_server+1+NUM_NODES)%NUM_NODES]; - block_info.is_committed = 1; - block_info.content = "NOOP"; - - auto socket = connect(h(name)); - send_message(socket.get(), &block_info); - auto reply = read_reply (socket.get()); + FILETYPE type = FILETYPE::Normal; + + //! If it is an app to be upload :TODO: + /* + if (is_binary) { + replica = NUM_NODES; + type = FILETYPE::App; + } + */ + uint32_t file_hash_key = h(file_name); + + //! Insert the file FileInfo file_info; - file_info.name = name; - file_info.hash_key = h(name); - file_info.type = static_cast(FILETYPE::Normal); + file_info.name = file_name; + file_info.hash_key = file_hash_key; + file_info.type = static_cast(type); file_info.replica = replica; - file_info.size = 0; + file_info.size = 0ul; + + //! Send file to be submitted; + auto socket = connect(file_hash_key); + send_message(socket.get(), &file_info); + + //! 
Get information of where to send the file + auto description = read_reply(socket.get()); + socket->close(); + + IOoperation io_ops; + io_ops.operation = eclipse::messages::IOoperation::OpType::BLOCK_INSERT; + + int which_server = (description->hash_key % NUM_NODES) % NUM_NODES; + + BlockMetadata metadata; + metadata.name = file_name + "_" + to_string(0); + metadata.file_name = file_name; + metadata.hash_key = boundaries.random_within_boundaries(which_server); + metadata.seq = 0; + metadata.size = 0; + metadata.type = static_cast(type); + metadata.replica = replica; + metadata.node = nodes[which_server]; + metadata.l_node = nodes[(which_server - 1 + NUM_NODES) % NUM_NODES]; + metadata.r_node = nodes[(which_server + 1 + NUM_NODES) % NUM_NODES]; + metadata.is_committed = 1; + + Block block; + block.first = metadata.name; + block.second = ""; + + io_ops.block = move(block); + + socket = connect(boundaries.get_index(metadata.hash_key)); + send_message(socket.get(), &io_ops); + + auto future = async(launch::async, [](unique_ptr socket) -> bool { + auto reply = read_reply (socket.get()); + socket->close(); + + if (reply->message != "TRUE") { + cerr << "[ERR] Failed to upload block . Details: " << reply->details << endl; + return false; + } + + return true; + }, move(socket)); + + future.get(); + file_info.num_block = 1; + file_info.blocks_metadata.push_back(metadata); + file_info.uploading = 0; + socket = connect(file_hash_key); send_message(socket.get(), &file_info); - reply = read_reply (socket.get()); + auto reply = read_reply (socket.get()); + + if (reply->message != "TRUE") { + cerr << "[ERR] Failed to upload file. 
Details: " << reply->details << endl; + return EXIT_FAILURE; + } + + socket->close(); + + return EXIT_SUCCESS; +} +// }}} +// write {{{ +uint64_t DFS::write(std::string& file_name, const char* buf, uint64_t off, uint64_t len) { + Histogram boundaries(NUM_NODES, 0); + boundaries.initialize(); + + //auto fd = get_file_description( + //std::bind(&connect, *this, std::placeholders::_1), file_name + //); + + auto socket = connect(h(file_name)); + + FileRequest fr; + fr.name = file_name; + + send_message(socket.get(), &fr); + auto fd = (read_reply (socket.get())); + socket->close(); + if(fd == nullptr) return 0; + + off = std::max(0ul, std::min(off, std::max(fd->size, BLOCK_SIZE - 1))); + + //! Insert the blocks + vector blocks_metadata; + vector> slave_sockets; + + uint64_t to_write_bytes = len; + uint64_t written_bytes = 0; + + int block_beg_seq = (int) off / BLOCK_SIZE; + int block_end_seq = (int) (len + off - 1) / BLOCK_SIZE; + + for(int i=block_beg_seq; i<=block_end_seq; i++) { + BlockMetadata metadata; + Block block; + IOoperation io_ops; + + uint64_t pos_to_update, len_to_write; + + if(i < (int)fd->num_block) { // updating exist block + io_ops.operation = eclipse::messages::IOoperation::OpType::BLOCK_UPDATE; + + //! Load block metadata info + int which_server = fd->hash_keys[i] % NUM_NODES; + + pos_to_update = (i == block_beg_seq && fd->block_size[i] > 0) ? (off % BLOCK_SIZE) : 0; + len_to_write = (fd->block_size[i] == 0) ? 
std::min(to_write_bytes, BLOCK_SIZE) : std::min((BLOCK_SIZE - pos_to_update), to_write_bytes); + + metadata.name = fd->blocks[i]; + metadata.file_name = file_name; + metadata.hash_key = fd->hash_keys[i]; + metadata.seq = i; + metadata.size = std::max(fd->block_size[i], len_to_write); + metadata.type = static_cast(FILETYPE::Normal); + metadata.replica = fd->replica; + metadata.node = nodes[which_server]; + metadata.l_node = nodes[(which_server-1+NUM_NODES)%NUM_NODES]; + metadata.r_node = nodes[(which_server+1+NUM_NODES)%NUM_NODES]; + metadata.is_committed = 1; + } + else { // creating a new block + io_ops.operation = eclipse::messages::IOoperation::OpType::BLOCK_INSERT; + + int which_server = ((fd->hash_key % NUM_NODES) + i) % NUM_NODES; + + pos_to_update = 0; + len_to_write = std::min(BLOCK_SIZE, to_write_bytes); + + metadata.name = file_name + "_" + to_string(i); + metadata.file_name = file_name; + metadata.hash_key = boundaries.random_within_boundaries(which_server); + metadata.seq = i; + metadata.size = len_to_write; + metadata.type = static_cast(FILETYPE::Normal); + metadata.replica = fd->replica; + metadata.node = nodes[which_server]; + metadata.l_node = nodes[(which_server - 1 + NUM_NODES) % NUM_NODES]; + metadata.r_node = nodes[(which_server + 1 + NUM_NODES) % NUM_NODES]; + metadata.is_committed = 1; + } + + blocks_metadata.push_back(metadata); + + string content_str(buf + written_bytes, len_to_write); + block.first = metadata.name; + block.second = content_str; + + io_ops.block = move(block); + io_ops.pos = pos_to_update; + io_ops.length = len_to_write; + + socket = connect(boundaries.get_index(metadata.hash_key)); + send_message(socket.get(), &io_ops); + + auto future = async(launch::async, [](unique_ptr socket) -> bool { + auto reply = read_reply (socket.get()); + socket->close(); + + if (reply->message != "TRUE") { + cerr << "[ERR] Failed to upload block . 
Details: " << reply->details << endl; + return false; + } + + return true; + }, move(socket)); + + slave_sockets.push_back(move(future)); + + written_bytes += len_to_write; + to_write_bytes -= written_bytes; + } + + for (auto& future: slave_sockets) + future.get(); + + // Update Metadata + FileUpdate fu; + fu.name = fd->name; + fu.num_block = std::max(fd->num_block, (unsigned int)block_end_seq + 1); + fu.size = std::max(written_bytes + off, fd->size); + fu.blocks_metadata = blocks_metadata; + + socket = connect(fd->hash_key); + send_message(socket.get(), &fu); + auto reply = read_reply (socket.get()); socket->close(); - return (reply->message == "OK"); + return written_bytes; +} +// }}} +// read {{{ +uint64_t DFS::read(std::string& file_name, char* buf, uint64_t off, uint64_t len) { + Histogram boundaries(NUM_NODES, 0); + boundaries.initialize(); + + uint32_t file_hash_key = h(file_name); + auto socket = connect(file_hash_key); + + FileRequest fr; + fr.name = file_name; + + send_message(socket.get(), &fr); + auto fd = read_reply (socket.get()); + + socket->close(); + + if(fd == nullptr) return 0; + + off = std::max(0ul, std::min(off, fd->size)); + if(off >= fd->size) return 0; + + int block_beg_seq = (int) off / BLOCK_SIZE; + int block_end_seq = (int) (len + off - 1) / BLOCK_SIZE; + + std::string output = ""; + + uint64_t remain_len = len; + + for(int i=block_beg_seq; i<=block_end_seq; i++) { + uint32_t hash_key = fd->hash_keys[i]; + auto block_socket = connect(boundaries.get_index(hash_key)); + + IOoperation io_ops; + io_ops.operation = eclipse::messages::IOoperation::OpType::BLOCK_REQUEST; + io_ops.block.first = fd->blocks[i]; + io_ops.pos = (i == block_beg_seq && fd->block_size[i] > 0) ? 
(off % fd->block_size[i]) : 0; + io_ops.length = std::min((fd->block_size[i] - io_ops.pos), remain_len); + + auto slave_socket = connect(boundaries.get_index(fd->hash_keys[i])); + send_message(slave_socket.get(), &io_ops); + auto msg = read_reply(slave_socket.get()); + output += msg->block.second; + slave_socket->close(); + + remain_len -= io_ops.length; + + if(io_ops.pos + io_ops.length > fd->block_size[i]) + break; + } + + strcpy(buf, output.c_str()); + + return (uint64_t)output.length(); +} +// }}} +// get_metadata {{{ +model::metadata DFS::get_metadata(std::string& fname) { + model::metadata md; + + FileRequest fr; + fr.name = fname; + + auto socket = connect(h(fname)); + send_message(socket.get(), &fr); + auto fd = (read_reply (socket.get())); + socket->close(); + + if(fd != nullptr) { + md.name = fd->name; + md.hash_key = fd->hash_key; + md.size = fd->size; + md.num_block = fd->num_block; + md.type = fd->type; + md.replica = fd->replica; + md.blocks = fd->blocks; + md.hash_keys = fd->hash_keys; + md.block_size = fd->block_size; + } + + return md; +} +// }}} +// get_metadata_all {{{ +vector DFS::get_metadata_all() { + vector total; + + for (unsigned int net_id=0; net_id(socket.get()); + std::copy(file_list_reply->data.begin(), file_list_reply->data.end(), back_inserter(total)); + } + + vector metadata_vector; + + for (auto fd : total) { + model::metadata md; + md.name = fd.name; + md.hash_key = fd.hash_key; + md.size = fd.size; + md.num_block = fd.num_block; + md.type = fd.type; + md.replica = fd.replica; + metadata_vector.push_back(md); + } + + return move(metadata_vector); +} +// }}} +// file_metadata_append {{{ +void DFS::file_metadata_append(std::string name, size_t size, model::metadata& blocks) { + FileUpdate fu; + fu.name = name; + fu.num_block = blocks.blocks.size(); + fu.size = size; + fu.is_append = true; + + for (size_t i = 0; i < blocks.blocks.size(); i++) { + BlockMetadata metadata; + metadata.file_name = name; + metadata.name = blocks.blocks[i]; 
+ metadata.seq = 0; + metadata.hash_key = blocks.hash_keys[i]; + metadata.size = blocks.block_size[i]; + metadata.replica = 1; + metadata.type = 0; + metadata.node = ""; + metadata.l_node = ""; + metadata.r_node = ""; + metadata.is_committed = 1; + + fu.blocks_metadata.push_back(metadata); + } + + + uint32_t file_hash_key = h(name); + auto socket = connect(file_hash_key); + send_message(socket.get(), &fu); + read_reply(socket.get()); + socket->close(); } // }}} } diff --git a/src/client/dfs.hh b/src/client/dfs.hh index 8488e89..3d762e1 100644 --- a/src/client/dfs.hh +++ b/src/client/dfs.hh @@ -1,44 +1,92 @@ #pragma once + +#include "model/metadata.hh" + #include #include -#include #include -#include namespace velox { -using boost::asio::ip::tcp; using vec_str = std::vector; class DFS { public: DFS(); - void load_settings(); - int put(vec_str); - int get(vec_str); - int cat(vec_str); - int ls(vec_str); - int rm(vec_str); - int format(); - int show(vec_str); - int pget(vec_str); - int update(vec_str); - int append(vec_str); - bool fexists(std::string); + + //! Write the contents into a remote file. + //! + //! @param file_name File to peform the operation + //! if the file does not exists it will create it. + //! @param buf contents to be copied. + //! @param off offset to append the buffer. + //! @param len size of the content. + //! @retval >0 bytes successfully uploaded. + //! @retval 0 The operation failed. + uint64_t write(std::string& file_name, const char* buf, uint64_t off, uint64_t len); + + //! Download a remote file's section into a buffer. + //! + //! @pre The file must exist. + //! @param file_name File to peform the operation. + //! @param[out] buffer Destination buffer. + //! @param off offset from where to read. + //! @param len size of segment to read. + //! @retval >0 bytes successfully uploaded. + //! @retval 0 The operation failed. + uint64_t read(std::string& file_name, char* buf, uint64_t off, uint64_t len); + + //! 
Load all the file into a string. + //! + //! @param file_name File to peform the operation. + //! @retval "" The file did not exists. + std::string read_all(std::string file_name); + + //! Append a string to the end of a remote file. + //! @attention It is currently not working due to + //! changes in the internal API. + int append(std::string file_name, std::string buf); + + //! Upload a local file to a the veloxdfs intance. + //! + //! @param file_name File to peform the operation. + //! @param is_binary Whether to broadcast the file to all the nodes. + //! @retval 0 SUCCESS + //! @retval 1 FAILURE + int upload(std::string file_name, bool is_binary); + + //! Download a remote file to your current local directory. + //! + //! @param file_name File to peform the operation. + //! @retval 0 SUCCESS + //! @retval 1 FAILURE + int download(std::string file_name); + bool exists(std::string); + bool touch(std::string); - int push_back(vec_str); - std::string load(std::string); + int remove(std::string); + + int format(); + + model::metadata get_metadata(std::string& fname); + + std::vector get_metadata_all(); + + void file_metadata_append(std::string, size_t, model::metadata&); + + //! @deprecated + int pget(vec_str); + + //! 
@deprecated + int update(vec_str); private: - uint32_t BLOCK_SIZE; + uint64_t BLOCK_SIZE; uint32_t NUM_NODES; int replica; - int port; std::vector nodes; - - std::unique_ptr connect (uint32_t); }; } diff --git a/src/client/model/metadata.hh b/src/client/model/metadata.hh new file mode 100644 index 0000000..7e9389b --- /dev/null +++ b/src/client/model/metadata.hh @@ -0,0 +1,25 @@ +#ifndef __MODEL_METADATA_HH__ +#define __MODEL_METADATA_HH__ + +#include +#include +#include + +namespace velox { + namespace model { + struct metadata { + std::string name; + uint32_t hash_key; + uint64_t size; + unsigned int num_block; + unsigned int type; + unsigned int replica; + std::vector blocks; + std::vector hash_keys; + std::vector block_size; + }; + + } +} + +#endif diff --git a/src/client/vdfs.cc b/src/client/vdfs.cc index 6c86cea..1300710 100644 --- a/src/client/vdfs.cc +++ b/src/client/vdfs.cc @@ -2,14 +2,49 @@ #include "dfs.hh" #include "../common/hash.hh" +#include + using namespace velox; // Constructors {{{ file::file(vdfs* vdfs_, std::string name_) { this->vdfs_ = vdfs_; - name = name_; + this->name = name_; + this->opened = false; + this->id = this->generate_fid(); } +file::file(vdfs* vdfs_, std::string name_, bool opened_) { + this->vdfs_ = vdfs_; + this->name = name_; + this->opened = opened_; + this->id = this->generate_fid(); +} + +file::file(const file& that) { + this->vdfs_ = that.vdfs_; + this->name = that.name; + this->opened = that.opened; + this->id = that.id; +} + +// }}} +// generate_fid {{{ +long file::generate_fid() { + return std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch() + ).count(); +} +// }}} +// operator= {{{ +file& file::operator=(const file& rhs) { + this->vdfs_ = rhs.vdfs_; + this->name = rhs.name; + this->opened = rhs.opened; + this->id = rhs.id; + + return *this; +} // }}} // push_back {{{ void file::append(std::string content) { @@ -21,43 +56,147 @@ std::string file::get() { return vdfs_->load(name); } // }}} 
+// open {{{ +void file::open() { + this->opened = true; +} +// }}} +// close {{{ +void file::close() { + this->opened = false; +} +// }}} +// is_open {{{ +bool file::is_open() { + return this->opened; +} +// }}} +// get_id {{{ +long file::get_id() { + return this->id; +} +// }}} +// get_name {{{ +std::string file::get_name() { + return this->name; +} +// }}} +// get_size {{{ +long file::get_size() { + return this->size; +} +// }}} + +/******************************************/ +/* */ +/******************************************/ + // vdfs {{{ vdfs::vdfs() { dfs = new DFS(); - dfs->load_settings(); +// dfs->load_settings(); + + opened_files = nullptr; +} + +vdfs::vdfs(vdfs& that) { + dfs = new DFS(); + //dfs->load_settings(); + + if(that.opened_files != nullptr) + opened_files = new std::vector(*that.opened_files); + else + opened_files = nullptr; } vdfs::~vdfs() { + if(this->opened_files != nullptr) { + for(auto f : *(this->opened_files)) + this->close(f.get_id()); + + delete this->opened_files; + } + delete dfs; } // }}} +// operator= {{{ +vdfs& vdfs::operator=(vdfs& rhs) { + if(dfs != nullptr) delete dfs; + + dfs = new DFS(); + //dfs->load_settings(); + + if(opened_files != nullptr) delete opened_files; + + if(rhs.opened_files != nullptr) + opened_files = new std::vector(*rhs.opened_files); + else + opened_files = nullptr; + + return *this; +} +// }}} // open {{{ file vdfs::open(std::string name) { - //if (!dfs->exists(name)) - dfs->touch(name); + dfs->touch(name); - return velox::file(this, name); + velox::file new_file(this, name, true); + + if(opened_files == nullptr) + opened_files = new std::vector; + + opened_files->push_back(new_file); + + return new_file; +} +// }}} +// open_file {{{ +long vdfs::open_file(std::string fname) { + return (this->open(fname)).get_id(); +} +// }}} +// close {{{ +bool vdfs::close(long fid) { + if(opened_files == nullptr) return false; + + velox::file* f = this->get_file(fid); + + if(f == nullptr) return false; + + f->close(); 
+ return true; +} +// }}} +// is_open() {{{ +bool vdfs::is_open(long fid) { + if(opened_files == nullptr) return false; + + velox::file* f = this->get_file(fid); + if( f == nullptr) return false; + + return f->is_open(); } // }}} // upload {{{ file vdfs::upload(std::string name) { - dfs->put({"","", name}); + dfs->upload(name, false); return velox::file(this, name); } // }}} // append {{{ void vdfs::append (std::string name, std::string content) { - dfs->push_back({"", "", name, content}); + dfs->append(name, content); } // }}} // load {{{ std::string vdfs::load(std::string name) { - return dfs->load({name}); + return dfs->read_all(name); } // }}} // rm {{{ bool vdfs::rm (std::string name) { - return dfs->rm({"", "", name}); + return dfs->remove(name); } // }}} // format {{{ @@ -70,3 +209,38 @@ bool vdfs::exists(std::string name) { return dfs->exists(name); } // }}} +// write {{{ +uint32_t vdfs::write(long fid, const char *buf, uint32_t off, uint32_t len) { + velox::file* f = this->get_file(fid); + if(f == nullptr) return -1; + + return dfs->write(f->name, buf, off, len); +} +// }}} +// read {{{ +uint32_t vdfs::read(long fid, char *buf, uint32_t off, uint32_t len) { + velox::file* f = this->get_file(fid); + if(f == nullptr) return -1; + + return dfs->read(f->name, buf, off, len); +} +// }}} +// get_file {{{ +velox::file* vdfs::get_file(long fid) { + for(auto& f : *(this->opened_files)) { + if(f.get_id() == fid) + return &f; + } + + return nullptr; +} +// }}} +// get_metadata {{{ +model::metadata vdfs::get_metadata(long fid) { + velox::file* f = this->get_file(fid); + if(f == nullptr) return model::metadata(); + + return dfs->get_metadata(f->name); +} +// }}} + diff --git a/src/client/vdfs.hh b/src/client/vdfs.hh index 7138c26..a4062cf 100644 --- a/src/client/vdfs.hh +++ b/src/client/vdfs.hh @@ -1,5 +1,6 @@ #pragma once #include "dfs.hh" +#include namespace velox { @@ -11,27 +12,67 @@ class file { void append(std::string); std::string get(); - protected: + void 
open(); + void close(); + bool is_open(); + + long get_id(); + std::string get_name(); + long get_size(); + + file& operator=(const file&); + file(vdfs*, std::string); + file(vdfs*, std::string, bool); + file(const file&); + + private: + long id; std::string name; vdfs* vdfs_; + bool opened; + + long size; + + long generate_fid(); }; class vdfs { friend file; public: vdfs(); + vdfs(vdfs&); ~vdfs(); + + vdfs& operator=(vdfs&); + velox::file open(std::string); + + long open_file(std::string); + bool close(long); + bool is_open(long); + velox::file upload(std::string); + bool rm(std::string); bool format(); + bool exists(std::string); + uint32_t write(long, const char*, uint32_t, uint32_t); + uint32_t read(long, char*, uint32_t, uint32_t); + + model::metadata get_metadata(long fid); + + void append(std::string, std::string); + protected: DFS* dfs; - void append(std::string, std::string); std::string load(std::string); + + private: + velox::file* get_file(long); + std::vector* opened_files; }; diff --git a/src/client/vmr.cc b/src/client/vmr.cc index edae12e..19714ff 100644 --- a/src/client/vmr.cc +++ b/src/client/vmr.cc @@ -2,17 +2,19 @@ #include "../common/context_singleton.hh" #include "../messages/boost_impl.hh" #include "../messages/factory.hh" -#include "../messages/job.hh" -#include "../common/ecfs.hh" +#include "../mapreduce/messages/job.hh" #include "../common/hash.hh" #include #include #include #include +#include using namespace std; using namespace velox; using namespace eclipse::messages; +using namespace boost::asio; +using namespace boost::asio::ip; using vec_str = std::vector; // Free functions {{{ @@ -27,7 +29,7 @@ uint32_t random_number() { tcp::endpoint* find_local_master(uint32_t job_id) { - int port = GET_INT("network.ports.client"); + int port = GET_INT("network.ports.mapreduce"); vec_str nodes = GET_VEC_STR("network.nodes"); string host = nodes[ job_id % nodes.size() ]; diff --git a/src/common/block.hh b/src/common/block.hh new file mode 100644 
index 0000000..f41ee7b --- /dev/null +++ b/src/common/block.hh @@ -0,0 +1,4 @@ +#pragma once +#include + +typedef std::pair Block; diff --git a/src/common/blockmetadata.hh b/src/common/blockmetadata.hh new file mode 100644 index 0000000..ad00ead --- /dev/null +++ b/src/common/blockmetadata.hh @@ -0,0 +1,21 @@ +#pragma once +#include +#include + +namespace eclipse { + +struct BlockMetadata { + std::string name; + std::string file_name; + unsigned int seq; + uint32_t hash_key; + uint32_t size; + unsigned int type; + int replica; + std::string node; + std::string l_node; + std::string r_node; + unsigned int is_committed; +}; + +} diff --git a/src/common/context.cc b/src/common/context.cc index a4bf24c..b7ade6c 100644 --- a/src/common/context.cc +++ b/src/common/context.cc @@ -1,5 +1,9 @@ #include "context.hh" #include +#include +#include +#include + using namespace std; @@ -22,7 +26,7 @@ Context* Context::connect () { if (singleton == nullptr) { singleton = new Context(); singleton->init(); - singleton->run(); + //singleton->run(); } return singleton; @@ -44,7 +48,13 @@ void Context::run (){ int concurrency = settings.get ("cache.concurrency"); for (int i = 0; i < concurrency; i++ ) { auto t = new std::thread ( [this] { + try { this->io.run(); + } catch (exception& e) { + logger->error("iosvc exception %s", e.what()); + } catch (boost::exception& e) { + logger->error("iosvc exception %s", diagnostic_information(e).c_str()); + } }); threads.emplace_back (t); diff --git a/src/common/dl_loader.hh b/src/common/dl_loader.hh index abfed92..059ef95 100644 --- a/src/common/dl_loader.hh +++ b/src/common/dl_loader.hh @@ -6,13 +6,14 @@ #include #include #include +#include #include "../mapreduce/output_collection.hh" using before_map_t = void (*)(std::unordered_map&); using after_map_t = void (*)(std::unordered_map&); using mapper_t = void (*)(std::string&, velox::OutputCollection&, std::unordered_map&); -using reducer_t = void (*)(std::string&, std::list&, 
velox::OutputCollection&); +using reducer_t = void (*)(std::string&, std::vector&, velox::OutputCollection&); class DL_loader { public: diff --git a/src/common/ecfs.hh b/src/common/ecfs.hh deleted file mode 100644 index b2b0455..0000000 --- a/src/common/ecfs.hh +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -#include "definitions.hh" -#include "hash.hh" -#include "settings.hh" diff --git a/src/common/histogram.cc b/src/common/histogram.cc index 1b30b7b..561ccbd 100644 --- a/src/common/histogram.cc +++ b/src/common/histogram.cc @@ -254,7 +254,7 @@ uint32_t Histogram::random_within_boundaries (uint32_t index) { if (which_server != 0) lower_boundary = boundaries[which_server-1]; else - lower_boundary = boundaries[numserver-1]; + lower_boundary = 0; uint32_t upper_boundary = boundaries[which_server]; uint32_t range = upper_boundary - lower_boundary; diff --git a/src/common/histogram.hh b/src/common/histogram.hh index 189e2bb..39da8af 100644 --- a/src/common/histogram.hh +++ b/src/common/histogram.hh @@ -1,8 +1,8 @@ #ifndef __HISTOGRAM__ #define __HISTOGRAM__ +#include "definitions.hh" #include -#include #include #define MAX_UINT 4294967295 diff --git a/src/common/logger.cc b/src/common/logger.cc index 0142168..3ee5701 100644 --- a/src/common/logger.cc +++ b/src/common/logger.cc @@ -70,6 +70,9 @@ Logger::Logger (char* title, const string& type, string mask_) { this->title = title; openlog (title, LOG_CONS, this->type); setlogmask(LOG_UPTO(mask)); + + //int type_ = syslog_facilities[type]; + //openlog (title, LOG_CONS, type_); } Logger::~Logger () { closelog (); } diff --git a/src/common/logger.hh b/src/common/logger.hh index 1e98e05..118d262 100644 --- a/src/common/logger.hh +++ b/src/common/logger.hh @@ -23,7 +23,7 @@ class Logger { // Singleton things static Logger* singleton; - Logger(char*, const std::string&, std::string); + Logger(char*, const std::string&, std::string mask_); ~Logger(); std::string title; diff --git a/src/common/settings.cc 
b/src/common/settings.cc index 94b127c..8c73d45 100644 --- a/src/common/settings.cc +++ b/src/common/settings.cc @@ -7,6 +7,8 @@ #include "settings.hh" #include #include +#include +#include #include #include #include @@ -27,11 +29,37 @@ #define FINAL_PATH "/eclipse.json" +static std::map default_ops { + {"log.type", "LOG_LOCAL6"}, + {"log.name", "ECLIPSE"}, + {"log.mask", "DEBUG"}, + {"cache.numbin", "100"}, + {"cache.size", "200000"}, + {"cache.concurrency", "1"}, + {"network.serialization", "binary"}, + {"fileystem.block", "137438953"}, + {"fileystem.buffer", "512"}, + {"fileystem.replica", "1"} +}; + using std::cout; using std::endl; using std::vector; using std::string; +using std::stringstream; using namespace boost::property_tree; + +static vector tokenize(std::string str_separated_by_colons) { + stringstream ss(str_separated_by_colons); + + vector output; + string token; + while (std::getline(ss, token, ':')) { + output.push_back(token); + } + return move(output); +} + //}}} // class SettingsImpl {{{ class Settings::SettingsImpl { @@ -77,6 +105,10 @@ bool Settings::SettingsImpl::get_project_path () // bool Settings::SettingsImpl::load () { + for (auto& kv : default_ops) { + pt.put(kv.first, kv.second); + } + if (not nofile) { get_project_path(); json_parser::read_json (config_path, pt); @@ -142,6 +174,11 @@ template<> int Settings::SettingsImpl::get (string& str) { template<> vector Settings::SettingsImpl::get (string& str) { vector output; + + auto property = get_env(str); + if (property != nullptr) + return tokenize(property); + auto& subtree = pt.get_child (str.c_str()); for (auto& v : subtree) diff --git a/src/fileleader/directory.cc b/src/fileleader/directory.cc new file mode 100644 index 0000000..1b136d9 --- /dev/null +++ b/src/fileleader/directory.cc @@ -0,0 +1,318 @@ +#include "directory.hh" +#include "../common/context_singleton.hh" +#include +#include + +#define DEFAULT_QUERY_SIZE 512 + +using namespace std; +using namespace eclipse; + +// Callback 
and free functions {{{ +static int file_callback(void *file_info, int argc, char **argv, char **azColName) { + int i = 0; + auto file = reinterpret_cast(file_info); + file->name = argv[i++]; + file->hash_key = atoi(argv[i++]); + file->size = atoll(argv[i++]); + file->num_block = atoi(argv[i++]); + file->type = atoi(argv[i++]); + file->replica = atoi(argv[i++]); + file->uploading = atoi(argv[i]); + return 0; +} + +static int block_callback(void *block_info, int argc, char **argv, char **azColName) { + int i = 0; + auto block = reinterpret_cast(block_info); + block->name = argv[i++]; + block->file_name = argv[i++]; + block->seq = atoi(argv[i++]); + block->hash_key = atoi(argv[i++]); + block->size = atoi(argv[i++]); + block->type = atoi(argv[i++]); + block->replica = atoi(argv[i++]); + block->node = argv[i++]; + block->l_node = argv[i] ? argv[i] : "NULL"; + i++; + block->r_node = argv[i] ? argv[i] : "NULL"; + i++; + block->is_committed = argv[i] ? atoi(argv[i]) : 0; + return 0; +} + +static int file_list_callback(void *list, int argc, char **argv, char **azColName) { + auto file_list = reinterpret_cast*>(list); + for (int i=0; ipush_back(tmp_file); + } + return 0; +} + +static int block_list_callback(void *list, int argc, char **argv, char **azColName) { + auto block_list = reinterpret_cast*>(list); + for (int i=0; ipush_back(tmp_block); + } + return 0; +} + +static int exist_callback(void *result, int argc, char **argv, char **azColName) { + *reinterpret_cast(result) = argv[0] ? 
true : false; + return 0; +} +// }}} +// open {{{ +static sqlite3* open(string path) { + sqlite3* db = NULL; + + int rc; + if ((rc = sqlite3_open_v2(path.c_str(), &db, + SQLITE_OPEN_FULLMUTEX | SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL)) != SQLITE_OK) { + ERROR("Can't open database: %i", rc); + } else { + DEBUG("Opened database successfully"); + } + return db; +} +// }}} + +// contructor {{{ +Directory::Directory() { + path = GET_STR("path.metadata") + "/metadata.db"; +} +// }}} +// query_exec_simple {{{ +bool Directory::query_exec_simple(char* query, int (*fn)(void*,int,char**,char**) = NULL, void* argv = NULL) { + char *zErrMsg = nullptr; + + sqlite3* db = open(path); + int rc = sqlite3_exec(db, query, fn, argv, &zErrMsg); + if (rc != SQLITE_OK) { + ERROR("SQL error: %s", zErrMsg); + sqlite3_free(zErrMsg); + } + sqlite3_close(db); + + return rc; +} +// }}} +// create_tables {{{ +void Directory::create_tables() { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "CREATE TABLE IF NOT EXISTS file_table( \ + name TEXT NOT NULL, \ + hash_key INT NOT NULL, \ + size INT NOT NULL, \ + num_block INT NOT NULL, \ + type INT NOT NULL, \ + replica INT NOT NULL, \ + uploading INT NOT NULL, \ + PRIMARY KEY (name));"); + + if (query_exec_simple(sql)) + DEBUG("file_table created successfully"); + + sprintf(sql, "CREATE TABLE IF NOT EXISTS block_table( \ + name TEXT NOT NULL, \ + file_name TEXT NOT NULL, \ + seq INT NOT NULL, \ + hash_key INT NOT NULL, \ + size INT NOT NULL, \ + type INT NOT NULL, \ + replica INT NOT NULL, \ + node TEXT NOT NULL, \ + l_node TEXT , \ + r_node TEXT , \ + is_committed INT , \ + PRIMARY KEY (name));"); + + if (query_exec_simple(sql)) + DEBUG("block_table created successfully"); +} +// }}} + +// file_table_insert {{{ +void Directory::file_table_insert (FileInfo &file_info) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "INSERT INTO file_table (\ + name, hash_key, size, num_block, type, replica, uploading)\ + VALUES('%s', %" PRIu32 ", %" 
PRIu64 ", %u, %u, %u, %u);", + file_info.name.c_str(), + file_info.hash_key, + file_info.size, + file_info.num_block, + file_info.type, + file_info.replica, + file_info.uploading); + + if (query_exec_simple(sql)) + DEBUG("file_metadata inserted successfully"); +} +// }}} +// file_table_select {{{ +void Directory::file_table_select(string name, FileInfo *file_info) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "SELECT * from file_table where name='%s';", name.c_str()); + query_exec_simple(sql, file_callback, (void*)file_info); +} +// }}} +// file_table_select_all {{{ +void Directory::file_table_select_all(vector &file_list) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "SELECT * from file_table where uploading=0;"); + query_exec_simple(sql, file_list_callback, (void*)&file_list); +} +// }}} +// file_table_update {{{ +void Directory::file_table_update(string file_name, uint64_t size, uint32_t num_block) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "UPDATE file_table set \ + size=%" PRIu64 ", num_block=%u where name='%s';", + size, num_block, file_name.c_str()); + + if (query_exec_simple(sql)) + DEBUG("file_metadata updated successfully"); + +} +// }}} +// file_table_delete {{{ +void Directory::file_table_delete(string name) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "DELETE from file_table where name='%s';", name.c_str()); + if (query_exec_simple(sql)) + DEBUG("file_metadata deleted successfully"); +} +// }}} +// file_table_exists {{{ +bool Directory::file_table_exists(string name) { + char sql[DEFAULT_QUERY_SIZE]; + bool result = false; + + sprintf(sql, "SELECT name from file_table where name='%s';", name.c_str()); + if (query_exec_simple(sql, exist_callback, &result)) + DEBUG("file_exist executed successfully"); + + return result; +} +// }}} +// file_table_confirm_upload {{{ +void Directory::file_table_confirm_upload (std::string file_name, uint32_t num_block) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "UPDATE file_table set 
uploading=0, num_block=%u where name='%s';", num_block, + file_name.c_str()); + query_exec_simple(sql); +} +// }}} + +// block_table_insert {{{ +void Directory::block_table_insert(BlockMetadata& metadata) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "INSERT OR REPLACE INTO block_table (\ + name, file_name, seq, hash_key, size, type, replica, node, l_node, r_node, is_committed)\ + VALUES ('%s', '%s', %u, %" PRIu32 ", %" PRIu32 ", %u, %u, '%s', '%s', '%s', %u);", + metadata.name.c_str(), + metadata.file_name.c_str(), + metadata.seq, + metadata.hash_key, + metadata.size, + metadata.type, + metadata.replica, + metadata.node.c_str(), + metadata.l_node.c_str(), + metadata.r_node.c_str(), + metadata.is_committed); + + if (query_exec_simple(sql)) + DEBUG("block_metadata inserted successfully"); +} +// }}} +// block_table_select {{{ +void Directory::block_table_select(string file_name, unsigned int block_seq, BlockInfo *block_info) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "SELECT * from block_table where (file_name='%s') and \ + (seq=%u);", file_name.c_str(), block_seq); + + query_exec_simple(sql, block_callback, (void*)block_info); +} +// }}} +// block_table_select_all {{{ +void Directory::block_table_select_all(vector &block_info) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "SELECT * from block_table;"); + query_exec_simple(sql, block_list_callback, (void*)&block_info); +} +// }}} +// block_table_update {{{ +void Directory::block_table_update(string file_name, uint32_t size, uint32_t seq) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "UPDATE block_table set \ + size=%" PRIu32 " where (file_name='%s') and (seq=%u);", + size, file_name.c_str(), seq); + + if (query_exec_simple(sql)) + DEBUG("block_metadata updated successfully"); +} +// }}} +// block_table_delete {{{ +void Directory::block_table_delete(string file_name, unsigned int seq) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "DELETE from block_table where (file_name='%s') and 
(seq=%u);", file_name.c_str(), seq); + if (query_exec_simple(sql)) + DEBUG("block_metadata deleted successfully"); +} +// }}} +// block_table_delete_all {{{ +void Directory::block_table_delete_all(string file_name) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "DELETE from block_table where (file_name='%s');", file_name.c_str()); + if (query_exec_simple(sql)) + DEBUG("block_metadata deleted successfully"); +} +// }}} +// select_last_block_metadata {{{ +void Directory::select_last_block_metadata(string file_name, + BlockInfo *block_info) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "SELECT * FROM block_table WHERE (file_name='%s') \ + ORDER BY seq DESC LIMIT 1;", file_name.c_str()); + + if (query_exec_simple(sql, block_callback, (void*)block_info)) + DEBUG("The last block_metadata selected successfully"); +} +// }}} diff --git a/src/fileleader/directory.hh b/src/fileleader/directory.hh new file mode 100644 index 0000000..a078bca --- /dev/null +++ b/src/fileleader/directory.hh @@ -0,0 +1,40 @@ +#pragma once +#include +#include +#include +#include "../messages/blockinfo.hh" +#include "../messages/fileinfo.hh" +#include "blockmetadata.hh" + +namespace eclipse { +using namespace messages; + +class Directory { + public: + Directory(); + void create_tables(); + + void file_table_insert(FileInfo&); + void file_table_select(std::string, FileInfo *); + void file_table_select_all(std::vector&); + void file_table_update(std::string, uint64_t, uint32_t); + void file_table_delete(std::string); + bool file_table_exists(std::string); + void file_table_confirm_upload (std::string, uint32_t); + + void block_table_insert(BlockMetadata&); + void block_table_select(std::string, uint32_t, BlockInfo*); + void block_table_select_all(std::vector&); + void block_table_update(std::string, uint32_t, uint32_t); + void block_table_delete(std::string, uint32_t); + void block_table_delete_all(std::string); + void select_last_block_metadata(std::string, BlockInfo*); + + protected: + 
bool query_exec_simple(char*, int (*)(void*,int,char**,char**), void*); + + private: + std::string path; +}; + +} diff --git a/src/fileleader/file_leader.cc b/src/fileleader/file_leader.cc new file mode 100644 index 0000000..4c27585 --- /dev/null +++ b/src/fileleader/file_leader.cc @@ -0,0 +1,192 @@ +// includes & usings {{{ +#include "file_leader.hh" +#include "../messages/boost_impl.hh" +#include "../messages/filedescription.hh" + +using namespace eclipse; +using namespace eclipse::messages; +using namespace eclipse::network; +using namespace std; + +// }}} + +// Constructor & destructor {{{ +FileLeader::FileLeader (ClientHandler* net) : Node () { + network = net; + + network_size = context.settings.get("network.nodes").size(); + boundaries.reset( new Histogram {network_size, 100}); + boundaries->initialize(); + + directory.create_tables(); +} + +FileLeader::~FileLeader() { } +// }}} +// file_insert {{{ +//! @attention The block metadata is a proposal, the client +//! might endup with more blocks. +//! @todo fix block shortage in client +//! 
@todo strategy pattern for scheduling the blocks +unique_ptr FileLeader::file_insert(messages::FileInfo* f) { + directory.file_table_insert(*f); + INFO("Saving file: %s to SQLite db", f->name.c_str()); + + // ip = schedule_block(index, size); + FileDescription* fd = new FileDescription(); + + uint32_t size_per_block = GET_INT("filesystem.block"); + uint32_t n_blocks = static_cast (ceil((double)f->size /(double) size_per_block)); + INFO("%u block to be save for file %s", n_blocks, f->name.c_str()); + fd->name = f->name; + fd->size = f->size; + fd->hash_key = f->hash_key; + + //Compute blocks information + /* + int index = 0; + for (uint32_t i = 0; i < n_blocks; i++) { + auto block_name = f->name + "_" + to_string(i); + uint64_t hash_key = boundaries->random_within_boundaries(index); + fd->blocks.push_back(block_name); + fd->hash_keys.push_back(hash_key); + fd->block_size.push_back(size_per_block); + index = (index + 1) % network_size; + } + */ + + return unique_ptr(fd); +} +// }}} +// file_insert_confirm {{{ +bool FileLeader::file_insert_confirm(messages::FileInfo* f) { + directory.file_table_confirm_upload(f->name, f->num_block); + + for (auto& metadata : f->blocks_metadata) { + directory.block_table_insert(metadata); + } + + replicate_metadata(); + + return true; +} +// }}} +// file_update {{{ +bool FileLeader::file_update(messages::FileUpdate* f) { + if (file_exist(f->name)) { + DEBUG("[file_update] name: %s, size: %lu, num_block: %d", f->name.c_str(), f->size, f->num_block); + + if (f->is_append) { + BlockInfo bi; + directory.select_last_block_metadata(f->name, &bi); + int last_seq = bi.seq; + + for (auto& metadata : f->blocks_metadata) { + metadata.seq = ++last_seq; + directory.block_table_insert(metadata); + } + + FileInfo fi; + directory.file_table_select(f->name, &fi); + directory.file_table_update(f->name, f->size + fi.size, last_seq + 1); + + } else { + directory.file_table_update(f->name, f->size, f->num_block); + for (auto& metadata : f->blocks_metadata) 
{ + directory.block_table_insert(metadata); + } + } + + INFO("Updating to SQLite db"); + return true; + } + + return false; +} +// }}} +// file_delete {{{ +bool FileLeader::file_delete(messages::FileDel* f) { + if (file_exist(f->name)) { + directory.file_table_delete(f->name); + directory.block_table_delete_all(f->name); + replicate_metadata(); + INFO("Removing from SQLite db"); + return true; + } + return false; +} +// }}} +// file_request {{{ +unique_ptr FileLeader::file_request(messages::FileRequest* m) { + string file_name = m->name; + + FileInfo fi; + fi.num_block = 0; + FileDescription* fd = new FileDescription(); + fd->name = file_name; + + directory.file_table_select(file_name, &fi); + fd->uploading = fi.uploading; + + if (fi.uploading == 1) //! Cancel if file is being uploading + return unique_ptr(fd); + + fd->hash_key = fi.hash_key; + fd->replica = fi.replica; + fd->size = fi.size; + fd->num_block = fi.num_block; + + int num_blocks = fi.num_block; + for (int i = 0; i< num_blocks; i++) { + BlockInfo bi; + directory.block_table_select(file_name, i, &bi); + string block_name = bi.name; + fd->blocks.push_back(block_name); + fd->hash_keys.push_back(bi.hash_key); + fd->block_size.push_back(bi.size); + } + + return unique_ptr(fd); +} +// }}} +// list {{{ +bool FileLeader::list (messages::FileList* m) { + directory.file_table_select_all(m->data); + return true; +} +// }}} +// file_exist {{{ +bool FileLeader::file_exist (std::string file_name) { + return directory.file_table_exists(file_name); +} +// }}} +// replicate_metadata {{{ +//! @brief This function replicates to its right and left neighbor +//! node the metadata db. +//! This function is intended to be invoked whenever the metadata db is modified. +void FileLeader::replicate_metadata() { + MetaData md; + md.node = context.settings.getip(); + md.content = local_io.read_metadata(); + + int left_node = ((id - 1) < 0) ? network_size - 1: id - 1; + int right_node = ((id + 1) == network_size) ? 
0 : id + 1; + + network->send(left_node, &md); + network->send(right_node, &md); +} +// }}} +// metadata_save {{{ +void FileLeader::metadata_save(MetaData* m) { + std::string file_name = m->node + "_replica"; + local_io.write(file_name, m->content); +} +// }}} +// format {{{ +bool FileLeader::format () { + INFO("Formating DFS"); + local_io.format(); + directory.create_tables(); + return true; +} +// }}} diff --git a/src/fileleader/file_leader.hh b/src/fileleader/file_leader.hh new file mode 100644 index 0000000..81d16e5 --- /dev/null +++ b/src/fileleader/file_leader.hh @@ -0,0 +1,58 @@ +#pragma once + +#include "../nodes/node.hh" +#include "directory.hh" +#include "../blocknode/local_io.hh" +#include "../messages/fileinfo.hh" +#include "../messages/fileupdate.hh" +#include "../messages/filerequest.hh" +#include "../messages/filelist.hh" +#include "../messages/filedel.hh" +#include "../messages/fileexist.hh" +#include "../messages/metadata.hh" +#include "../common/histogram.hh" + +#include + +namespace eclipse { + +using vec_str = std::vector; + +class FileLeader: public Node { + public: + FileLeader(network::ClientHandler*); + ~FileLeader(); + + //! @brief Insert file metadata and compute block metadata. + unique_ptr file_insert(messages::FileInfo*); + + //! Confirm that all the blocks has been uploaded. + bool file_insert_confirm(messages::FileInfo*); + + //! Return a description of the blocks containing the file. + unique_ptr file_request(messages::FileRequest*); + + bool file_delete(messages::FileDel*); + + //! Fill the param with the list of files. + bool list(messages::FileList*); + + bool file_exist(std::string); + + //! Update the file with the new size and blocks. + bool file_update(messages::FileUpdate*); + + void metadata_save(messages::MetaData*); + + //! Remove everything and create tables. 
+ bool format(); + + protected: + void replicate_metadata(); + Directory directory; + Local_io local_io; + std::unique_ptr boundaries; + int network_size; +}; + +} diff --git a/src/fileleader/file_leader_router.cc b/src/fileleader/file_leader_router.cc new file mode 100644 index 0000000..776178c --- /dev/null +++ b/src/fileleader/file_leader_router.cc @@ -0,0 +1,140 @@ +#include "file_leader_router.hh" +#include "../common/context_singleton.hh" +#include "../messages/boost_impl.hh" +#include +#include + +using namespace std; +using namespace eclipse; +namespace ph = std::placeholders; + +// Constructor {{{ +FileLeaderRouter::FileLeaderRouter(FileLeader* fl, Router* router): RouterDecorator(router) { + file_leader = fl; + + using namespace std::placeholders; + using std::placeholders::_1; + using std::placeholders::_2; + auto& rt = routing_table; + rt.insert({"FileInfo", bind(&FileLeaderRouter::insert_file, this, _1, _2)}); + rt.insert({"FileUpdate", bind(&FileLeaderRouter::update_file, this, _1, _2)}); + rt.insert({"FileRequest", bind(&FileLeaderRouter::request_file, this, _1, _2)}); + rt.insert({"FileList", bind(&FileLeaderRouter::request_ls, this, _1, _2)}); + rt.insert({"FileDel", bind(&FileLeaderRouter::delete_file, this, _1, _2)}); + rt.insert({"FileExist", bind(&FileLeaderRouter::file_exist, this, _1, _2)}); + rt.insert({"MetaData", bind(&FileLeaderRouter::replicate_metadata, this, _1, _2)}); + rt.insert({"FormatRequest", bind(&FileLeaderRouter::request_format, this, _1, _2)}); +} +// }}} +// FileInfo* {{{ +void FileLeaderRouter::insert_file (messages::Message* m_, Channel* tcp_connection) { + auto m = dynamic_cast (m_); + INFO("FileInfo received"); + + if (m->uploading == 1) { + auto reply = file_leader->file_insert(m); + tcp_connection->do_write(reply.get()); + + } else if (m->uploading == 0) { + bool ret = file_leader->file_insert_confirm(m); + Reply reply; + + if (ret) { + reply.message = "TRUE"; + + } else { + reply.message = "FALSE"; + } + 
tcp_connection->do_write(&reply); + } +} +// }}} +// FileUpdate* {{{ +void FileLeaderRouter::update_file (messages::Message* m_, Channel* tcp_connection) { + auto m = dynamic_cast (m_); + INFO ("FileUpdate received"); + + bool ret = file_leader->file_update(m); + Reply reply; + + if (ret) { + reply.message = "OK"; + + } else { + reply.message = "FAIL"; + reply.details = "File doesn't exist"; + } + + tcp_connection->do_write(&reply); +} +// }}} +// {{{ FileDel +void FileLeaderRouter::delete_file (messages::Message* m_, Channel* tcp_connection) { + auto m = dynamic_cast (m_); + INFO ("FileDel received"); + + bool ret = file_leader->file_delete(m); + Reply reply; + + if (ret) { + reply.message = "OK"; + } else { + reply.message = "FAIL"; + reply.details = "File doesn't exist"; + } + + tcp_connection->do_write(&reply); +} +// }}} +// request_file {{{ +void FileLeaderRouter::request_file (messages::Message* m_, Channel* tcp_connection) { + auto m = dynamic_cast (m_); + INFO ("File Info received %s", m->name.c_str()); + + auto fd = file_leader->file_request(m); + tcp_connection->do_write(fd.get()); +} +// }}} +// request_ls {{{ +void FileLeaderRouter::request_ls (messages::Message* m_, Channel* tcp_connection) { + auto m = dynamic_cast (m_); + file_leader->list(m); + tcp_connection->do_write(m); +} +// }}} +// file_exist {{{ +void FileLeaderRouter::file_exist (messages::Message* m_, Channel* tcp_connection) { + auto m = dynamic_cast (m_); + bool ret = file_leader->file_exist(m->name); + Reply reply; + + if (ret) { + reply.message = "TRUE"; + + } else { + reply.message = "FALSE"; + } + tcp_connection->do_write(&reply); +} +// }}} +// replicate_metadata {{{ +void FileLeaderRouter::replicate_metadata(messages::Message* m_, Channel* tcp_connection) { + auto m = dynamic_cast (m_); + file_leader->metadata_save(m); +} +// }}} +// request_format {{{ +void FileLeaderRouter::request_format (messages::Message* m_, Channel* tcp_connection) { + bool ret = file_leader->format(); + 
Reply reply; + + if (ret) { + reply.message = "OK"; + + } else { + reply.message = "FAIL"; + } + + tcp_connection->do_write(&reply); +} +// }}} diff --git a/src/fileleader/file_leader_router.hh b/src/fileleader/file_leader_router.hh new file mode 100644 index 0000000..fb03a9b --- /dev/null +++ b/src/fileleader/file_leader_router.hh @@ -0,0 +1,26 @@ +#pragma once +#include "../network/router_decorator.hh" +#include "file_leader.hh" + +namespace eclipse { + +//! +class FileLeaderRouter: public RouterDecorator { + public: + FileLeaderRouter(FileLeader*, Router*); + ~FileLeaderRouter() = default; + + protected: + FileLeader* file_leader; + + void insert_file(messages::Message*, Channel*); + void update_file(messages::Message*, Channel*); + void request_file(messages::Message*, Channel*); + void request_ls(messages::Message*, Channel*); + void delete_file(messages::Message*, Channel*); + void file_exist(messages::Message*, Channel*); + void replicate_metadata(messages::Message*, Channel*); + void request_format(messages::Message*, Channel*); +}; + +} /* eclipse */ diff --git a/src/java/jni/velox_DFS.cc b/src/java/jni/velox_DFS.cc new file mode 100644 index 0000000..2cdfbce --- /dev/null +++ b/src/java/jni/velox_DFS.cc @@ -0,0 +1,147 @@ +#include "velox_VeloxDFS.h" +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +/* Local Functions */ +velox::DFS* get_dfs(JNIEnv *env, jobject obj) { + jclass dfs_c = env->GetObjectClass(obj); + jmethodID get_dfs = env->GetMethodID(dfs_c, "getDFS", "()J"); + if(get_dfs == nullptr) return nullptr; + + jlong dfs_ptr = env->CallLongMethod(obj, get_dfs); + return (dfs_ptr == 0) ? 
nullptr : reinterpret_cast(dfs_ptr); +} + +void string_array_to_vector(JNIEnv *env, jobject obj, jobjectArray inputs, std::vector &vec_str) { + int count = env->GetArrayLength(inputs); + + for (int i=0; iGetObjectArrayElement(inputs, i)); + const char *utf_chars = env->GetStringUTFChars(input, 0); + vec_str.push_back(std::string(utf_chars)); + env->ReleaseStringUTFChars(input, utf_chars); + } +} + +/* + * Class: Velox_DFS + * Method: constructDFS + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_velox_VeloxDFS_constructDFS + (JNIEnv *env, jobject obj) { + velox::DFS *dfs = get_dfs(env, obj); + return reinterpret_cast(((dfs == nullptr) ? new velox::DFS() : dfs)); +} + +/* + * Class: Velox_DFS + * Method: destructDFS + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_velox_VeloxDFS_destructDFS + (JNIEnv *env, jobject obj) { + velox::DFS *dfs = get_dfs(env, obj); + if(dfs != nullptr) delete dfs; +} + +/* + * Class: Velox_DFS + * Method: exists + * Signature: (Ljava/lang/String;)Z + */ +JNIEXPORT jboolean JNICALL Java_velox_VeloxDFS_exists + (JNIEnv *env, jobject obj, jstring input) { + velox::DFS *dfs = get_dfs(env, obj); + const char *file_name = env->GetStringUTFChars(input, 0); + jboolean ret = (jboolean)(dfs->exists(std::string(file_name))); + env->ReleaseStringUTFChars(input, file_name); + return ret; +} + +/* + * Class: Velox_DFS + * Method: touch + * Signature: (Ljava/lang/String;)Z + */ +JNIEXPORT jboolean JNICALL Java_velox_VeloxDFS_touch + (JNIEnv *env, jobject obj, jstring input) { + velox::DFS *dfs = get_dfs(env, obj); + const char *file_name = env->GetStringUTFChars(input, 0); + jboolean ret = (jboolean)(dfs->touch(std::string(file_name))); + env->ReleaseStringUTFChars(input, file_name); + return ret; +} + +/* + * Class: velox_VeloxDFS + * Method: write + * Signature: (Ljava/lang/String;[CII)I + */ +JNIEXPORT jlong JNICALL Java_velox_VeloxDFS_write + (JNIEnv *env, jobject obj, jstring input, jcharArray buf, jlong off, jlong len) { + velox::DFS *dfs 
= get_dfs(env, obj); + const char* file_name = env->GetStringUTFChars(input, 0); + char c_buf[len]; + env->GetCharArrayRegion(buf, 0, (jsize)len, (jchar*)c_buf); + + std::string file_name_str = std::string(file_name); + + int ret = dfs->write(file_name_str, c_buf, (uint64_t)off, (uint64_t)len); + + env->ReleaseStringUTFChars(input, file_name); + + return ret; +} + +/* + * Class: velox_VeloxDFS + * Method: read + * Signature: (Ljava/lang/String;[CII)I + */ +JNIEXPORT jlong JNICALL Java_velox_VeloxDFS_read + (JNIEnv *env, jobject obj, jstring input, jcharArray buf, jlong off, jlong len) { + velox::DFS *dfs = get_dfs(env, obj); + const char* file_name = env->GetStringUTFChars(input, 0); + char c_buf[len]; + std::string file_name_str = std::string(file_name); + + int ret = dfs->read(file_name_str, c_buf, (uint64_t)off, (uint64_t)len); + + env->SetCharArrayRegion(buf, 0, (jsize)len, (jchar*)c_buf); + + env->ReleaseStringUTFChars(input, file_name); + + return ret; +} + +/* + * Class: velox_VeloxDFS + * Method: getMetadata + * Signature: (Ljava/lang/String;)Lvelox/model/Metadata; + */ +JNIEXPORT jobject JNICALL Java_velox_VeloxDFS_getMetadata + (JNIEnv* env, jobject obj, jstring input) { + velox::DFS *dfs = get_dfs(env, obj); + const char* file_name = env->GetStringUTFChars(input, 0); + + std::string file_name_str(file_name); + + velox::model::metadata md = dfs->get_metadata(file_name_str); + + env->ReleaseStringUTFChars(input, file_name); + + jclass MetadataClass = env->FindClass("Lvelox/model/Metadata;"); + jmethodID init = env->GetMethodID(MetadataClass, "", "(Ljava/lang/String;JJIII)V"); + + return env->NewObject(MetadataClass, init, + env->NewStringUTF(md.name.c_str()), md.hash_key, md.size, md.num_block, md.type, md.replica + ); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/java/jni/velox_VDFS.cc b/src/java/jni/velox_VDFS.cc new file mode 100644 index 0000000..d86116f --- /dev/null +++ b/src/java/jni/velox_VDFS.cc @@ -0,0 +1,134 @@ +#include "velox_VDFS.h" + 
+#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +velox::vdfs* get_vdfs(JNIEnv* env, jobject obj) { + jclass vdfs_c = env->GetObjectClass(obj); + jmethodID get_vdfs = env->GetMethodID(vdfs_c, "getVDFS", "()J"); + if(get_vdfs == nullptr) return nullptr; + + jlong vdfs_ptr = env->CallLongMethod(obj, get_vdfs); + return (vdfs_ptr == 0) ? nullptr : reinterpret_cast(vdfs_ptr); +} + +/* + * Class: velox_VDFS + * Method: constructVDFS + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_velox_VDFS_constructVDFS + (JNIEnv* env, jobject obj) { + velox::vdfs* vdfs = get_vdfs(env, obj); + return reinterpret_cast(((vdfs == nullptr) ? new velox::vdfs() : vdfs)); +} + +/* + * Class: velox_VDFS + * Method: destructVDFS + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_velox_VDFS_destructVDFS + (JNIEnv* env, jobject obj) { + velox::vdfs* vdfs = get_vdfs(env, obj); + if(vdfs != nullptr) delete vdfs; +} + +/* + * Class: velox_VDFS + * Method: open + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_velox_VDFS_open + (JNIEnv* env, jobject obj, jstring str) { + velox::vdfs* vdfs = get_vdfs(env, obj); + const char* file_name = env->GetStringUTFChars(str, 0); + jlong fid = (jlong)vdfs->open_file(std::string(file_name)); + env->ReleaseStringUTFChars(str, file_name); + + return fid; +} + +/* + * Class: velox_VDFS + * Method: close + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_velox_VDFS_close + (JNIEnv* env, jobject obj, jlong fid) { + velox::vdfs* vdfs = get_vdfs(env, obj); + return (jboolean)vdfs->close(fid); +} + +/* + * Class: velox_VDFS + * Method: isOpen + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_velox_VDFS_isOpen + (JNIEnv* env, jobject obj, jlong fid) { + velox::vdfs* vdfs = get_vdfs(env, obj); + return (jboolean)vdfs->is_open(fid); +} + +/* + * Class: velox_VDFS + * Method: write + * Signature: (J[BII)J + */ +JNIEXPORT jlong JNICALL Java_velox_VDFS_write + (JNIEnv* env, jobject obj, jlong fid, 
jbyteArray buf, jlong off, jlong len) { + velox::vdfs* vdfs = get_vdfs(env, obj); + + char buffer[len]; + + env->GetByteArrayRegion(buf, 0, (jsize)len, (jbyte*)buffer); + + return vdfs->write((long)fid, buffer, (uint32_t)off, (uint32_t)len); +} + +/* + * Class: velox_VDFS + * Method: read + * Signature: (J[BII)J + */ +JNIEXPORT jlong JNICALL Java_velox_VDFS_read + (JNIEnv* env, jobject obj, jlong fid, jbyteArray buf, jlong off, jlong len) { + velox::vdfs* vdfs = get_vdfs(env, obj); + + char c_buf[len]; + + uint32_t ret = vdfs->read((long)fid, c_buf, (uint32_t)off, (uint32_t)len); + + env->SetByteArrayRegion(buf, 0, (jsize)len, (jbyte*)c_buf); + + return ret; +} + +/* + * Class: velox_VDFS + * Method: getMetadata + * Signature: (J)Lvelox/model/Metadata; + */ +JNIEXPORT jobject JNICALL Java_velox_VDFS_getMetadata + (JNIEnv* env, jobject obj, jlong fid) { + velox::vdfs* vdfs = get_vdfs(env, obj); + + velox::model::metadata md = vdfs->get_metadata((long)fid); + + jclass MetadataClass = env->FindClass("Lvelox/model/Metadata;"); + jmethodID init = env->GetMethodID(MetadataClass, "", "(Ljava/lang/String;JJIII)V"); + + return env->NewObject(MetadataClass, init, + env->NewStringUTF(md.name.c_str()), md.hash_key, md.size, md.num_block, md.type, md.replica + ); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/java/jni/velox_VDFS.h b/src/java/jni/velox_VDFS.h new file mode 100644 index 0000000..47a8bb5 --- /dev/null +++ b/src/java/jni/velox_VDFS.h @@ -0,0 +1,77 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class velox_VDFS */ + +#ifndef _Included_velox_VDFS +#define _Included_velox_VDFS +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: velox_VDFS + * Method: constructVDFS + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_velox_VDFS_constructVDFS + (JNIEnv *, jobject); + +/* + * Class: velox_VDFS + * Method: destructVDFS + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_velox_VDFS_destructVDFS + (JNIEnv *, jobject); + +/* + 
* Class: velox_VDFS + * Method: open + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_velox_VDFS_open + (JNIEnv *, jobject, jstring); + +/* + * Class: velox_VDFS + * Method: close + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_velox_VDFS_close + (JNIEnv *, jobject, jlong); + +/* + * Class: velox_VDFS + * Method: isOpen + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_velox_VDFS_isOpen + (JNIEnv *, jobject, jlong); + +/* + * Class: velox_VDFS + * Method: write + * Signature: (J[BJJ)J + */ +JNIEXPORT jlong JNICALL Java_velox_VDFS_write + (JNIEnv *, jobject, jlong, jbyteArray, jlong, jlong); + +/* + * Class: velox_VDFS + * Method: read + * Signature: (J[BJJ)J + */ +JNIEXPORT jlong JNICALL Java_velox_VDFS_read + (JNIEnv *, jobject, jlong, jbyteArray, jlong, jlong); + +/* + * Class: velox_VDFS + * Method: getMetadata + * Signature: (J)Lvelox/model/Metadata; + */ +JNIEXPORT jobject JNICALL Java_velox_VDFS_getMetadata + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/java/jni/velox_VeloxDFS.h b/src/java/jni/velox_VeloxDFS.h new file mode 100644 index 0000000..a815fa9 --- /dev/null +++ b/src/java/jni/velox_VeloxDFS.h @@ -0,0 +1,77 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class velox_VeloxDFS */ + +#ifndef _Included_velox_VeloxDFS +#define _Included_velox_VeloxDFS +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: velox_VeloxDFS + * Method: constructDFS + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_velox_VeloxDFS_constructDFS + (JNIEnv *, jobject); + +/* + * Class: velox_VeloxDFS + * Method: destructDFS + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_velox_VeloxDFS_destructDFS + (JNIEnv *, jobject); + +/* + * Class: velox_VeloxDFS + * Method: format + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_velox_VeloxDFS_format + (JNIEnv *, jobject); + +/* + * Class: velox_VeloxDFS + * Method: exists + * Signature: (Ljava/lang/String;)Z + 
*/ +JNIEXPORT jboolean JNICALL Java_velox_VeloxDFS_exists + (JNIEnv *, jobject, jstring); + +/* + * Class: velox_VeloxDFS + * Method: touch + * Signature: (Ljava/lang/String;)Z + */ +JNIEXPORT jboolean JNICALL Java_velox_VeloxDFS_touch + (JNIEnv *, jobject, jstring); + +/* + * Class: velox_VeloxDFS + * Method: write + * Signature: (Ljava/lang/String;[CJJ)J + */ +JNIEXPORT jlong JNICALL Java_velox_VeloxDFS_write + (JNIEnv *, jobject, jstring, jcharArray, jlong, jlong); + +/* + * Class: velox_VeloxDFS + * Method: read + * Signature: (Ljava/lang/String;[CJJ)J + */ +JNIEXPORT jlong JNICALL Java_velox_VeloxDFS_read + (JNIEnv *, jobject, jstring, jcharArray, jlong, jlong); + +/* + * Class: velox_VeloxDFS + * Method: getMetadata + * Signature: (Ljava/lang/String;)Lvelox/model/Metadata; + */ +JNIEXPORT jobject JNICALL Java_velox_VeloxDFS_getMetadata + (JNIEnv *, jobject, jstring); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/java/velox/VDFS.java b/src/java/velox/VDFS.java new file mode 100644 index 0000000..2ea5ac8 --- /dev/null +++ b/src/java/velox/VDFS.java @@ -0,0 +1,35 @@ +package velox; + +import velox.model.Metadata; + +public class VDFS { + static { + System.loadLibrary("vdfs_jni"); + } + + /* VDFS Object Management */ + private long mVDFS = 0; + + private native long constructVDFS(); + private native void destructVDFS(); + + public VDFS() { + mVDFS = this.constructVDFS(); + } + + public void destroy() { + this.destructVDFS(); + } + + public long getVDFS() { return mVDFS; } + + /* Native Functions for Operations */ + public native long open(String name); + public native boolean close(long fid); + public native boolean isOpen(long fid); + + public native long write(long fid, byte[] buf, long off, long len); + public native long read(long fid, byte[] buf, long off, long len); + + public native Metadata getMetadata(long fid); +} diff --git a/src/java/velox/VeloxDFS.java b/src/java/velox/VeloxDFS.java new file mode 100644 index 0000000..7915c9c --- 
/dev/null +++ b/src/java/velox/VeloxDFS.java @@ -0,0 +1,35 @@ +package velox; + +import java.lang.String; + +import velox.model.Metadata; + +public class VeloxDFS { + static { + System.loadLibrary("dfs_jni"); + } + + /* DFS Object Management */ + private long mDFS = 0; + + private native long constructDFS(); + private native void destructDFS(); + + public VeloxDFS() { + mDFS = this.constructDFS(); + } + + public void destroy() { + this.destructDFS(); + } + + public long getDFS() { return mDFS; } + + /* Native Functions for Operations */ + public native int format(); + public native boolean exists(String fileName); + public native boolean touch(String fileName); + public native long write(String fileName, char[] buf, long off, long len); + public native long read(String fileName, char[] buf, long off, long len); + public native Metadata getMetadata(String fileName); +} diff --git a/src/java/velox/model/Metadata.java b/src/java/velox/model/Metadata.java new file mode 100644 index 0000000..2397384 --- /dev/null +++ b/src/java/velox/model/Metadata.java @@ -0,0 +1,21 @@ +package velox.model; + +import java.lang.String; + +public class Metadata { + public Metadata(String _name, long _hashKey, long _size, int _numBlock, int _type, int _replica) { + name = _name; + hashKey = _hashKey; + size = _size; + numBlock = _numBlock; + type = _type; + replica = _replica; + }; + + public String name; + public long hashKey; + public long size; + public int numBlock; + public int type; + public int replica; +} diff --git a/src/mapreduce/executor.cc b/src/mapreduce/executor.cc index bcc3482..e377a5e 100644 --- a/src/mapreduce/executor.cc +++ b/src/mapreduce/executor.cc @@ -1,111 +1,175 @@ #include "executor.hh" #include "../common/dl_loader.hh" +#include "../common/histogram.hh" #include "../common/hash.hh" -#include "../mapreduce/messages/key_value_shuffle.h" #include "../mapreduce/output_collection.hh" #include "../mapreduce/fs/ireader.h" #include "../messages/keyvalue.hh" +#include 
"../client/dfs.hh" +#include #include #include #include #include #include #include +#include +#include +#define MAP_MAX_LINE 10000 using namespace eclipse; using namespace std; namespace eclipse { // Constructor {{{ -Executor::Executor(PeerMR* p) : peer(p) { } +Executor::Executor(TaskExecutor* p) : peer(p) { } Executor::~Executor() { } // }}} // run_map {{{ -bool Executor::run_map (messages::Task* m, std::string input) { - auto path_lib = GET_STR("path.applications"); - auto network_size = GET_VEC_STR("network.nodes").size(); - path_lib += ("/" + m->library); - DL_loader loader (path_lib); - - try { - loader.init_lib(); - } catch (std::exception& e) { - context.logger->error ("Not found library path[%s]", path_lib.c_str()); - } +bool Executor::run_map (messages::Task* m) { + + auto path_lib = GET_STR("path.applications"); + auto network_size = GET_VEC_STR("network.nodes").size(); + auto mappers = GET_INT("mapreduce.mappers"); + path_lib += ("/" + m->library); + DL_loader loader (path_lib); - before_map_t _before_map_ = loader.load_function_before_map("before_map"); - std::unordered_map options; - if(_before_map_ != nullptr) - _before_map_(options); + try { + loader.init_lib(); + } catch (std::exception& e) { + context.logger->error ("Not found library path[%s]", path_lib.c_str()); + } - mapper_t _map_ = loader.load_function(m->func_name); - stringstream ss (input); + before_map_t _before_map_ = loader.load_function_before_map("before_map"); - char next_line[10000]; //! 
:TODO: change to DFS line limit - velox::OutputCollection results; + vector keys_blocks; - while (!ss.eof()) { - bzero(next_line, 10000); - ss.getline (next_line, 10000); - if (strnlen(next_line, 10000) == 0) - continue; + vector threads; + mutex mut; + INFO("LAunching mapper with %i threads", mappers); + for (int reducer_id = 0; reducer_id < m->blocks.size(); reducer_id++) { - std::string line(next_line); - _map_ (line, results, options); + // Make sure we only execute 'mappers' threads at that time + if (threads.size() >= mappers) { + threads.front().join(); + threads.erase(threads.begin(), threads.begin()+1); } - vector keys_per_node; - vector headers_list; - keys_per_node.resize(network_size); - headers_list.resize(network_size); - - auto run_headers = [&headers_list, &keys_per_node, &network_size](std::string key, std::vector* value) mutable { - int node = h(key) % network_size; - keys_per_node[node]++; - headers_list[node] = key; - }; - results.travel(run_headers); - - int i = 0; - for(unsigned int node = 0; node < network_size; node++) { - if(keys_per_node[node] == 0) continue; - - KeyValueShuffle kv; - kv.job_id_ = m->job_id; // :TODO: - kv.map_id_ = 0; - kv.key_ = headers_list[node]; - kv.is_header = true; - kv.number_of_keys = keys_per_node[node]; - peer->process(&kv); - i++; - } + threads.emplace_back(std::thread([&] (int id) { + std::mt19937 rng; + rng.seed(std::random_device()()); + std::uniform_int_distribution dist(0, INT_MAX); + + std::unordered_map options; + if(_before_map_ != nullptr) + _before_map_(options); + + mapper_t _map_ = loader.load_function(m->func_name); + + const string block_name = m->blocks[id].second; + INFO("Executing map on block: %s", block_name.c_str()); + Local_io local_io; + string input = local_io.read(block_name); + stringstream ss (input); + + char next_line[MAP_MAX_LINE]; + velox::OutputCollection results; + + while (!ss.eof()) { + bzero(next_line, MAP_MAX_LINE); + ss.getline (next_line, MAP_MAX_LINE); + if 
(strnlen(next_line, MAP_MAX_LINE) == 0) + continue; - auto run_block = [&m, &peer = this->peer](std::string key, std::vector* value) mutable { - KeyValueShuffle kv; - kv.job_id_ = m->job_id; // :TODO: - kv.map_id_ = 0; - kv.key_ = key; - kv.value_ = std::move(*value); - peer->process(&kv); - }; + std::string line(next_line); + _map_ (line, results, options); + } - results.travel(run_block); + map kv_blocks; + try { - after_map_t _after_map_ = loader.load_function_after_map("after_map"); - if(_after_map_ != nullptr) - _after_map_(options); + auto run_block = [&mut, &m, &kv_blocks, &keys_blocks, network_size, &dist, &rng](std::string key, std::vector* value) mutable { + int node = h(key) % network_size; + auto it = kv_blocks.find(node); + if (it == kv_blocks.end()) { + it = kv_blocks.insert({node, {}}).first; - return true; + it->second.node_id = node; + it->second.job_id_ = m->job_id; + it->second.map_id_ = 0; + + uint32_t random_id = dist(rng); + mut.lock(); + keys_blocks.push_back(node); + mut.unlock(); + it->second.kv_id = random_id; + } + + it->second.kv_pairs.insert({key, std::move(*value)}); + }; + results.travel(run_block); + + vector shuffled_array; + + for (int i = 0; i < network_size; i++) + shuffled_array.push_back(i); + + auto engine = std::default_random_engine{}; + std::shuffle(shuffled_array.begin(), shuffled_array.end(), engine); + + + for (auto& index: shuffled_array) { + mut.lock(); + auto it = kv_blocks.find(index); + if (it != kv_blocks.end()) { + peer->insert_key_value(&(it->second)); + } + mut.unlock(); + } + + + after_map_t _after_map_ = loader.load_function_after_map("after_map"); + if(_after_map_ != nullptr) + _after_map_(options); + INFO("MAP thread finishing"); + + } catch (exception& e) { + INFO("Mapper exception %s", e.what()); + } catch (boost::exception& e) { + INFO("Mapper exception %s", diagnostic_information(e).c_str()); + } + + + }, reducer_id)); } + + try { + for (auto& thread : threads) + thread.join(); + + 
peer->notify_map_is_finished(m->job_id, keys_blocks); + } catch (exception& e) { + INFO("Mapper parent exception %s", e.what()); + } catch (boost::exception& e) { + INFO("Mapper parent exception %s", diagnostic_information(e).c_str()); + } + return true; +} // }}} // run_reduce {{{ bool Executor::run_reduce (messages::Task* task) { auto path_lib = context.settings.get("path.applications"); auto block_size = GET_INT("filesystem.block"); + //auto reducer_slot = GET_INT("mapreduce.reduce_slot"); path_lib += ("/" + task->library); DL_loader loader (path_lib); + auto network_size = GET_VEC_STR("network.nodes").size(); + Histogram boundaries(network_size, 100); + boundaries.initialize(); + velox::model::metadata metadata; + try { loader.init_lib(); } catch (std::exception& e) { @@ -114,110 +178,106 @@ bool Executor::run_reduce (messages::Task* task) { reducer_t _reducer_ = loader.load_function_reduce(task->func_name); - try { - IReader ireader; - ireader.set_job_id(task->job_id); - ireader.set_map_id(0); // :TODO: - ireader.set_reducer_id(0); - ireader.init(); - - uint32_t iterations = 0; - uint32_t total_size = 0; - std::string block_content; - - std::list values; - while (ireader.is_next_key()) { - string key; - ireader.get_next_key(key); - - //int total_iterations = 0; - velox::OutputCollection output; - values.clear(); - - //if (ireader.is_next_value()) { - //ireader.get_next_value(last_output); - //total_iterations = 1; + uint32_t total_size = 0; + uint32_t num_keys = 0; + vector threads; + DirectoryMR directory; + uint32_t reducer_slot = directory.select_number_of_reducers(task->job_id); + mutex mut; + DEBUG("LAunching reducer with %i threads", reducer_slot); + for (int reducer_id = 0; reducer_id < reducer_slot; reducer_id++) { + threads.push_back(std::thread([&] (int id) { + DEBUG("%i %i", task->job_id, id); + + IReader ireader; + ireader.set_job_id(task->job_id); + ireader.set_map_id(0); // :TODO: + ireader.set_reducer_id(id); + + mut.lock(); + ireader.init(); + 
mut.unlock(); + std::string block_content; + + std::vector values; + while (ireader.is_next_key()) { + string key; + ireader.get_next_key(key); + + mut.lock(); + DEBUG("PROCCESSING KEY %s", key.c_str()); + mut.unlock(); + + velox::OutputCollection output; + values.clear(); //TODO: make a function to get values at a time while (ireader.is_next_value()) { string value; ireader.get_next_value(value); values.push_back(value); - - //total_iterations++; } + DEBUG("RUNNING REDUCER %s", key.c_str()); + + if(values.size() > 0) { + try { + _reducer_ (key, values, output); + + } catch (std::exception& e) { + ERROR("Error in the executer: %s", e.what()); + exit(EXIT_FAILURE); + } + } else + INFO("REDUCER skipping a KEY"); + + std::string current_block_content = ""; + + auto make_block_content = [&] (std::string key, std::vector* values) mutable { + + for(std::string& value : *values) { + current_block_content += key + ": " + value + "\n"; + num_keys++; + } + }; + + output.travel(make_block_content); - if(values.size() > 0) - _reducer_ (key, values, output); - //} - - std::string current_block_content; - auto make_block_content = [¤t_block_content](std::string key, std::vector* values) mutable { - for(std::string& value : *values) - current_block_content += key + ": " + value + "\n"; - }; - - output.travel(make_block_content); - - //INFO("Key %s #iterations: %i", key.c_str(), total_iterations); - if (block_content.length() + current_block_content.length() > (uint32_t)block_size || current_block_content.length() > (uint32_t)block_size) { - BlockInfo bi; - bi.file_name = task->file_output; - bi.name = task->file_output + "-" + key.c_str(); - bi.seq = iterations; - bi.hash_key = h(bi.name); - bi.size = block_content.length(); - bi.content = block_content; - bi.replica = 1; - bi.node = ""; - bi.l_node = ""; - bi.r_node = ""; - bi.is_committed = 1; - - dynamic_cast(peer)->submit_block(&bi); - iterations++; - total_size += block_content.length(); - block_content = ""; - - } else if 
(!ireader.is_next_key()) { block_content += current_block_content; - BlockInfo bi; - bi.file_name = task->file_output; - bi.name = task->file_output + "-" + key.c_str(); - bi.seq = iterations; - bi.hash_key = h(bi.name); - bi.size = block_content.length(); - bi.content = block_content; - bi.replica = 1; - bi.node = ""; - bi.l_node = ""; - bi.r_node = ""; - bi.is_committed = 1; - - dynamic_cast(peer)->submit_block(&bi); - iterations++; - total_size += block_content.length(); - block_content = ""; - } + mut.lock(); + total_size += current_block_content.length(); + mut.unlock(); - block_content += current_block_content; - } - FileInfo fi; - fi.name = task->file_output; - fi.num_block = iterations; - fi.size = total_size; - fi.hash_key = h(fi.name); - fi.replica = 1; - fi.reducer_output = true; - fi.job_id = task->job_id; + if (block_content.length() > (uint32_t)block_size || !ireader.is_next_key()) { + string name = task->file_output + "-" + key; - dynamic_cast(peer)->process(&fi); + mut.lock(); + metadata.blocks.push_back(name); + metadata.hash_keys.push_back(boundaries.random_within_boundaries(peer->get_id())); + metadata.block_size.push_back(block_content.length()); + mut.unlock(); - } catch (std::exception& e) { - context.logger->error ("Error in the executer: %s", e.what()); - exit(EXIT_FAILURE); + INFO("REDUCER SAVING TO DISK %s : %lu B", name.c_str(), block_content.size()); + Local_io local_io; + local_io.write(name, block_content); + + block_content.clear(); + } + + } + }, reducer_id)); } + + for (auto& thread : threads) + thread.join(); + + velox::DFS dfs; + INFO("REDUCER APPENDING FILE_METADATA KP:%u", num_keys); + dfs.file_metadata_append(task->file_output, total_size, metadata); + + peer->notify_task_leader(task->leader, task->job_id, "REDUCE"); + + return true; } // }}} diff --git a/src/mapreduce/executor.hh b/src/mapreduce/executor.hh index 0927ca8..ad2d7c2 100644 --- a/src/mapreduce/executor.hh +++ b/src/mapreduce/executor.hh @@ -1,6 +1,6 @@ #pragma 
once -#include "nodes/peermr.h" -#include "../messages/task.hh" +#include "task_executor.hh" +#include "messages/task.hh" #include namespace eclipse { @@ -8,14 +8,14 @@ class Executor { //typedef std::pair (*maptype)(std::string); //typedef std::string (*reducetype)(std::string, std::string); public: - Executor (PeerMR*); + Executor (TaskExecutor*); ~Executor (); - bool run_map (messages::Task*, std::string); + bool run_map (messages::Task*); bool run_reduce (messages::Task*); protected: - PeerMR* peer; + TaskExecutor* peer; }; } /* eclipse */ diff --git a/src/mapreduce/fs/directorymr.cc b/src/mapreduce/fs/directorymr.cc index e9edc83..572bf18 100644 --- a/src/mapreduce/fs/directorymr.cc +++ b/src/mapreduce/fs/directorymr.cc @@ -8,245 +8,186 @@ #include "../messages/idatainsert.hh" #include "../messages/igroupinsert.hh" #include "../messages/iblockinsert.hh" -#include "../../common/context.hh" +#include "../common/context_singleton.hh" +#include +#include +#include + +#define DEFAULT_QUERY_SIZE 512 + +using namespace std; namespace eclipse { -void DirectoryMR::init_db() { - open_db(); +static int idata_callback(void *idata_info, int argc, + char **argv, char **azColName) +{ + auto idata = reinterpret_cast(idata_info); + idata->job_id = atoi(argv[0]); + idata->map_id = atoi(argv[1]); + idata->num_reducer = atoi(argv[2]); + return 0; +} + +static int idata_list_callback(void *list, int argc, + char **argv, char **azColName) +{ + auto idata_list = reinterpret_cast (list); + for (int i = 0; i < argc; i++) { + IDataInfo tmp_idata; + tmp_idata.job_id = atoi(argv[i++]); + tmp_idata.map_id = atoi(argv[i++]); + tmp_idata.num_reducer = atoi(argv[i]); + idata_list->data.push_back(tmp_idata); + } + return 0; +} + +static int igroup_callback(void *igroup_info, int argc, char **argv, + char **azColName) { + auto igroup = reinterpret_cast(igroup_info); + igroup->job_id = atoi(argv[0]); + igroup->map_id = atoi(argv[1]); + igroup->reducer_id = atoi(argv[2]); + igroup->num_block = 
atoi(argv[3]); + return 0; +} +static int iblock_callback(void *iblock_info, int argc, char **argv, + char **azColName) { + auto iblock = reinterpret_cast(iblock_info); + iblock->job_id = atoi(argv[0]); + iblock->map_id = atoi(argv[1]); + iblock->reducer_id = atoi(argv[2]); + iblock->block_seq = atoi(argv[3]); + return 0; +} + +void DirectoryMR::create_tables() { + char sql[DEFAULT_QUERY_SIZE]; // Create SQL statement of IData - sprintf(sql, "CREATE TABLE idata_table( \ + sprintf(sql, "CREATE TABLE IF NOT EXISTS idata_table( \ job_id INT NOT NULL, \ map_id INT NOT NULL, \ num_reducer INT NOT NULL, \ PRIMARY KEY (job_id, map_id));"); + // Execute SQL statement - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if(rc != SQLITE_OK) { - ERROR("SQL error: %s", zErrMsg); - sqlite3_free(zErrMsg); - } else { + if(Directory::query_exec_simple(sql, NULL, NULL)) DEBUG("idata_table created successfully"); - } + // Create SQL statement of IGroup - sprintf(sql, "CREATE TABLE igroup_table( \ + sprintf(sql, "CREATE TABLE IF NOT EXISTS igroup_table( \ job_id INT NOT NULL, \ map_id INT NOT NULL, \ reducer_id INT NOT NULL, \ num_block INT NOT NULL, \ PRIMARY KEY (job_id, map_id, reducer_id));"); + // Execute SQL statement - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if(rc != SQLITE_OK) { - ERROR("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { + if(Directory::query_exec_simple(sql, NULL, NULL)) DEBUG("igroup_table created successfully"); - } + // Create SQL statement of IBlock - sprintf(sql, "CREATE TABLE iblock_table( \ + sprintf(sql, "CREATE TABLE IF NOT EXISTS iblock_table( \ job_id INT NOT NULL, \ map_id INT NOT NULL, \ reducer_id INT NOT NULL, \ block_seq INT NOT NULL, \ PRIMARY KEY (job_id, map_id, reducer_id, block_seq));"); - // Execute SQL statement - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if(rc != SQLITE_OK) { - ERROR("SQL error: %s", zErrMsg); - sqlite3_free(zErrMsg); - } else { + + if(Directory::query_exec_simple(sql, NULL, NULL)) 
DEBUG("iblock_table created successfully"); - } - sqlite3_close(db); } void DirectoryMR::insert_idata_metadata(IDataInsert idata_insert) { - // Open database - open_db(); - mutex.lock(); + char sql[DEFAULT_QUERY_SIZE]; // Create sql statement sprintf(sql, "INSERT INTO idata_table (\ job_id, map_id, num_reducer) \ VALUES (%" PRIu32 ", %" PRIu32 ", %" PRIu32 ");", idata_insert.job_id, idata_insert.map_id, idata_insert.num_reducer); - // Execute SQL statement - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if(rc != SQLITE_OK) { - ERROR("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } - // Close Database - sqlite3_close(db); - mutex.unlock(); + + Directory::query_exec_simple(sql, NULL, NULL); } void DirectoryMR::insert_igroup_metadata(IGroupInsert igroup_insert) { - // Open database - open_db(); - mutex.lock(); - // Create sql statement + char sql[DEFAULT_QUERY_SIZE]; + sprintf(sql, "INSERT INTO igroup_table (\ job_id, map_id, reducer_id, num_block) \ VALUES (%" PRIu32 ", %" PRIu32 ", %" PRIu32 ", %" PRIu32 ");", igroup_insert.job_id, igroup_insert.map_id, igroup_insert.reducer_id, igroup_insert.num_block); - // Execute SQL statement - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if(rc != SQLITE_OK) { - ERROR("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } - // Close Database - sqlite3_close(db); - mutex.unlock(); + + Directory::query_exec_simple(sql, NULL, NULL); } void DirectoryMR::insert_iblock_metadata(IBlockInsert iblock_insert) { - // Open database - open_db(); - mutex.lock(); - // Create sql statement + char sql[DEFAULT_QUERY_SIZE]; + sprintf(sql, "INSERT INTO iblock_table (\ job_id, map_id, reducer_id, block_seq) \ VALUES (%" PRIu32 ", %" PRIu32 ", %" PRIu32 ", %" PRIu32 ");", iblock_insert.job_id, iblock_insert.map_id, iblock_insert.reducer_id, iblock_insert.block_seq); - // Execute SQL statement - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if(rc != SQLITE_OK) { - ERROR("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } - 
// Close Database - sqlite3_close(db); - mutex.unlock(); -} -int DirectoryMR::idata_callback(void *idata_info, int argc, char **argv, char **azColName) -{ - auto idata = reinterpret_cast(idata_info); - idata->job_id = atoi(argv[0]); - idata->map_id = atoi(argv[1]); - idata->num_reducer = atoi(argv[2]); - return 0; -} - -int DirectoryMR::idata_list_callback(void *list, int argc, char **argv, char **azColName) -{ - auto idata_list = reinterpret_cast (list); - for (int i = 0; i < argc; i++) { - IDataInfo tmp_idata; - tmp_idata.job_id = atoi(argv[i++]); - tmp_idata.map_id = atoi(argv[i++]); - tmp_idata.num_reducer = atoi(argv[i]); - idata_list->data.push_back(tmp_idata); - } - return 0; -} - -int DirectoryMR::igroup_callback(void *igroup_info, int argc, char **argv, - char **azColName) { - auto igroup = reinterpret_cast(igroup_info); - igroup->job_id = atoi(argv[0]); - igroup->map_id = atoi(argv[1]); - igroup->reducer_id = atoi(argv[2]); - igroup->num_block = atoi(argv[3]); - return 0; -} -int DirectoryMR::iblock_callback(void *iblock_info, int argc, char **argv, - char **azColName) { - auto iblock = reinterpret_cast(iblock_info); - iblock->job_id = atoi(argv[0]); - iblock->map_id = atoi(argv[1]); - iblock->reducer_id = atoi(argv[2]); - iblock->block_seq = atoi(argv[3]); - return 0; + Directory::query_exec_simple(sql, NULL, NULL); } void DirectoryMR::select_idata_metadata(uint32_t job_id, uint32_t map_id, IDataInfo *idata_info) { - // Open database - open_db(); - mutex.lock(); - // Create sql statement + char sql[DEFAULT_QUERY_SIZE]; + sprintf(sql, "SELECT * from idata_table where job_id=%" PRIu32 " and \ map_id=%" PRIu32 ";", job_id, map_id); - // Execute SQL statement - rc = sqlite3_exec(db, sql, idata_callback, (void*)idata_info, &zErrMsg); - if(rc != SQLITE_OK) { - ERROR("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { + + if (Directory::query_exec_simple(sql,idata_callback, (void*)idata_info)) DEBUG("idata_metadata selected successfully\n"); - } - 
// Close Database - sqlite3_close(db); - mutex.unlock(); } -void DirectoryMR::select_all_idata_metadata(IDataList &idata_list) -{ - // open database - open_db(); - mutex.lock(); - // create sql statement +void DirectoryMR::select_all_idata_metadata(IDataList &idata_list) { + char sql[DEFAULT_QUERY_SIZE]; + sprintf(sql, "SELECT * from idata_table;"); - // execute sql statement - rc = sqlite3_exec(db, sql, idata_list_callback, (void*)&idata_list, &zErrMsg); - if (rc != SQLITE_OK) - { - context.logger -> error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } - else - { - DEBUG("idata_metadata selected successfully\n"); - } - // close database - sqlite3_close(db); - mutex.unlock(); + + if (Directory::query_exec_simple(sql,idata_list_callback, (void*)&idata_list)) + DEBUG("idata_metadata selected successfully"); } void DirectoryMR::select_igroup_metadata(uint32_t job_id, uint32_t map_id, uint32_t reducer_id, IGroupInfo *igroup_info) { - // Open database - open_db(); - mutex.lock(); - // Create sql statement + char sql[DEFAULT_QUERY_SIZE]; + sprintf(sql, "SELECT * from igroup_table where job_id=%" PRIu32 " and \ map_id=%" PRIu32 " and reducer_id=%" PRIu32 ";", job_id, map_id, reducer_id); - // Execute SQL statement - rc = sqlite3_exec(db, sql, igroup_callback, (void*)igroup_info, &zErrMsg); - if(rc != SQLITE_OK) { - ERROR("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - DEBUG("igroup_metadata selected successfully\n"); - } - // Close Database - sqlite3_close(db); - mutex.unlock(); + + if (Directory::query_exec_simple(sql, igroup_callback, (void*)igroup_info)) + DEBUG("igroup_metadata selected successfully"); } void DirectoryMR::select_iblock_metadata(uint32_t job_id, uint32_t map_id, uint32_t reducer_id, uint32_t block_seq, IBlockInfo *iblock_info) { - // Open database - open_db(); - mutex.lock(); - // Create sql statement + char sql[DEFAULT_QUERY_SIZE]; + sprintf(sql, "SELECT * from iblock_table where job_id=%" PRIu32 " and \ map_id=%" PRIu32 
" and reducer_id=%" PRIu32 " and block_seq=%" PRIu32 ";", job_id, map_id, reducer_id, block_seq); - // Execute SQL statement - rc = sqlite3_exec(db, sql, iblock_callback, (void*)iblock_info, &zErrMsg); - if(rc != SQLITE_OK) { - ERROR("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - DEBUG("iblock_metadata selected successfully\n"); - } - // Close Database - sqlite3_close(db); - mutex.unlock(); + + if (Directory::query_exec_simple(sql, iblock_callback, (void*)iblock_info)) + DEBUG("iblock_metadata selected successfully"); } +uint32_t DirectoryMR::select_number_of_reducers(uint32_t job_id) { + char sql[DEFAULT_QUERY_SIZE]; + + sprintf(sql, "SELECT * FROM iblock_table WHERE (job_id='%" PRIu32 "') \ + ORDER BY reducer_id DESC LIMIT 1;", job_id); + + uint32_t ret = 0; + Directory::query_exec_simple(sql, [] (void* output, int, char **argv, char**) { + *reinterpret_cast(output) = atoi(argv[2]); + return 0; + }, (void*)&ret); + return ret + 1; +} + + } // namespace eclipse diff --git a/src/mapreduce/fs/directorymr.hh b/src/mapreduce/fs/directorymr.hh index 60f352d..dc42342 100644 --- a/src/mapreduce/fs/directorymr.hh +++ b/src/mapreduce/fs/directorymr.hh @@ -1,6 +1,6 @@ #pragma once #include -#include "../../nodes/directory.hh" +#include "../../fileleader/directory.hh" #include "../messages/idatainfo.hh" #include "../messages/igroupinfo.hh" #include "../messages/iblockinfo.hh" @@ -12,29 +12,23 @@ namespace eclipse { class DirectoryMR: public Directory { - public: - void init_db(); - void insert_idata_metadata(IDataInsert idata_insert); - void insert_igroup_metadata(IGroupInsert igroup_insert); - void insert_iblock_metadata(IBlockInsert iblock_insert); - void select_idata_metadata(uint32_t job_id, uint32_t map_id, - IDataInfo *idata_info); - void select_all_idata_metadata(IDataList &idata_list); - void select_igroup_metadata(uint32_t job_id, uint32_t map_id, - uint32_t reducer_id, IGroupInfo *igroup_info); - void select_iblock_metadata(uint32_t job_id, 
uint32_t map_id, - uint32_t reducer_id, uint32_t block_seq, IBlockInfo *iblock_info); + public: + void create_tables(); + void insert_idata_metadata(IDataInsert idata_insert); + void insert_igroup_metadata(IGroupInsert igroup_insert); + void insert_iblock_metadata(IBlockInsert iblock_insert); + void select_idata_metadata(uint32_t job_id, uint32_t map_id, + IDataInfo *idata_info); + void select_all_idata_metadata(IDataList &idata_list); + void select_igroup_metadata(uint32_t job_id, uint32_t map_id, + uint32_t reducer_id, IGroupInfo *igroup_info); + void select_iblock_metadata(uint32_t job_id, uint32_t map_id, + uint32_t reducer_id, uint32_t block_seq, IBlockInfo *iblock_info); + uint32_t select_number_of_reducers(uint32_t job_id); - protected: - static int idata_callback(void *idata_info, int argc, char **argv, - char **azColName); - static int idata_list_callback(void *list, int argc, char **argv, - char **azColName); - static int igroup_callback(void *igroup_info, int argc, char **argv, - char **azColName); - static int iblock_callback(void *iblock_info, int argc, char **argv, - char **azColName); + private: + using Directory::query_exec_simple; }; } diff --git a/src/mapreduce/fs/ireader.cc b/src/mapreduce/fs/ireader.cc index 9a13806..f609cd6 100644 --- a/src/mapreduce/fs/ireader.cc +++ b/src/mapreduce/fs/ireader.cc @@ -8,7 +8,7 @@ #include #include #include -#include "../../common/context.hh" +#include "../../common/context_singleton.hh" #include "../../messages/factory.hh" #include "../../messages/message.hh" #include "../messages/igroupinforequest.hh" @@ -185,15 +185,19 @@ bool IReader::ShiftToNextKey() { bool IReader::LoadKey(const int &index) { // Make sure you are not in the middle of the values. if (blocks_[index]->eof()) { -std::cout << "!!!! eof() works well!" << std::endl; + std::cout << "!!!! eof() works well!" 
<< std::endl; return false; } + getline(*blocks_[index], loaded_keys_[index]); string num_value; getline(*blocks_[index], num_value); -if (num_value == "") return false; + + if (num_value == "") return false; + num_remain_[index] = stoi(num_value); key_order_.emplace(loaded_keys_[index], index); + return true; } bool IReader::LoadValue(const int &index) { diff --git a/src/mapreduce/fs/ireader.h b/src/mapreduce/fs/ireader.h index ea9b5c8..ef92730 100644 --- a/src/mapreduce/fs/ireader.h +++ b/src/mapreduce/fs/ireader.h @@ -35,13 +35,13 @@ class IReader: public IReader_interface { bool get_next_value(string &value) override; bool is_next_key() override; bool is_next_value() override; + uint32_t get_num_block(); private: // tcp::socket* connect(uint32_t net_id); // void send_message(tcp::socket *socket, messages::Message *msg); // messages::IGroupInfo* read_igroup_info(tcp::socket *socket); // messages::IBlockInfo* read_iblock_info(tcp::socket *socket); - uint32_t get_num_block(); void SetNext(); void SetNextAsCurrent(); bool ShiftToNextKey(); diff --git a/src/mapreduce/fs/iwriter.cc b/src/mapreduce/fs/iwriter.cc index 6ceb36a..f46b941 100644 --- a/src/mapreduce/fs/iwriter.cc +++ b/src/mapreduce/fs/iwriter.cc @@ -10,7 +10,7 @@ #include #include #include -#include "../../common/context.hh" +#include "../../common/context_singleton.hh" #include "../../messages/message.hh" #include "../../messages/factory.hh" #include "../../messages/reply.hh" @@ -155,7 +155,7 @@ void IWriter::add_key_value(const string &key, const string &value) { } else { new_size = get_block_size(index) + value.length() + 1; } - block->insert({key, value}); + block->insert({key, std::move(value)}); set_block_size(index, new_size); if (new_size > iblock_size_) { diff --git a/src/mapreduce/job.cc b/src/mapreduce/job.cc deleted file mode 100644 index cc94cac..0000000 --- a/src/mapreduce/job.cc +++ /dev/null @@ -1,94 +0,0 @@ -#include "job.hh" -#include "task.hh" -#include -#include -#include - -using 
namespace eclipse; - -int Job::id_counter_ = 0; - -Job::Job() { - id_ = id_counter_++; - tot_num_map_slot_ = DEFAULT_TNMS; - tot_num_red_slot_ = DEFAULT_TNRS; - num_map_slot_ = DEFAULT_NMS; - num_red_slot_= DEFAULT_NRS; - num_map_task_ = 0; - num_red_task_ = 0; -} -Job::Job(std::string app_path) { - Job(); - SetApplicationPath(app_path); -} -Job::~Job() { -} -int Job::GetId() { - return id_; -} -void Job::SetApplicationPath(std::string app_path) { - app_path_ = app_path; -} -std::string Job::GetApplicationPath() { - return app_path_; -} -void Job::SetTotalNumMapSlot(int tot_num_map_slot) { - tot_num_map_slot_ = tot_num_map_slot; -} -int Job::GetTotalNumMapSlot() { - return tot_num_map_slot_; -} -void Job::SetTotalNumReduceSlot(int tot_num_red_slot) { - tot_num_red_slot_ = tot_num_red_slot; -} -int Job::GetTotalNumReduceSlot() { - return tot_num_red_slot_; -} -void Job::SetNumMapSlot(int num_map_slot) { - num_map_slot_ = num_map_slot; -} -int Job::GetNumMapSlot() { - return num_map_slot_; -} -void Job::SetNumReduceSlot(int num_red_slot) { - num_red_slot_ = num_red_slot; -} -int Job::GetNumReduceSlot() { - return num_red_slot_; -} -void Job::AddTask(Task* task) { - if (task->GetType() == kMap) { - map_tasks_[num_map_task_++] = task; - } - else if (task->GetType() == kReduce) { - red_tasks_[num_red_task_++] = task; - } - else { - std::cout << "[ERROR] type of task has not been setted." 
<< std::endl; - exit(1); - } -} -void Job::AddTask(TaskType type, std::string input_path) { - Task* task = new Task(type, input_path); - AddTask(task); -} -Task* Job::GetTask(TaskType type, int index) { - if (type == kMap) { - if (index >= num_map_task_ || index < 0) { - std::cout << "[ERROR] index is invalid for GetTask()" << std::endl; - exit(1); - } - return map_tasks_[index]; - } - else if (type == kReduce) { - if (index >= num_red_task_ || index < 0) { - std::cout << "[ERROR] index is invalid for GetTask()" << std::endl; - exit(1); - } - return red_tasks_[index]; - } - else { - std::cout << "[ERROR] type is invalid for GetTask()" << std::endl; - exit(1); - } -} diff --git a/src/mapreduce/job.hh b/src/mapreduce/job.hh deleted file mode 100644 index 042d2e2..0000000 --- a/src/mapreduce/job.hh +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once -#include "task.hh" -#include -#include - -#define DEFAULT_TNMS 8*16; -#define DEFAULT_TNRS 8*16; -#define DEFAULT_NMS 8*16; -#define DEFAULT_NRS 8*16; - -namespace eclipse { - -class Job { - private: - int id_; - static int id_counter_; - std::string app_path_; - // The total number of worker thread allowed at a time over the whole - // system. - // Default is the total number of available worker thread over the whole - // system. - int tot_num_map_slot_; - int tot_num_red_slot_; - // The number of woker thread allowed at a time within a node. - // Default is the total number of available worker thread within a node. - int num_map_slot_; - int num_red_slot_; - // The total number of map/reduce tasks. 
- int num_map_task_; - int num_red_task_; - std::vector map_tasks_; - std::vector red_tasks_; - - public: - Job(); - Job(std::string app_path); - ~Job(); - int GetId(); - void SetApplicationPath(std::string app_path); - std::string GetApplicationPath(); - void SetTotalNumMapSlot(int tot_num_map_slot); - int GetTotalNumMapSlot(); - void SetTotalNumReduceSlot(int tot_num_red_slot); - int GetTotalNumReduceSlot(); - void SetNumMapSlot(int num_map_slot); - int GetNumMapSlot(); - void SetNumReduceSlot(int num_red_slot); - int GetNumReduceSlot(); - void AddTask(Task* task); - void AddTask(TaskType type, std::string input_path); - Task* GetTask(TaskType type, int index); -}; - -} diff --git a/src/mapreduce/messages/boost_impl.cc b/src/mapreduce/messages/boost_impl.cc new file mode 100644 index 0000000..a36e907 --- /dev/null +++ b/src/mapreduce/messages/boost_impl.cc @@ -0,0 +1,290 @@ +#include "boost_impl.hh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using eclipse::messages::Message; + +namespace boost { +namespace serialization { + +template + void serialize (Archive& ar, eclipse::messages::Task& c, unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.subjob_id); + ar & BOOST_SERIALIZATION_NVP(c.id); + ar & BOOST_SERIALIZATION_NVP(c.type); + ar & BOOST_SERIALIZATION_NVP(c.library); + ar & BOOST_SERIALIZATION_NVP(c.input_path); + ar & BOOST_SERIALIZATION_NVP(c.func_name); + ar & BOOST_SERIALIZATION_NVP(c.file_output); + ar & BOOST_SERIALIZATION_NVP(c.blocks); + ar & BOOST_SERIALIZATION_NVP(c.leader); + ar & BOOST_SERIALIZATION_NVP(c.func_body); + ar & BOOST_SERIALIZATION_NVP(c.lang); + } + +template + void serialize(Archive& ar, eclipse::messages::IDataInsert& c, unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.map_id); + ar & BOOST_SERIALIZATION_NVP(c.num_reducer); + } + 
+template + void serialize(Archive& ar, eclipse::messages::IGroupInsert& c, unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.map_id); + ar & BOOST_SERIALIZATION_NVP(c.reducer_id); + ar & BOOST_SERIALIZATION_NVP(c.num_block); + } + +template + void serialize(Archive& ar, eclipse::messages::IBlockInsert& c, unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.map_id); + ar & BOOST_SERIALIZATION_NVP(c.reducer_id); + ar & BOOST_SERIALIZATION_NVP(c.block_seq); + } + +template + void serialize (Archive& ar, eclipse::messages::IDataList& c, unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.data); + + } + +template + void serialize (Archive& ar, eclipse::messages::IDataInfo& c, unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.map_id); + ar & BOOST_SERIALIZATION_NVP(c.num_reducer); + } + +template + void serialize(Archive& ar, eclipse::messages::IDataInfoRequest& c, + unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.map_id); + } + +template + void serialize(Archive& ar, eclipse::messages::IGroupInfoRequest& c, + unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.map_id); + ar & BOOST_SERIALIZATION_NVP(c.reducer_id); + } + +template + void serialize(Archive& ar, eclipse::messages::IBlockInfoRequest& c, + unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.map_id); + ar & BOOST_SERIALIZATION_NVP(c.reducer_id); + ar & BOOST_SERIALIZATION_NVP(c.block_seq); + } + +template + void serialize(Archive& ar, eclipse::messages::KeyValueShuffle& c, + unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & 
BOOST_SERIALIZATION_NVP(c.job_id_); + ar & BOOST_SERIALIZATION_NVP(c.map_id_); + ar & BOOST_SERIALIZATION_NVP(c.kv_pairs); + ar & BOOST_SERIALIZATION_NVP(c.number_of_keys); + ar & BOOST_SERIALIZATION_NVP(c.kv_id); + ar & BOOST_SERIALIZATION_NVP(c.node_id); + ar & BOOST_SERIALIZATION_NVP(c.origin_id); + } + +template + void serialize(Archive& ar, eclipse::messages::FinishShuffle& c, + unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.job_id_); + ar & BOOST_SERIALIZATION_NVP(c.map_id_); + ar & BOOST_SERIALIZATION_NVP(c.node_id); + ar & BOOST_SERIALIZATION_NVP(c.kv_id); + } + +template + void serialize(Archive& ar, eclipse::messages::TaskStatus& c, + unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.is_success); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.saved_keys); + ar & BOOST_SERIALIZATION_NVP(c.shuffled_nodes); + ar & BOOST_SERIALIZATION_NVP(c.type); + } + +template + void serialize (Archive& ar, eclipse::messages::Job& c, unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.type); + ar & BOOST_SERIALIZATION_NVP(c.library); + ar & BOOST_SERIALIZATION_NVP(c.map_name); + ar & BOOST_SERIALIZATION_NVP(c.reduce_name); + ar & BOOST_SERIALIZATION_NVP(c.files); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.file_output); + ar & BOOST_SERIALIZATION_NVP(c.func_body); + ar & BOOST_SERIALIZATION_NVP(c.lang); + } + +template + void serialize(Archive& ar, eclipse::messages::JobStatus& c, + unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.is_success); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + } + +template + void serialize(Archive& ar, eclipse::messages::IDataKeys& c, + unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.keys); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + } + +template + void serialize(Archive& ar, eclipse::messages::FinishMap& c, + unsigned int) { + ar 
& BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.nodes); + } + + +template + void serialize(Archive& ar, eclipse::messages::NodesShuffling& c, + unsigned int) { + ar & BASE_OBJECT(Message, c); + ar & BOOST_SERIALIZATION_NVP(c.nodes); + ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.id); + ar & BOOST_SERIALIZATION_NVP(c.kv_id); + } + +using namespace eclipse::messages; +using namespace boost::archive; + +template void serialize (boost::archive::xml_oarchive&, Task&, unsigned); +template void serialize (boost::archive::xml_iarchive&, Task&, unsigned); +template void serialize (boost::archive::binary_iarchive&, Task&, unsigned); +template void serialize (boost::archive::binary_oarchive&, Task&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, Job&, unsigned); +template void serialize (boost::archive::xml_iarchive&, Job&, unsigned); +template void serialize (boost::archive::binary_iarchive&, Job&, unsigned); +template void serialize (boost::archive::binary_oarchive&, Job&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, JobStatus&, unsigned); +template void serialize (boost::archive::xml_iarchive&, JobStatus&, unsigned); +template void serialize (boost::archive::binary_iarchive&, JobStatus&, unsigned); +template void serialize (boost::archive::binary_oarchive&, JobStatus&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, IDataKeys&, unsigned); +template void serialize (boost::archive::xml_iarchive&, IDataKeys&, unsigned); +template void serialize (boost::archive::binary_iarchive&, IDataKeys&, unsigned); +template void serialize (boost::archive::binary_oarchive&, IDataKeys&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, KeyValueShuffle&, unsigned); +template void serialize (boost::archive::xml_iarchive&, KeyValueShuffle&, unsigned); +template void serialize (boost::archive::binary_iarchive&, 
KeyValueShuffle&, unsigned); +template void serialize (boost::archive::binary_oarchive&, KeyValueShuffle&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, FinishShuffle&, unsigned); +template void serialize (boost::archive::xml_iarchive&, FinishShuffle&, unsigned); +template void serialize (boost::archive::binary_iarchive&, FinishShuffle&, unsigned); +template void serialize (boost::archive::binary_oarchive&, FinishShuffle&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, TaskStatus&, unsigned); +template void serialize (boost::archive::xml_iarchive&, TaskStatus&, unsigned); +template void serialize (boost::archive::binary_iarchive&, TaskStatus&, unsigned); +template void serialize (boost::archive::binary_oarchive&, TaskStatus&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, IDataInsert&, unsigned); +template void serialize (boost::archive::xml_iarchive&, IDataInsert&, unsigned); +template void serialize (boost::archive::binary_iarchive&, IDataInsert&, unsigned); +template void serialize (boost::archive::binary_oarchive&, IDataInsert&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, IGroupInsert&, unsigned); +template void serialize (boost::archive::xml_iarchive&, IGroupInsert&, unsigned); +template void serialize (boost::archive::binary_iarchive&, IGroupInsert&, unsigned); +template void serialize (boost::archive::binary_oarchive&, IGroupInsert&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, IDataInfoRequest&, unsigned); +template void serialize (boost::archive::xml_iarchive&, IDataInfoRequest&, unsigned); +template void serialize (boost::archive::binary_iarchive&, IDataInfoRequest&, unsigned); +template void serialize (boost::archive::binary_oarchive&, IDataInfoRequest&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, IGroupInfoRequest&, unsigned); +template void serialize (boost::archive::xml_iarchive&, IGroupInfoRequest&, 
unsigned); +template void serialize (boost::archive::binary_iarchive&, IGroupInfoRequest&, unsigned); +template void serialize (boost::archive::binary_oarchive&, IGroupInfoRequest&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, IBlockInfoRequest&, unsigned); +template void serialize (boost::archive::xml_iarchive&, IBlockInfoRequest&, unsigned); +template void serialize (boost::archive::binary_iarchive&, IBlockInfoRequest&, unsigned); +template void serialize (boost::archive::binary_oarchive&, IBlockInfoRequest&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, IDataList&, unsigned); +template void serialize (boost::archive::xml_iarchive&, IDataList&, unsigned); +template void serialize (boost::archive::binary_iarchive&, IDataList&, unsigned); +template void serialize (boost::archive::binary_oarchive&, IDataList&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, IDataInfo&, unsigned); +template void serialize (boost::archive::xml_iarchive&, IDataInfo&, unsigned); +template void serialize (boost::archive::binary_iarchive&, IDataInfo&, unsigned); +template void serialize (boost::archive::binary_oarchive&, IDataInfo&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, FinishMap&, unsigned); +template void serialize (boost::archive::xml_iarchive&, FinishMap&, unsigned); +template void serialize (boost::archive::binary_iarchive&, FinishMap&, unsigned); +template void serialize (boost::archive::binary_oarchive&, FinishMap&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, NodesShuffling&, unsigned); +template void serialize (boost::archive::xml_iarchive&, NodesShuffling&, unsigned); +template void serialize (boost::archive::binary_iarchive&, NodesShuffling&, unsigned); +template void serialize (boost::archive::binary_oarchive&, NodesShuffling&, unsigned); + +} +} + + +// 4) Also here +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::Task); 
+BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::Job); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::JobStatus); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IDataKeys); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::KeyValueShuffle); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::FinishShuffle); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::TaskStatus); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IDataInsert); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IGroupInsert); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IBlockInsert); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IDataInfoRequest); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IGroupInfoRequest); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IBlockInfoRequest); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IDataList); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IDataInfo); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::FinishMap); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::NodesShuffling); diff --git a/src/mapreduce/messages/boost_impl.hh b/src/mapreduce/messages/boost_impl.hh new file mode 100644 index 0000000..3857c42 --- /dev/null +++ b/src/mapreduce/messages/boost_impl.hh @@ -0,0 +1,105 @@ +// +//! @brief This module implements the serialization using BOOST SERIALIZATION +//! For every new message class that you want to add you must add its +//! functions here and its cc file.
+// +#pragma once + +// 1) Add your class here +#include "../../messages/boost_impl.hh" + +#include "idatalist.hh" +#include "idatainfo.hh" +#include "idatainsert.hh" +#include "igroupinsert.hh" +#include "iblockinsert.hh" +#include "idatainforequest.hh" +#include "igroupinforequest.hh" +#include "iblockinforequest.hh" +#include "key_value_shuffle.h" +#include "finish_shuffle.h" +#include "taskstatus.hh" +#include "job.hh" +#include "jobstatus.hh" +#include "idatakeys.hh" +#include "finishmap.hh" +#include "nodes_shuffling.hh" +#include "task.hh" + +#include +#include +#include + +#ifndef BASE_OBJECT +#define BASE_OBJECT(X,Y) \ + boost::serialization::make_nvp(#X, \ + boost::serialization::base_object(Y)); +#endif +#define ECNS eclipse::messages + +// 2) Also here +namespace boost{ +namespace serialization{ + +template void serialize (Archive&, eclipse::messages::Task&, unsigned); +template void serialize (Archive&, eclipse::messages::TaskStatus&, unsigned); +template void serialize (Archive&, eclipse::messages::Job&, unsigned); +template void serialize (Archive&, eclipse::messages::JobStatus&, unsigned); +template void serialize (Archive&, eclipse::messages::IDataKeys&, unsigned); +template void serialize (Archive&, eclipse::messages::KeyValueShuffle&, unsigned); +template void serialize (Archive&, eclipse::messages::FinishShuffle&, unsigned); +template void serialize (Archive&, eclipse::messages::IDataInsert&, unsigned); +template void serialize (Archive&, eclipse::messages::IGroupInsert&, unsigned); +template void serialize (Archive&, eclipse::messages::IBlockInsert&, unsigned); +template void serialize (Archive&, eclipse::messages::IDataInfoRequest&, unsigned); +template void serialize (Archive&, eclipse::messages::IGroupInfoRequest&, unsigned); +template void serialize (Archive&, eclipse::messages::IBlockInfoRequest&, unsigned); +template void serialize (Archive&, eclipse::messages::IDataList&, unsigned); +template void serialize (Archive&, 
eclipse::messages::IDataInfo&, unsigned); +template void serialize (Archive&, eclipse::messages::NodesShuffling&, unsigned); +} +} + +//BOOST_SERIALIZATION_ASSUME_ABSTRACT(ECNS::Message); + +#define TRACK_NEVER boost::serialization::track_never +// 3) Also here +BOOST_CLASS_EXPORT_KEY(eclipse::messages::Task); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::Job); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::JobStatus); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::IDataKeys); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::KeyValueShuffle); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::FinishShuffle); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::TaskStatus); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::IDataInsert); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::IGroupInsert); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::IBlockInsert); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::IDataInfoRequest); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::IGroupInfoRequest); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::IBlockInfoRequest); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::IDataList); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::IDataInfo); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::FinishMap); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::NodesShuffling); + +// 4) and here +BOOST_CLASS_TRACKING(ECNS::Task, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::TaskStatus, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::IDataInsert, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::IGroupInsert, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::IBlockInsert, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::IDataInfoRequest, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::IGroupInfoRequest, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::IBlockInfoRequest, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::KeyValueShuffle, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::FinishShuffle, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::Job, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::JobStatus, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::IDataKeys, TRACK_NEVER); 
+BOOST_CLASS_TRACKING(ECNS::IDataList, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::IDataInfo, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::FinishMap, TRACK_NEVER); +BOOST_CLASS_TRACKING(ECNS::NodesShuffling, TRACK_NEVER); + +#undef ECNS +#undef TRACK_NEVER diff --git a/src/mapreduce/messages/finish_shuffle.h b/src/mapreduce/messages/finish_shuffle.h index 18cc38d..72268d7 100644 --- a/src/mapreduce/messages/finish_shuffle.h +++ b/src/mapreduce/messages/finish_shuffle.h @@ -11,8 +11,10 @@ struct FinishShuffle: public Message { std::string get_type() const override; - uint32_t job_id_; - uint32_t map_id_; + uint32_t job_id_ = 0; + uint32_t map_id_ = 0; + uint32_t node_id = 0; + uint32_t kv_id = 0; }; } diff --git a/src/messages/finishmap.cc b/src/mapreduce/messages/finishmap.cc similarity index 100% rename from src/messages/finishmap.cc rename to src/mapreduce/messages/finishmap.cc diff --git a/src/messages/finishmap.hh b/src/mapreduce/messages/finishmap.hh similarity index 73% rename from src/messages/finishmap.hh rename to src/mapreduce/messages/finishmap.hh index caf0d79..fef195b 100644 --- a/src/messages/finishmap.hh +++ b/src/mapreduce/messages/finishmap.hh @@ -1,6 +1,7 @@ #pragma once #include -#include "message.hh" +#include +#include "../../messages/message.hh" namespace eclipse { namespace messages { @@ -12,6 +13,7 @@ struct FinishMap: public Message { std::string get_type() const override; uint32_t job_id = 0; + std::vector nodes; }; } diff --git a/src/messages/idatakeys.cc b/src/mapreduce/messages/idatakeys.cc similarity index 100% rename from src/messages/idatakeys.cc rename to src/mapreduce/messages/idatakeys.cc diff --git a/src/messages/idatakeys.hh b/src/mapreduce/messages/idatakeys.hh similarity index 85% rename from src/messages/idatakeys.hh rename to src/mapreduce/messages/idatakeys.hh index 5e7ef72..bfb72b4 100644 --- a/src/messages/idatakeys.hh +++ b/src/mapreduce/messages/idatakeys.hh @@ -1,5 +1,5 @@ #pragma once -#include "message.hh" +#include 
"../../messages/message.hh" #include namespace eclipse { diff --git a/src/mapreduce/messages/idatalist.hh b/src/mapreduce/messages/idatalist.hh index 9a74947..9a46ff6 100644 --- a/src/mapreduce/messages/idatalist.hh +++ b/src/mapreduce/messages/idatalist.hh @@ -4,13 +4,15 @@ #include #include #include -#include "idatainfo.hh" +#include "../mapreduce/messages/idatainfo.hh" namespace eclipse { namespace messages { + struct IDataList: public Message { IDataList() = default; IDataList(std::vector); + std::string get_type() const override; std::vector data; }; diff --git a/src/messages/job.cc b/src/mapreduce/messages/job.cc similarity index 100% rename from src/messages/job.cc rename to src/mapreduce/messages/job.cc diff --git a/src/messages/job.hh b/src/mapreduce/messages/job.hh similarity index 91% rename from src/messages/job.hh rename to src/mapreduce/messages/job.hh index 87c9e97..dca4e86 100644 --- a/src/messages/job.hh +++ b/src/mapreduce/messages/job.hh @@ -1,6 +1,6 @@ #pragma once -#include "message.hh" +#include "../../messages/message.hh" #include #include diff --git a/src/messages/jobstatus.cc b/src/mapreduce/messages/jobstatus.cc similarity index 100% rename from src/messages/jobstatus.cc rename to src/mapreduce/messages/jobstatus.cc diff --git a/src/messages/jobstatus.hh b/src/mapreduce/messages/jobstatus.hh similarity index 83% rename from src/messages/jobstatus.hh rename to src/mapreduce/messages/jobstatus.hh index a7490f1..5d15abf 100644 --- a/src/messages/jobstatus.hh +++ b/src/mapreduce/messages/jobstatus.hh @@ -1,5 +1,5 @@ #pragma once -#include "message.hh" +#include "../../messages/message.hh" namespace eclipse { diff --git a/src/mapreduce/messages/key_value_shuffle.h b/src/mapreduce/messages/key_value_shuffle.h index 5ca8714..59c6371 100644 --- a/src/mapreduce/messages/key_value_shuffle.h +++ b/src/mapreduce/messages/key_value_shuffle.h @@ -2,6 +2,7 @@ #define ECLIPSEMR_MAPREDUCE_MESSAGES_KEYVALUESHUFFLE_H_ #include #include +#include #include 
"../../messages/message.hh" namespace eclipse { namespace messages { @@ -11,12 +12,13 @@ struct KeyValueShuffle: public Message { ~KeyValueShuffle() = default; std::string get_type() const override; - uint32_t job_id_; - uint32_t map_id_; - std::string key_; - std::vector value_; - bool is_header = false; + uint32_t job_id_ = 0; + uint32_t map_id_ = 0; + std::map> kv_pairs; uint32_t number_of_keys = 0; + uint32_t kv_id = 0; + int node_id = 0; + int origin_id = 0; }; diff --git a/src/messages/nodes_shuffling.cc b/src/mapreduce/messages/nodes_shuffling.cc similarity index 100% rename from src/messages/nodes_shuffling.cc rename to src/mapreduce/messages/nodes_shuffling.cc diff --git a/src/messages/nodes_shuffling.hh b/src/mapreduce/messages/nodes_shuffling.hh similarity index 73% rename from src/messages/nodes_shuffling.hh rename to src/mapreduce/messages/nodes_shuffling.hh index cdda39d..1935ef4 100644 --- a/src/messages/nodes_shuffling.hh +++ b/src/mapreduce/messages/nodes_shuffling.hh @@ -1,5 +1,5 @@ #pragma once -#include "message.hh" +#include "../../messages/message.hh" #include namespace eclipse { @@ -10,6 +10,8 @@ struct NodesShuffling: public Message { std::vector nodes; uint32_t job_id = 0; + uint32_t id = 0; + uint32_t kv_id = 0; }; } diff --git a/src/messages/task.cc b/src/mapreduce/messages/task.cc similarity index 100% rename from src/messages/task.cc rename to src/mapreduce/messages/task.cc diff --git a/src/messages/task.hh b/src/mapreduce/messages/task.hh similarity index 73% rename from src/messages/task.hh rename to src/mapreduce/messages/task.hh index 1bbf077..9a9b10d 100644 --- a/src/messages/task.hh +++ b/src/mapreduce/messages/task.hh @@ -1,6 +1,6 @@ #pragma once -#include "message.hh" +#include "../../messages/message.hh" #include #include @@ -8,7 +8,9 @@ namespace eclipse { namespace messages { struct Task: public Message { - std::string get_type() const override; + Task()= default; + ~Task() = default; + virtual std::string get_type() const 
override; std::string type; std::string lang; std::string library, func_name, input_path; @@ -18,6 +20,7 @@ struct Task: public Message { uint32_t subjob_id = 0; uint32_t job_id = 0; uint32_t leader = 0; + uint32_t id = 0; }; } diff --git a/src/messages/taskstatus.cc b/src/mapreduce/messages/taskstatus.cc similarity index 100% rename from src/messages/taskstatus.cc rename to src/mapreduce/messages/taskstatus.cc diff --git a/src/messages/taskstatus.hh b/src/mapreduce/messages/taskstatus.hh similarity index 65% rename from src/messages/taskstatus.hh rename to src/mapreduce/messages/taskstatus.hh index 3ce72e1..78e5bf0 100644 --- a/src/messages/taskstatus.hh +++ b/src/mapreduce/messages/taskstatus.hh @@ -1,5 +1,6 @@ #pragma once -#include "message.hh" +#include "../../messages/message.hh" +#include namespace eclipse { namespace messages { @@ -8,8 +9,9 @@ struct TaskStatus: public Message { std::string get_type() const override; uint32_t job_id = 0; - uint32_t subjob_id = 0; + uint32_t saved_keys = 0; bool is_success = false; + std::vector shuffled_nodes; std::string type; }; diff --git a/src/mapreduce/nodes/peermr.cc b/src/mapreduce/nodes/peermr.cc deleted file mode 100644 index 7800a72..0000000 --- a/src/mapreduce/nodes/peermr.cc +++ /dev/null @@ -1,573 +0,0 @@ -// -// -// -// -// -// -// -// -// -#include "peermr.h" -#include "../messages/boost_impl.hh" -#include "../executor.hh" -#include "../py_executor.hh" -#include "../fs/iwriter.h" -#include -#include -#include -#include -#include -#include - -static int idebug = 0; - -namespace eclipse { -// Constructors {{{ -PeerMR::PeerMR(network::Network* net) : PeerDFS(net) { - directory.open_db(); -} -// }}} -// format {{{ -bool PeerMR::format () { - PeerDFS::format(); - directory.init_db(); - return true; -} -// }}} -// request_idata {{{ -IDataList PeerMR::request_idata_list() { - IDataList output; - directory.select_all_idata_metadata(output); - return output; -} -// }}} -// is_leader {{{ -bool 
PeerMR::is_leader(std::string f) { - return (id == (int)(h(f) % network_size)); -} -// }}} -// ------------- MAPREDUCE ROUTINES ------------------ -// process FinishMap {{{ -template<> void PeerMR::process(FinishMap *m) { - current_nodes_shuffling++; - - if (current_nodes_shuffling >= nodes_shuffling[m->job_id].size() && subjobs_remaining[m->job_id] <= 0) { - jobs_callback[m->job_id](); - } -} -// }}} -// process FinishShuffle {{{ -template<> void PeerMR::process(FinishShuffle *m) { - DEBUG("I got Finish shuffle jobid: %lu", m->job_id_); - - //Make sure all the nodes have finished shuffling - try { - const uint32_t job_id = m->job_id_; - auto it = iwriters_.find(job_id); - if (it != iwriters_.end()) { - it->second->finalize(); - iwriters_.erase(it); - } - - } catch (std::exception& e) { - ERROR("Iwriter exception"); - } - - FinishMap fm; - fm.job_id = m->job_id_; - uint32_t leader = m->job_id_ % network_size; - - if ((int32_t)leader == id) - process(&fm); - else - network->send(leader, &fm); -} -// }}} -// process KeyValueShuffle {{{ -template<> void PeerMR::process(KeyValueShuffle *kv_shuffle) { - auto key = kv_shuffle->key_; - int which_node = h(key) % network_size; - - DEBUG("KVshuffle H=%lu, K=%s, ID=%i, DST=%i", h(key), - key.c_str(), id, which_node); - - - shuffled_nodes[kv_shuffle->job_id_].insert(which_node); - - if (which_node == id) { - if (kv_shuffle->is_header){ - keys_to_be_recv += kv_shuffle->number_of_keys; - return; - } - write_key_value(kv_shuffle); - - current_keys++; - if (current_keys >= keys_to_be_recv){ - current_keys = 0; - keys_to_be_recv = 0; - FinishShuffle fs; - fs.job_id_ = kv_shuffle->job_id_; - process(&fs); - } - - } else { - DEBUG("Forwarding KVS to another node"); - network->send(which_node, kv_shuffle); - } -} -// }}} -// process NodesShuffling {{{ -template<> void PeerMR::process(NodesShuffling* m) { - DEBUG("I got a list of %i keys from map jobid:%lu", m->nodes.size(), m->job_id); - - if (nodes_shuffling.find(m->job_id) == 
nodes_shuffling.end()) { - nodes_shuffling[m->job_id] = vector (m->nodes.begin(), m->nodes.end()); - } else { - //std::copy(.end(), m->nodes.begin(), m->nodes.end()); - std::copy(m->nodes.begin(), m->nodes.end(), back_inserter(nodes_shuffling[m->job_id])); - } - - // Remote repeated elements on the list - auto& v = nodes_shuffling[m->job_id]; - std::sort(v.begin(), v.end()); - auto last = std::unique(v.begin(), v.end()); - v.erase(last, v.end()); -} -// }}} -// process SubJob{{{ -template<> void PeerMR::process(messages::SubJob* m) { - if (m->type == "MAP") { - schedule_map(m); - } -} -// }}} -// process SubJobStatus {{{ -template<> void PeerMR::process(messages::SubJobStatus* m) { - INFO("I got a SubJob status: %d jobid: %u", m->is_success, m->job_id); - if (m->is_success) { - subjobs_remaining[m->job_id]--; - } - - DEBUG("Subjob remaining : %i current nodes:%i", subjobs_remaining[m->job_id], current_nodes_shuffling); - if (subjobs_remaining[m->job_id] <= 0) { - if (current_nodes_shuffling >= nodes_shuffling[m->job_id].size() && m->type == "MAP") { - jobs_callback[m->job_id](); - //current_nodes_shuffling = nodes_shuffling[m->job_id].size(); - //for (auto node : nodes_shuffling[m->job_id]) { - // FinishShuffle fs; - // fs.job_id_ = m->job_id; - // if (node == id) - // process(&fs); - // else - // network->send(node, &fs); - //} - } else if (m->type == "REDUCE") { - jobs_callback[m->job_id](); - idebug =0; - } - } -} -// }}} -// process Task {{{ -template<> void PeerMR::process(Task* m) { - if (m->type == "MAP") { - request_local_map(m); - - } else { - request_local_reduce(m); - } -} -// }}} -// process TaskStatus {{{ -template<> void PeerMR::process(TaskStatus* m) { - INFO("I got a TaskStatus: %d jobid: %u", m->is_success, m->job_id); - if (m->is_success) { - tasks_remaining[m->subjob_id]--; - } - - INFO("Task remaining for job id:%lu = %i", m->job_id, tasks_remaining[m->job_id]); - if (tasks_remaining[m->subjob_id] == 0) { - - SubJobStatus sjob_status; - 
sjob_status.job_id = m->job_id; - sjob_status.is_success = true; - sjob_status.type = m->type; - - int which_node = m->job_id % network_size; - if (which_node == id) - process(&sjob_status); - else - network->send(which_node, &sjob_status); - } -} -// }}} -// process FileInfo {{{ -template<> void PeerMR::process (FileInfo* m) { - PeerDFS::process(m); -} -// }}} -// on_read {{{ -void PeerMR::on_read(messages::Message *msg, int) { - std::string type = msg->get_type(); - - mutex.lock(); - if (type == "KeyValueShuffle") { - auto kv_shuffle = dynamic_cast(msg); - process(kv_shuffle); - - } else if (type == "FinishShuffle") { - auto finish_shuffle = dynamic_cast(msg); - process(finish_shuffle); - - } else if (type == "Task") { - auto task_ = dynamic_cast(msg); - process(task_); - - } else if (type == "TaskStatus") { - auto task_ = dynamic_cast(msg); - process(task_); - - } else if (type == "SubJob") { - auto task_ = dynamic_cast(msg); - process(task_); - - } else if (type == "SubJobStatus") { - auto task_ = dynamic_cast(msg); - process(task_); - - } else if (type == "FinishMap") { - auto task_ = dynamic_cast(msg); - process(task_); - - } else if (type == "NodesShuffling") { - auto task_ = dynamic_cast(msg); - process(task_); - - } else { - PeerDFS::on_read(msg, 0); - } - mutex.unlock(); -} -// }}} -// process_job {{{ -bool PeerMR::process_job (messages::Job* m, std::function f) { - mutex.lock(); - jobs_callback[m->job_id] = f; - - if (m->type == "MAP") { - subjobs_remaining[m->job_id] = m->files.size(); - logger->info("JOB LEADER %i Processing %i jobs", id, m->files.size()); - - for (auto& file : m->files) { - int which_node = h(file) % network_size; - - SubJob sjob; - sjob.type = "MAP"; - sjob.library = m->library; - sjob.map_name = m->map_name; - sjob.reduce_name = m->reduce_name; - sjob.file = file; - sjob.job_id = m->job_id; - sjob.func_body = m->func_body; - sjob.lang = m->lang; - - if (which_node == id) - process(&sjob); - else - network->send(which_node, &sjob); - 
} - } else if (m->type == "REDUCE") { - schedule_reduce(m); - } - mutex.unlock(); - return true; -} -// }}} -// ------------- MAP ROUTINES ------------------ -// schedule_map {{{ -void PeerMR::schedule_map(messages::SubJob* m) { - INFO("File leader %i schedules a map task", id); - auto file = m->file; - FileInfo fi; - fi.num_block = 0; - - directory.select_file_metadata(file, &fi); - current_nodes_shuffling = 0; - nodes_shuffling[m->job_id] = std::vector (); - - int num_blocks = fi.num_block; - if (num_blocks == 0) return; //! Not file found in the db - - - // Generate random subjob id - std::mt19937 rng; - rng.seed(std::random_device()()); - std::uniform_int_distribution dist(1, - std::numeric_limits::max()); - - uint32_t subjob_id = dist(rng); - - map tasks; - for (int i = 0; i < num_blocks; i++) { - BlockInfo bi; - directory.select_block_metadata(file, i, &bi); - auto block_name = bi.name; - auto hash_key = bi.hash_key; - auto block_node = boundaries->get_index(hash_key); - - Task task; - task.job_id = m->job_id; - task.subjob_id = subjob_id; - task.func_name = m->map_name; - task.type = m->type; - task.library = m->library; - task.input_path = m->file; - task.leader = id; - task.func_body = m->func_body; - task.lang = m->lang; - - tasks.insert({block_node, task}); - tasks[block_node].blocks.push_back({hash_key, block_name}); - } - - tasks_remaining[subjob_id] = tasks.size(); - INFO("%d nodes will run maps", tasks.size()); - - for (auto& task : tasks) { - if (task.first == id) { - request_local_map(&task.second); - - } else { - INFO("Forwaring Map task to %d jobid:%" PRIu32, task.first, m->job_id); - network->send (task.first, &task.second); - } - } -} -// }}} -// request_local_map {{{ -void PeerMR::request_local_map (messages::Task* m) { - auto disk_path = GET_STR("path.scratch"); - logger->info ("Executing map subjobid:%lu", m->subjob_id); - for (auto& block : m->blocks) { - logger->info ("Executing map on block: %s", block.second.c_str()); - - //Spin-lock 
until the block has arrived to the node (iterative workflow patch) - string path = disk_path + "/" + block.second; - while(access(path.c_str(), F_OK) == -1); - - request(block.first, block.second, std::bind( - &PeerMR::run_map_onto_block, this, - std::placeholders::_1, - std::placeholders::_2, m)); - } - -} -// }}} -// run_map_onto_block {{{ -void PeerMR::run_map_onto_block(string ignoreme, string block, Task* stask) { - Reply reply; - - INFO("Executing map"); - if (stask->lang == "C++") { - Executor exec(this); - - if (exec.run_map(stask, block)) - reply.message = "MAPDONE"; - else - reply.message = "MAPFAILED"; - - } else if (stask->lang == "Python") { - PYexecutor exec(this); - - if (exec.run_map(stask, block)) - reply.message = "MAPDONE"; - else - reply.message = "MAPFAILED"; - } - - auto job_nodes = shuffled_nodes[stask->job_id]; - NodesShuffling ns; - ns.job_id = stask->job_id; - ns.nodes = std::vector(job_nodes.begin(), job_nodes.end()); - - auto which_node = stask->job_id % network_size; - if ((int)which_node == id) - process(&ns); - else - network->send(which_node, &ns); - - //sleep(10); - notify_task_leader (stask->leader, stask->subjob_id, stask->job_id, "MAP"); -} -// }}} -// notify_task_leader {{{ -void PeerMR::notify_task_leader(int leader, uint32_t subjob_id, uint32_t job_id, string type) { - int leader_node = (int) leader; - - TaskStatus ts; - ts.is_success = true; - ts.job_id = job_id; - ts.subjob_id = subjob_id; - ts.type = type; - - if (leader_node == id) { - process(&ts); - - } else { - network->send(leader_node, &ts); - } -} -// }}} -// write_key_value {{{ -void PeerMR::write_key_value(messages::KeyValueShuffle *kv_shuffle) { - - const uint32_t job_id = kv_shuffle->job_id_; - std::shared_ptr iwriter; - auto it = iwriters_.find(job_id); - if (it == iwriters_.end()) { - const uint32_t map_id = kv_shuffle->map_id_; - iwriter = std::make_shared(job_id, map_id); - iwriters_.insert({job_id, iwriter}); - } - else { - iwriter = it->second; - } - const 
std::string& key = kv_shuffle->key_; - auto& values = kv_shuffle->value_; - for (auto& v : values) { - iwriters_[job_id]->add_key_value(key, v); - } -} -// }}} -// ------------- REDUCE ROUTINES ------------------ -// schedule_reduce {{{ -void PeerMR::schedule_reduce(messages::Job* m) { - subjobs_remaining[m->job_id] = 1; - auto reduce_nodes = nodes_shuffling[m->job_id]; - - tasks_remaining[m->job_id] = reduce_nodes.size(); - logger->info("JOB LEADER %i Processing REDUCE %i jobs", id, reduce_nodes.size()); - for (auto which_node : reduce_nodes) { - Task task; - task.type = "REDUCE"; - task.job_id = m->job_id; - task.func_name = m->reduce_name; - task.library = m->library; - task.leader = id; - task.file_output = m->file_output; - task.func_body = m->func_body; - task.lang = m->lang; - - if (which_node == id) - process(&task); - else - network->send(which_node, &task); - } -} -// }}} -// request_local_reduce {{{ -void PeerMR::request_local_reduce (messages::Task* m) { - logger->info ("Executing reduce jobid:%lu", m->job_id); - auto map_id = 0; - auto job_id = m->job_id; - - IDataInfo di; - di.map_id = map_id; - di.job_id = job_id; - di.num_reducer = 0; - directory.select_idata_metadata(job_id, map_id, &di); - - if (di.num_reducer > 0) { //! 
Perform reduce operation - logger->info("Performing reduce operation"); - if (m->lang == "C++") { - Executor exec(this); - Reply reply; - - if (exec.run_reduce(m)) - reply.message = "MAPDONE"; - else - reply.message = "MAPFAILED"; - - } else if (m->lang == "Python") { - PYexecutor exec(this); - Reply reply; - - if (exec.run_reduce(m)) - reply.message = "MAPDONE"; - else - reply.message = "MAPFAILED"; - } - } - - //notify_task_leader(m->leader, m->job_id, m->job_id, "REDUCE"); -} -// }}} -// ------------- REDUCE OUTPUT ROUTINES ------------------ -// submit_block {{{ -void PeerMR::submit_block(messages::BlockInfo* m) { - //mutex.lock(); - auto file_name = m->file_name; - int which_node = h(file_name) % network_size; - - if (which_node == id) { //If it is its leader node - insert_block(m); - } else { - network->send(which_node, m); - } - //mutex.unlock(); -} -// }}} -// insert_file {{{ -bool PeerMR::insert_file(messages::FileInfo* f) { - //bool ret = directory.file_exist(f->name.c_str()); - - messages::FileInfo sel_fi; - directory.select_file_metadata(f->name.c_str(), &sel_fi); - bool ret = (sel_fi.is_valid && strcmp(sel_fi.name.c_str(), f->name.c_str()) == 0); - - if (ret) { - FileUpdate fu; - fu.name = f->name; - fu.size = f->size + sel_fi.size; - fu.num_block = f->num_block + sel_fi.num_block; - directory.update_file_metadata(fu); - //replicate_metadata(); - - INFO("File:%s exists in db, Updated to (%u, %u)", fu.name.c_str(), fu.size, fu.num_block); - - if (f->reducer_output) { - INFO("RETURNING FROM REDUCER"); - int leader = f->job_id % network_size; - notify_task_leader(leader, f->job_id, f->job_id, "REDUCE"); - - } - - return false; - } - - directory.insert_file_metadata(*f); - //replicate_metadata(); - - logger->info("Saving to SQLite db"); - - if (f->reducer_output) { - INFO("RETURNING FROM REDUCER"); - int leader = f->job_id % network_size; - notify_task_leader(leader, f->job_id, f->job_id, "REDUCE"); - - } - return true; -} -// }}} -// insert_block {{{ 
-bool PeerMR::insert_block(messages::BlockInfo* m) { - // A block from other nodes should be last in the sequence. - messages::BlockInfo last_block; - directory.select_last_block_metadata(m->file_name.c_str(), &last_block); - if(last_block.is_valid) - m->seq = last_block.seq + 1; - - return PeerDFS::insert_block(m); -} -// }}} -} // namespace eclipse diff --git a/src/mapreduce/nodes/peermr.h b/src/mapreduce/nodes/peermr.h deleted file mode 100644 index 609ee8d..0000000 --- a/src/mapreduce/nodes/peermr.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef ECLIPSEMR_NODES_PEERMR_H_ -#define ECLIPSEMR_NODES_PEERMR_H_ -#include -#include -#include -#include -#include -#include "../../nodes/peerdfs.hh" -#include "../fs/directorymr.hh" -#include "../fs/iwriter_interface.hh" -#include "../../messages/message.hh" -#include "../messages/idatalist.hh" -#include "../messages/key_value_shuffle.h" -#include "../messages/finish_shuffle.h" -#include "../messages/job.hh" -#include "../messages/subjob.hh" -#include "../messages/task.hh" - -namespace eclipse { - -class PeerMR: public PeerDFS { - public: - PeerMR(network::Network*); - ~PeerMR() = default; - - void on_read(messages::Message *msg, int) override; - bool format(); - IDataList request_idata_list(); - - bool process_job(messages::Job*, std::function); - void submit_block(messages::BlockInfo*); - template void process(T); - - bool insert_file(messages::FileInfo*) override; - bool insert_block(messages::BlockInfo*) override; - - protected: - bool is_leader(std::string); - void notify_task_leader(int, uint32_t, uint32_t, std::string); - - void schedule_map(messages::SubJob*); - void schedule_reduce(messages::Job*); - - void request_local_map(messages::Task*); - void request_local_reduce(messages::Task*); - - void run_map_onto_block(std::string, std::string, messages::Task*); - void write_key_value(messages::KeyValueShuffle *key_value); - - std::unordered_map subjobs_remaining; - std::unordered_map tasks_remaining; - std::unordered_map> 
jobs_callback; - std::unordered_map> iwriters_; - std::unordered_map> shuffled_nodes; - std::unordered_map> shuffled_keys; - std::unordered_map> nodes_shuffling; - uint32_t current_nodes_shuffling = 0; - uint32_t keys_to_be_recv = 0; - uint32_t current_keys = 0; - DirectoryMR directory; - std::mutex mutex; -}; - -} // namespace eclipse -#endif // ECLIPSEMR_NODES_PEERMR_H_ diff --git a/src/mapreduce/nodes/remotemr.cc b/src/mapreduce/nodes/remotemr.cc deleted file mode 100644 index bb91e47..0000000 --- a/src/mapreduce/nodes/remotemr.cc +++ /dev/null @@ -1,39 +0,0 @@ -#include "remotemr.h" -#include -#include "peermr.h" -#include "../../messages/reply.hh" -#include "../messages/idatalist.hh" - -using namespace eclipse::messages; -namespace ph = std::placeholders; -using std::bind; - -using namespace eclipse; - -// Constructor {{{ -RemoteMR::RemoteMR(PeerMR* p, network::Network* net) : RemoteDFS(p, net) { - peer = dynamic_cast(peer_dfs); - auto& rt = routing_table; - rt.insert({"Job", bind(&RemoteMR::job_handler, this, ph::_1, ph::_2)}); - rt.insert({"IDataList", bind(&RemoteMR::list_idata, this, ph::_1, ph::_2)}); -} -//}}} -// job_handler {{{ -void RemoteMR::job_handler (messages::Message* _m, int n) { - auto m = dynamic_cast(_m); - logger->info("Job received."); - - peer->process_job(m, std::bind([](decltype(network) net, int n) { - Reply reply; - reply.message = "OK"; - net->send(n, &reply); - }, - network, n)); -} -// }}} -// list_idata {{{ -void RemoteMR::list_idata(messages::Message *msg, int n) { - auto reply = peer->request_idata_list(); - network->send(n, &reply); -} -// }}} diff --git a/src/mapreduce/nodes/remotemr.h b/src/mapreduce/nodes/remotemr.h deleted file mode 100644 index 3270e8a..0000000 --- a/src/mapreduce/nodes/remotemr.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include "../../nodes/remotedfs.hh" -#include "../../messages/message.hh" -#include "peermr.h" - -namespace eclipse { - -class RemoteMR: public RemoteDFS { - public: - RemoteMR 
(PeerMR*, network::Network*); - void job_handler(messages::Message*, int); - void list_idata(messages::Message*, int); - - protected: - PeerMR* peer; -}; - -} // namespace eclipse diff --git a/src/mapreduce/py_executor.cc b/src/mapreduce/py_executor.cc index 9435345..3a16864 100644 --- a/src/mapreduce/py_executor.cc +++ b/src/mapreduce/py_executor.cc @@ -16,7 +16,7 @@ using namespace std; namespace eclipse { // Constructor {{{ -PYexecutor::PYexecutor(PeerMR* p) : peer(p) { } +PYexecutor::PYexecutor(TaskExecutor* p) : peer(p) { } PYexecutor::~PYexecutor() { } // }}} // run_map {{{ @@ -69,10 +69,10 @@ bool PYexecutor::run_map (messages::Task* m, std::string input) { KeyValueShuffle kv; kv.job_id_ = m->job_id; // :TODO: kv.map_id_ = 0; - kv.key_ = headers_list[node]; - kv.is_header = true; - kv.number_of_keys = keys_per_node[node]; - peer->process(&kv); + //kv.key_ = headers_list[node]; + //kv.is_header = true; + //kv.number_of_keys = keys_per_node[node]; + //peer->process(&kv); i++; } @@ -80,9 +80,9 @@ bool PYexecutor::run_map (messages::Task* m, std::string input) { KeyValueShuffle kv; kv.job_id_ = m->job_id; // :TODO: kv.map_id_ = 0; - kv.key_ = kv_pair.first; - kv.value_ = kv_pair.second; - peer->process(&kv); + //kv.key_ = kv_pair.first; + //kv.value_ = kv_pair.second; + //peer->process(&kv); } Py_XDECREF(python_module); @@ -141,7 +141,7 @@ bool PYexecutor::run_reduce (messages::Task* task) { bi.r_node = ""; bi.is_committed = 1; - dynamic_cast(peer)->submit_block(&bi); + //dynamic_cast(peer)->submit_block(&bi); iterations++; total_size += block_content.length(); block_content = ""; @@ -161,7 +161,7 @@ bool PYexecutor::run_reduce (messages::Task* task) { bi.r_node = ""; bi.is_committed = 1; - dynamic_cast(peer)->submit_block(&bi); + //dynamic_cast(peer)->submit_block(&bi); iterations++; total_size += block_content.length(); block_content = ""; @@ -180,7 +180,7 @@ bool PYexecutor::run_reduce (messages::Task* task) { fi.reducer_output = true; fi.job_id = 
task->job_id; - dynamic_cast(peer)->process(&fi); + //dynamic_cast(peer)->process(&fi); } catch (std::exception& e) { PANIC("Error in the executer: %s", e.what()); diff --git a/src/mapreduce/py_executor.hh b/src/mapreduce/py_executor.hh index 1273705..f423f1a 100644 --- a/src/mapreduce/py_executor.hh +++ b/src/mapreduce/py_executor.hh @@ -10,8 +10,8 @@ // @todo Merge with c++ executor using a more elegant design pattern. // #pragma once -#include "nodes/peermr.h" -#include "../messages/task.hh" +#include "task_executor.hh" +#include "messages/task.hh" #include #include @@ -22,14 +22,14 @@ namespace eclipse { class PYexecutor { public: - PYexecutor(PeerMR*); + PYexecutor(TaskExecutor*); ~PYexecutor(); bool run_map(messages::Task*, std::string); bool run_reduce(messages::Task*); protected: - PeerMR* peer; + TaskExecutor* peer; std::map> py_map(std::string); std::string py_reduce(std::string, std::string); diff --git a/src/mapreduce/task_executor.cc b/src/mapreduce/task_executor.cc new file mode 100644 index 0000000..c91201d --- /dev/null +++ b/src/mapreduce/task_executor.cc @@ -0,0 +1,363 @@ +#include "task_executor.hh" +#include "../messages/boost_impl.hh" +#include "../common/hash.hh" +#include "../common/histogram.hh" +#include "messages/idatalist.hh" +#include "messages/finish_shuffle.h" +#include "executor.hh" +#include "py_executor.hh" +#include "fs/iwriter.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace std; +using namespace velox; + +mutex local_mut; +mutex local_mut2; + +namespace eclipse { +// Constructors {{{ +TaskExecutor::TaskExecutor(network::ClientHandler* net) : Node() { + network = net; + directory.create_tables(); + network_size = GET_VEC_STR("network.nodes").size(); +} +// }}} +// ------------- MAPREDUCE ROUTINES ------------------ +// LEADER functions +// job_accept {{{ +void TaskExecutor::job_accept(messages::Job* m, std::function fn) { + Histogram 
boundaries(network_size, 100); + boundaries.initialize(); + jobs_callback[m->job_id] = fn; + tasks_remaining[m->job_id] = 0; + + if (m->type == "MAP") { + + std::map>>map_nodes; + + // Organize a map of blocks per each block_node + for (auto& file : m->files) { + model::metadata md = dfs.get_metadata(file); + + for (size_t i = 0; i < md.blocks.size(); i++) { + + uint32_t hash_key_of_block = md.hash_keys[i]; + string block_name = md.blocks[i]; + int which_node = boundaries.get_index(hash_key_of_block); + + auto it = map_nodes.find(which_node); + if (it == map_nodes.end() ) + it = map_nodes.insert({which_node, {}}).first; + + it->second.push_back({hash_key_of_block, block_name}); + } + } + + INFO("JOB LEADER %i Processing %i tasks", id, map_nodes.size()); + // Schedule the tasks + tasks_remaining[m->job_id] = map_nodes.size(); + for (auto& task_stub : map_nodes) { + Task task; + task.job_id = m->job_id; + task.func_name = m->map_name; + task.type = m->type; + task.library = m->library; + task.leader = id; + task.func_body = m->func_body; + task.lang = m->lang; + task.blocks = task_stub.second; + network->send(task_stub.first, &task); + } + } else if (m->type == "REDUCE") { + schedule_reduce(m); + } +} +// }}} +// map_finish_notify {{{ +void TaskExecutor::map_finish_notify(FinishMap* info) { + INFO("FinishMap arrived, remaining=%lu", tasks_remaining[info->job_id] - 1); + for (auto& node : info->nodes) + nodes_shuffling.insert(node); + + if (--tasks_remaining[info->job_id] <= 0) { + current_nodes_shuffling = nodes_shuffling.size(); + + for (auto& node : nodes_shuffling) { + INFO("Sending Finishing shuffle request to node %i", node); + FinishShuffle fs; + fs.job_id_ = info->job_id; + network->send(node, &fs); + } + } +} +// }}} +// TaskStatus {{{ +void TaskExecutor::task_accept_status(TaskStatus* m) { + INFO("TaskStatus arrived | JOBID: %u", m->job_id); + + if (m->type == "REDUCE") { + INFO("Task remaining for job id:%lu = %i", m->job_id, tasks_remaining[m->job_id]); + 
if (--tasks_remaining[m->job_id] <= 0) { + jobs_callback[m->job_id](); + + // flush nodes_shuffling + nodes_shuffling.clear(); + tasks_remaining.erase(m->job_id); + jobs_callback.erase(m->job_id); + } + + + } else if (m->type == "MAP"){ + INFO("Task remaining for job id:%lu = %i", m->job_id, current_nodes_shuffling); + if (--current_nodes_shuffling <= 0) { + jobs_callback[m->job_id](); + tasks_remaining.erase(m->job_id); + } + } +} +// }}} +// Shuffler funtions +// shuffle_finish_notify {{{ + void TaskExecutor::shuffle_finish_notify(uint32_t job_id) { + INFO("I got Finish shuffle jobid: %lu", job_id); + + //Make sure all the nodes have finished shuffling + try { + auto it = iwriters_.find(job_id); + if (it != iwriters_.end()) { + it->second->finalize(); + iwriters_.erase(it); + } + + } catch (std::exception& e) { + ERROR("Iwriter exception"); + } + + TaskStatus ts; + ts.job_id = job_id; + ts.type = "MAP"; + uint32_t leader = job_id % network_size; + + DEBUG("Sending task status to leader %u", leader); + network->send(leader, &ts); +} +// }}} +// key_value_store {{{ +void TaskExecutor::key_value_store(KeyValueShuffle *kv) { + INFO("KVshuffle KV_ID=%lu, ID=%i, DST=%i", kv->kv_id, id, kv->node_id); + + if (kv->node_id == id) { + + std::thread([&, this] (KeyValueShuffle kv) { + write_key_value(&kv); + + NodesShuffling fs; + fs.job_id = kv.job_id_; + fs.id = id; + local_mut2.lock(); + network->send(kv.origin_id, &fs); + local_mut2.unlock(); + }, *kv).detach(); + + } else { + DEBUG("Forwarding KVS to another node"); + network->send(kv->node_id, kv); + } +} +// }}} +// write_key_value {{{ +void TaskExecutor::write_key_value(messages::KeyValueShuffle *kv_shuffle) { + + const uint32_t job_id = kv_shuffle->job_id_; + std::shared_ptr iwriter; + auto it = iwriters_.find(job_id); + if (it == iwriters_.end()) { + const uint32_t map_id = kv_shuffle->map_id_; + iwriter = std::make_shared(job_id, map_id); + iwriters_.insert({job_id, iwriter}); + } else { + iwriter = it->second; + } 
+ + for (auto& pair : kv_shuffle->kv_pairs) { + const std::string& key = pair.first; + auto& values = pair.second; + + for (auto& v : values) { + iwriter->add_key_value(key, v); + } + } +} +// }}} +// ------------- MAP ROUTINES ------------------ +// task_accezpt {{{ + void TaskExecutor::task_accept(Task* m) { + if (m->type == "MAP") { + request_local_map(m); + + } else { + request_local_reduce(m); + } +} +// }}} +// request_local_map {{{ +void TaskExecutor::request_local_map (messages::Task* task) { + if (task->lang == "C++") { + + std::thread([&](Task task) { + Executor exec(this); + exec.run_map(&task); + INFO("MAP has finished"); + }, *task).detach(); + + } else if (task->lang == "Python") { + PYexecutor exec(this); + for (auto& block : task->blocks) { + string block_str = local_io.read(block.second); + exec.run_map(task, block_str); + } + } +} +// }}} +// notify_task_leader {{{ +void TaskExecutor::notify_task_leader(int leader, uint32_t job_id, string type) { + int leader_node = (int) leader; + + TaskStatus ts; + ts.is_success = true; + ts.job_id = job_id; + ts.type = type; + + network->send(leader_node, &ts); +} +// }}} +// notify_map_is_finished {{{ +void TaskExecutor::notify_map_is_finished(uint32_t job_id, + std::vector nodes) { + + FinishMap ts; + ts.job_id = job_id; + ts.nodes = nodes; + INFO("FINISHED MAP "); + + local_mut.lock(); + tasker_remaining_job.insert({job_id, ts}); + local_mut.unlock(); + + try_finish_map(job_id); +} +// }}} +// insert_key_value {{{ +void TaskExecutor::insert_key_value(KeyValueShuffle *kv) { + kv->origin_id = id; + local_mut.lock(); + tasker_remaining_nodes_shuffling.insert(kv->node_id); + local_mut.unlock(); + network->send(kv->node_id, kv); +} +// }}} +// try_finish_map {{{ +void TaskExecutor::try_finish_map(uint32_t job_id) { + local_mut.lock(); + if (tasker_remaining_job.find(job_id) != tasker_remaining_job.end()) { + if (tasker_remaining_nodes_shuffling.empty()) { + auto& ts = tasker_remaining_job[job_id]; + uint32_t 
leader = job_id % network_size; + network->send(leader, &ts); + } + } + local_mut.unlock(); +} +// }}} +// shuffle_is_done {{{ +void TaskExecutor::shuffle_is_done(uint32_t job_id, uint32_t id) { + local_mut.lock(); + auto itr = tasker_remaining_nodes_shuffling.find(id); + if(itr!=tasker_remaining_nodes_shuffling.end()){ + tasker_remaining_nodes_shuffling.erase(itr); + } + local_mut.unlock(); + + try_finish_map(job_id); +} +// }}} +// ------------- REDUCE ROUTINES ------------------ +// schedule_reduce {{{ +void TaskExecutor::schedule_reduce(messages::Job* m) { + auto reduce_nodes = nodes_shuffling; + tasks_remaining[m->job_id] = reduce_nodes.size(); + + logger->info("JOB LEADER %i Processing REDUCE %i jobs", id, reduce_nodes.size()); + + if (dfs.exists(m->file_output)) + dfs.remove(m->file_output); + + dfs.touch(m->file_output); + + for (auto which_node : reduce_nodes) { + Task task; + task.type = "REDUCE"; + task.job_id = m->job_id; + task.func_name = m->reduce_name; + task.library = m->library; + task.leader = id; + task.file_output = m->file_output; + task.func_body = m->func_body; + task.lang = m->lang; + + //if (which_node == id) + // process(&task); + //else + network->send(which_node, &task); + } +} +// }}} +// request_local_reduce {{{ +void TaskExecutor::request_local_reduce (messages::Task* m) { + logger->info ("Executing reduce jobid:%lu", m->job_id); + auto map_id = 0; + auto job_id = m->job_id; + + IDataInfo di; + di.map_id = map_id; + di.job_id = job_id; + di.num_reducer = 0; + directory.select_idata_metadata(job_id, map_id, &di); + + if (di.num_reducer > 0) { //! 
Perform reduce operation + if (m->lang == "C++") { + + std::async(std::launch::async, [&]() { + logger->info("Performing reduce operation"); + Executor exec(this); + Task copy_task = *m; + exec.run_reduce(©_task); + }); + + } else if (m->lang == "Python") { + PYexecutor exec(this); + Reply reply; + + if (exec.run_reduce(m)) + reply.message = "MAPDONE"; + else + reply.message = "MAPFAILED"; + } + } + + //notify_task_leader(m->leader, m->job_id, m->job_id, "REDUCE"); +} +// }}} +} // namespace eclipse diff --git a/src/mapreduce/task_executor.hh b/src/mapreduce/task_executor.hh new file mode 100644 index 0000000..bea35d8 --- /dev/null +++ b/src/mapreduce/task_executor.hh @@ -0,0 +1,64 @@ +#pragma once +#include "../nodes/node.hh" +#include "../blocknode/block_node.hh" +#include "../messages/message.hh" + +#include "fs/directorymr.hh" +#include "fs/iwriter_interface.hh" +#include "client/dfs.hh" +#include "messages/job.hh" +#include "messages/job.hh" +#include "messages/task.hh" +#include "messages/taskstatus.hh" +#include "messages/finishmap.hh" +#include "messages/nodes_shuffling.hh" +#include "messages/key_value_shuffle.h" + +#include +#include +#include +#include + +namespace eclipse { + +class TaskExecutor : public Node { + public: + TaskExecutor(network::ClientHandler*); + ~TaskExecutor() = default; + + void job_accept(messages::Job* job, std::function fn); + void task_accept(messages::Task* task); + void task_accept_status(messages::TaskStatus* status); + void key_value_store(KeyValueShuffle* kv); + void insert_key_value(messages::KeyValueShuffle *kv); + void shuffle_is_done(uint32_t, uint32_t); + + void map_finish_notify(messages::FinishMap*); + void shuffle_finish_notify(uint32_t job_id); + void notify_map_is_finished(uint32_t job_id, std::vector); + void notify_task_leader(int, uint32_t, std::string); + + protected: + void schedule_reduce(messages::Job*); + void request_local_map(messages::Task*); + void request_local_reduce(messages::Task*); + + void 
write_key_value(messages::KeyValueShuffle *key_value); + void try_finish_map(uint32_t job_id); + + std::unordered_map tasks_remaining; + std::unordered_map> jobs_callback; + std::unordered_map> iwriters_; + std::set nodes_shuffling; + + std::multiset tasker_remaining_nodes_shuffling; + std::map tasker_remaining_job; + + uint32_t current_nodes_shuffling = 0; + uint32_t network_size; + DirectoryMR directory; + velox::DFS dfs; + Local_io local_io; +}; + +} diff --git a/src/mapreduce/task_executor_router.cc b/src/mapreduce/task_executor_router.cc new file mode 100644 index 0000000..8d58b4e --- /dev/null +++ b/src/mapreduce/task_executor_router.cc @@ -0,0 +1,73 @@ +#include "task_executor_router.hh" +#include "../common/context_singleton.hh" +#include "../messages/boost_impl.hh" +#include "messages/finish_shuffle.h" +#include +#include + +using namespace std; +using namespace eclipse; +using namespace eclipse::messages; +namespace ph = std::placeholders; + +// Constructor {{{ +TaskExecutorRouter::TaskExecutorRouter (TaskExecutor* exec, Router* router) : RouterDecorator(router) { + executor = exec; + + using namespace std::placeholders; + using std::placeholders::_1; + using std::placeholders::_2; + auto& rt = routing_table; + rt.insert({"Job", bind(&TaskExecutorRouter::job_accept, this, _1, _2)}); + rt.insert({"Task", bind(&TaskExecutorRouter::task_accept, this, _1, _2)}); + rt.insert({"TaskStatus", bind(&TaskExecutorRouter::task_status_accept, this, _1, _2)}); + rt.insert({"KeyValueShuffle", bind(&TaskExecutorRouter::keyvalue_accept, this, _1, _2)}); + rt.insert({"FinishMap", bind(&TaskExecutorRouter::finish_map, this, _1, _2)}); + rt.insert({"FinishShuffle", bind(&TaskExecutorRouter::finish_shuffle, this, _1, _2)}); + rt.insert({"NodesShuffling", bind(&TaskExecutorRouter::nodes_shuffling, this, _1, _2)}); +} +// }}} +// job_accept {{{ +void TaskExecutorRouter::job_accept(Message* m, Channel* channel) { + Job* job = dynamic_cast(m); + executor->job_accept(job, 
std::bind([](Channel* channel) { + Reply reply; + reply.message = "OK"; + channel->do_write(&reply); + }, + channel)); +} +// }}} +// task_accept {{{ +void TaskExecutorRouter::task_accept(messages::Message* m , Channel*) { + executor->task_accept(dynamic_cast(m)); +} +// }}} +// task_status_accept {{{ +void TaskExecutorRouter::task_status_accept(messages::Message* m, Channel*) { + executor->task_accept_status(dynamic_cast(m)); +} +// }}} +// keyvalue_accept {{{ +void TaskExecutorRouter::keyvalue_accept(messages::Message* m, Channel*) { + executor->key_value_store(dynamic_cast(m)); +} +// }}} +// finish_map {{{ +void TaskExecutorRouter::finish_map(messages::Message* m_, Channel*) { + FinishMap* fm = dynamic_cast (m_); + executor->map_finish_notify(fm); +} +// }}} +// finish_shuffle {{{ +void TaskExecutorRouter::finish_shuffle(messages::Message* m, Channel*) { + FinishShuffle* fs = dynamic_cast (m); + executor->shuffle_finish_notify(fs->job_id_); +} +// }}} +// nodes_shuffling {{{ +void TaskExecutorRouter::nodes_shuffling(messages::Message* m, Channel*) { + NodesShuffling* ns = dynamic_cast (m); + executor->shuffle_is_done(ns->job_id, ns->id); +} +// }}} diff --git a/src/mapreduce/task_executor_router.hh b/src/mapreduce/task_executor_router.hh new file mode 100644 index 0000000..90d12dc --- /dev/null +++ b/src/mapreduce/task_executor_router.hh @@ -0,0 +1,25 @@ +#pragma once +#include "../network/router_decorator.hh" +#include "task_executor.hh" + +namespace eclipse { + +//! 
+class TaskExecutorRouter: public RouterDecorator { + public: + TaskExecutorRouter(TaskExecutor*, Router*); + ~TaskExecutorRouter() {}; + + protected: + TaskExecutor* executor; + + void job_accept(messages::Message*, Channel*); + void task_accept(messages::Message*, Channel*); + void task_status_accept(messages::Message*, Channel*); + void keyvalue_accept(messages::Message*, Channel*); + void finish_map(messages::Message*, Channel*); + void finish_shuffle(messages::Message*, Channel*); + void nodes_shuffling(messages::Message*, Channel*); +}; + +} /* eclipse */ diff --git a/src/messages/IOoperation.cc b/src/messages/IOoperation.cc new file mode 100644 index 0000000..41ee118 --- /dev/null +++ b/src/messages/IOoperation.cc @@ -0,0 +1,4 @@ +#include "IOoperation.hh" + +using namespace eclipse::messages; +std::string IOoperation::get_type() const { return "IOoperation"; } diff --git a/src/messages/IOoperation.hh b/src/messages/IOoperation.hh new file mode 100644 index 0000000..ef57711 --- /dev/null +++ b/src/messages/IOoperation.hh @@ -0,0 +1,34 @@ +#pragma once +#include "message.hh" +#include "../common/block.hh" +#include "../common/blockmetadata.hh" + +namespace eclipse { +namespace messages { + +struct IOoperation: public Message { + std::string get_type() const override; + + enum class OpType { + BLOCK_INSERT, + BLOCK_INSERT_REPLICA, + BLOCK_DELETE, + BLOCK_DELETE_REPLICA, + BLOCK_REQUEST, + BLOCK_TRANSFER, + BLOCK_UPDATE, + BLOCK_UPDATE_REPLICA + }; + + OpType operation; + std::string option; + + uint64_t pos = 0; + uint64_t length = 0; + + BlockMetadata block_metadata; + Block block; +}; + +} +} diff --git a/src/messages/blockinfo.hh b/src/messages/blockinfo.hh index 6d816e3..cf0631e 100644 --- a/src/messages/blockinfo.hh +++ b/src/messages/blockinfo.hh @@ -11,7 +11,7 @@ namespace messages { std::string file_name; unsigned int seq; uint32_t hash_key; - uint32_t size; + uint64_t size; unsigned int type; int replica; std::string node; diff --git 
a/src/messages/blockrequest.hh b/src/messages/blockrequest.hh index d35de20..a40b315 100644 --- a/src/messages/blockrequest.hh +++ b/src/messages/blockrequest.hh @@ -8,8 +8,11 @@ namespace messages { struct BlockRequest: public Message { BlockRequest () = default; - std::string get_type() const override; + + uint64_t off; + uint64_t len; + bool should_read_partially = false; std::string name; uint32_t hash_key; }; diff --git a/src/messages/blockstatus.cc b/src/messages/blockstatus.cc new file mode 100644 index 0000000..4b6abdc --- /dev/null +++ b/src/messages/blockstatus.cc @@ -0,0 +1,5 @@ +#include "blockstatus.hh" + +using namespace eclipse::messages; + +std::string BlockStatus::get_type() const { return "BlockStatus"; } diff --git a/src/messages/blockstatus.hh b/src/messages/blockstatus.hh new file mode 100644 index 0000000..c3aac9c --- /dev/null +++ b/src/messages/blockstatus.hh @@ -0,0 +1,17 @@ +#pragma once +#include "message.hh" +#include + +namespace eclipse { +namespace messages { + +struct BlockStatus: public Message { + std::string get_type() const override; + + std::string name; + uint32_t hash_key; + bool success = false; +}; + +} +} diff --git a/src/messages/blockupdate.hh b/src/messages/blockupdate.hh index 5223c08..657057c 100644 --- a/src/messages/blockupdate.hh +++ b/src/messages/blockupdate.hh @@ -9,14 +9,14 @@ namespace messages { std::string name; std::string file_name; - unsigned int seq; - uint32_t hash_key; - uint32_t size; - int replica; + unsigned int seq = 0; + uint32_t hash_key = 0; + uint32_t size = 0; + int replica = 0; std::string content; - uint32_t pos; - uint32_t len; - bool is_header; + uint32_t pos = 0; + uint32_t len = 0; + bool is_header = false; }; } } diff --git a/src/messages/boost_impl.cc b/src/messages/boost_impl.cc index 3ac76fb..fcc3fc6 100644 --- a/src/messages/boost_impl.cc +++ b/src/messages/boost_impl.cc @@ -2,8 +2,9 @@ #include #include -#include #include +#include +#include #include #include #include @@ -67,6 +68,8 
@@ template ar & BOOST_SERIALIZATION_NVP(c.replica); ar & BOOST_SERIALIZATION_NVP(c.reducer_output); ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.uploading); + ar & BOOST_SERIALIZATION_NVP(c.blocks_metadata); } template @@ -75,6 +78,8 @@ template ar & BOOST_SERIALIZATION_NVP(c.name); ar & BOOST_SERIALIZATION_NVP(c.size); ar & BOOST_SERIALIZATION_NVP(c.num_block); + ar & BOOST_SERIALIZATION_NVP(c.blocks_metadata); + ar & BOOST_SERIALIZATION_NVP(c.is_append); } template @@ -109,21 +114,6 @@ template ar & BOOST_SERIALIZATION_NVP(c.is_header); } -template - void serialize (Archive& ar, eclipse::messages::Task& c, unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.subjob_id); - ar & BOOST_SERIALIZATION_NVP(c.type); - ar & BOOST_SERIALIZATION_NVP(c.library); - ar & BOOST_SERIALIZATION_NVP(c.input_path); - ar & BOOST_SERIALIZATION_NVP(c.func_name); - ar & BOOST_SERIALIZATION_NVP(c.file_output); - ar & BOOST_SERIALIZATION_NVP(c.blocks); - ar & BOOST_SERIALIZATION_NVP(c.leader); - ar & BOOST_SERIALIZATION_NVP(c.func_body); - ar & BOOST_SERIALIZATION_NVP(c.lang); - } template void serialize (Archive& ar, eclipse::messages::FileList& c, unsigned int) { ar & BASE_OBJECT(Message, c); @@ -154,17 +144,20 @@ template ar & BASE_OBJECT(Message, c); ar & BOOST_SERIALIZATION_NVP(c.name); ar & BOOST_SERIALIZATION_NVP(c.hash_key); + ar & BOOST_SERIALIZATION_NVP(c.name); + ar & BOOST_SERIALIZATION_NVP(c.hash_key); + ar & BOOST_SERIALIZATION_NVP(c.off); + ar & BOOST_SERIALIZATION_NVP(c.len); + ar & BOOST_SERIALIZATION_NVP(c.should_read_partially); } template void serialize (Archive& ar, eclipse::messages::FileDescription& c, unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.name); - ar & BOOST_SERIALIZATION_NVP(c.size); + using eclipse::messages::FileInfo; + ar & BASE_OBJECT(FileInfo, c); ar & BOOST_SERIALIZATION_NVP(c.blocks); ar & 
BOOST_SERIALIZATION_NVP(c.hash_keys); ar & BOOST_SERIALIZATION_NVP(c.block_size); - ar & BOOST_SERIALIZATION_NVP(c.replica); } template @@ -203,173 +196,39 @@ template } template - void serialize(Archive& ar, eclipse::messages::IDataInsert& c, unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.map_id); - ar & BOOST_SERIALIZATION_NVP(c.num_reducer); - } - -template - void serialize(Archive& ar, eclipse::messages::IGroupInsert& c, unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.map_id); - ar & BOOST_SERIALIZATION_NVP(c.reducer_id); - ar & BOOST_SERIALIZATION_NVP(c.num_block); - } - -template - void serialize(Archive& ar, eclipse::messages::IBlockInsert& c, unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.map_id); - ar & BOOST_SERIALIZATION_NVP(c.reducer_id); - ar & BOOST_SERIALIZATION_NVP(c.block_seq); - } - -template - void serialize (Archive& ar, eclipse::messages::IDataList& c, unsigned int) { + void serialize (Archive& ar, eclipse::messages::BlockStatus& c, unsigned int) { ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.data); - - } - -template - void serialize (Archive& ar, eclipse::messages::IDataInfo& c, unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.map_id); - ar & BOOST_SERIALIZATION_NVP(c.num_reducer); - } - -template - void serialize(Archive& ar, eclipse::messages::IDataInfoRequest& c, - unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.map_id); - } - -template - void serialize(Archive& ar, eclipse::messages::IGroupInfoRequest& c, - unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.map_id); - ar & 
BOOST_SERIALIZATION_NVP(c.reducer_id); - } - -template - void serialize(Archive& ar, eclipse::messages::IBlockInfoRequest& c, - unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.map_id); - ar & BOOST_SERIALIZATION_NVP(c.reducer_id); - ar & BOOST_SERIALIZATION_NVP(c.block_seq); - } - -template - void serialize(Archive& ar, eclipse::messages::KeyValueShuffle& c, - unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id_); - ar & BOOST_SERIALIZATION_NVP(c.map_id_); - ar & BOOST_SERIALIZATION_NVP(c.key_); - ar & BOOST_SERIALIZATION_NVP(c.value_); - ar & BOOST_SERIALIZATION_NVP(c.is_header); - ar & BOOST_SERIALIZATION_NVP(c.number_of_keys); - } - -template - void serialize(Archive& ar, eclipse::messages::FinishShuffle& c, - unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id_); - ar & BOOST_SERIALIZATION_NVP(c.map_id_); - } - -template - void serialize(Archive& ar, eclipse::messages::TaskStatus& c, - unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.is_success); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.subjob_id); - ar & BOOST_SERIALIZATION_NVP(c.type); - } - -template - void serialize (Archive& ar, eclipse::messages::Job& c, unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.type); - ar & BOOST_SERIALIZATION_NVP(c.library); - ar & BOOST_SERIALIZATION_NVP(c.map_name); - ar & BOOST_SERIALIZATION_NVP(c.reduce_name); - ar & BOOST_SERIALIZATION_NVP(c.files); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.file_output); - ar & BOOST_SERIALIZATION_NVP(c.func_body); - ar & BOOST_SERIALIZATION_NVP(c.lang); - } - -template - void serialize(Archive& ar, eclipse::messages::JobStatus& c, - unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.is_success); - ar & BOOST_SERIALIZATION_NVP(c.job_id); 
- } - -template - void serialize (Archive& ar, eclipse::messages::SubJob& c, unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.type); - ar & BOOST_SERIALIZATION_NVP(c.library); - ar & BOOST_SERIALIZATION_NVP(c.map_name); - ar & BOOST_SERIALIZATION_NVP(c.reduce_name); - ar & BOOST_SERIALIZATION_NVP(c.file); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.func_body); - ar & BOOST_SERIALIZATION_NVP(c.lang); - } - -template - void serialize(Archive& ar, eclipse::messages::SubJobStatus& c, - unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.is_success); - ar & BOOST_SERIALIZATION_NVP(c.job_id); - ar & BOOST_SERIALIZATION_NVP(c.subjob_id); - ar & BOOST_SERIALIZATION_NVP(c.type); - } - -template - void serialize(Archive& ar, eclipse::messages::IDataKeys& c, - unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.keys); - ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.name); + ar & BOOST_SERIALIZATION_NVP(c.hash_key); + ar & BOOST_SERIALIZATION_NVP(c.success); } template - void serialize(Archive& ar, eclipse::messages::FinishMap& c, - unsigned int) { - ar & BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.job_id); + void serialize (Archive& ar, eclipse::BlockMetadata& c, unsigned int) { + ar & BOOST_SERIALIZATION_NVP(c.name); + ar & BOOST_SERIALIZATION_NVP(c.file_name); + ar & BOOST_SERIALIZATION_NVP(c.seq); + ar & BOOST_SERIALIZATION_NVP(c.hash_key); + ar & BOOST_SERIALIZATION_NVP(c.size); + ar & BOOST_SERIALIZATION_NVP(c.type); + ar & BOOST_SERIALIZATION_NVP(c.replica); + ar & BOOST_SERIALIZATION_NVP(c.node); + ar & BOOST_SERIALIZATION_NVP(c.l_node); + ar & BOOST_SERIALIZATION_NVP(c.r_node); + ar & BOOST_SERIALIZATION_NVP(c.is_committed); } - template - void serialize(Archive& ar, eclipse::messages::NodesShuffling& c, - unsigned int) { + void serialize (Archive& ar, eclipse::messages::IOoperation& c, unsigned int) { ar & 
BASE_OBJECT(Message, c); - ar & BOOST_SERIALIZATION_NVP(c.nodes); - ar & BOOST_SERIALIZATION_NVP(c.job_id); + ar & BOOST_SERIALIZATION_NVP(c.operation); + ar & BOOST_SERIALIZATION_NVP(c.option); + ar & BOOST_SERIALIZATION_NVP(c.pos); + ar & BOOST_SERIALIZATION_NVP(c.length); + ar & BOOST_SERIALIZATION_NVP(c.block); + ar & BOOST_SERIALIZATION_NVP(c.block_metadata); } - using namespace eclipse::messages; using namespace boost::archive; @@ -403,11 +262,6 @@ template void serialize (boost::archive::xml_iarchive&, KeyRequest&, unsigned); template void serialize (boost::archive::binary_iarchive&, KeyRequest&, unsigned); template void serialize (boost::archive::binary_oarchive&, KeyRequest&, unsigned); -template void serialize (boost::archive::xml_oarchive&, Task&, unsigned); -template void serialize (boost::archive::xml_iarchive&, Task&, unsigned); -template void serialize (boost::archive::binary_iarchive&, Task&, unsigned); -template void serialize (boost::archive::binary_oarchive&, Task&, unsigned); - template void serialize (boost::archive::xml_oarchive&, FileInfo&, unsigned); template void serialize (boost::archive::xml_iarchive&, FileInfo&, unsigned); template void serialize (boost::archive::binary_iarchive&, FileInfo&, unsigned); @@ -483,102 +337,31 @@ template void serialize (boost::archive::xml_iarchive&, MetaData&, unsigned); template void serialize (boost::archive::binary_iarchive&, MetaData&, unsigned); template void serialize (boost::archive::binary_oarchive&, MetaData&, unsigned); -template void serialize (boost::archive::xml_oarchive&, Job&, unsigned); -template void serialize (boost::archive::xml_iarchive&, Job&, unsigned); -template void serialize (boost::archive::binary_iarchive&, Job&, unsigned); -template void serialize (boost::archive::binary_oarchive&, Job&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, JobStatus&, unsigned); -template void serialize (boost::archive::xml_iarchive&, JobStatus&, unsigned); -template void 
serialize (boost::archive::binary_iarchive&, JobStatus&, unsigned); -template void serialize (boost::archive::binary_oarchive&, JobStatus&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, SubJob&, unsigned); -template void serialize (boost::archive::xml_iarchive&, SubJob&, unsigned); -template void serialize (boost::archive::binary_iarchive&, SubJob&, unsigned); -template void serialize (boost::archive::binary_oarchive&, SubJob&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, SubJobStatus&, unsigned); -template void serialize (boost::archive::xml_iarchive&, SubJobStatus&, unsigned); -template void serialize (boost::archive::binary_iarchive&, SubJobStatus&, unsigned); -template void serialize (boost::archive::binary_oarchive&, SubJobStatus&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, IDataKeys&, unsigned); -template void serialize (boost::archive::xml_iarchive&, IDataKeys&, unsigned); -template void serialize (boost::archive::binary_iarchive&, IDataKeys&, unsigned); -template void serialize (boost::archive::binary_oarchive&, IDataKeys&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, KeyValueShuffle&, unsigned); -template void serialize (boost::archive::xml_iarchive&, KeyValueShuffle&, unsigned); -template void serialize (boost::archive::binary_iarchive&, KeyValueShuffle&, unsigned); -template void serialize (boost::archive::binary_oarchive&, KeyValueShuffle&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, FinishShuffle&, unsigned); -template void serialize (boost::archive::xml_iarchive&, FinishShuffle&, unsigned); -template void serialize (boost::archive::binary_iarchive&, FinishShuffle&, unsigned); -template void serialize (boost::archive::binary_oarchive&, FinishShuffle&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, TaskStatus&, unsigned); -template void serialize (boost::archive::xml_iarchive&, TaskStatus&, unsigned); 
-template void serialize (boost::archive::binary_iarchive&, TaskStatus&, unsigned); -template void serialize (boost::archive::binary_oarchive&, TaskStatus&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, IDataInsert&, unsigned); -template void serialize (boost::archive::xml_iarchive&, IDataInsert&, unsigned); -template void serialize (boost::archive::binary_iarchive&, IDataInsert&, unsigned); -template void serialize (boost::archive::binary_oarchive&, IDataInsert&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, IGroupInsert&, unsigned); -template void serialize (boost::archive::xml_iarchive&, IGroupInsert&, unsigned); -template void serialize (boost::archive::binary_iarchive&, IGroupInsert&, unsigned); -template void serialize (boost::archive::binary_oarchive&, IGroupInsert&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, IDataInfoRequest&, unsigned); -template void serialize (boost::archive::xml_iarchive&, IDataInfoRequest&, unsigned); -template void serialize (boost::archive::binary_iarchive&, IDataInfoRequest&, unsigned); -template void serialize (boost::archive::binary_oarchive&, IDataInfoRequest&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, IGroupInfoRequest&, unsigned); -template void serialize (boost::archive::xml_iarchive&, IGroupInfoRequest&, unsigned); -template void serialize (boost::archive::binary_iarchive&, IGroupInfoRequest&, unsigned); -template void serialize (boost::archive::binary_oarchive&, IGroupInfoRequest&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, IBlockInfoRequest&, unsigned); -template void serialize (boost::archive::xml_iarchive&, IBlockInfoRequest&, unsigned); -template void serialize (boost::archive::binary_iarchive&, IBlockInfoRequest&, unsigned); -template void serialize (boost::archive::binary_oarchive&, IBlockInfoRequest&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, IDataList&, 
unsigned); -template void serialize (boost::archive::xml_iarchive&, IDataList&, unsigned); -template void serialize (boost::archive::binary_iarchive&, IDataList&, unsigned); -template void serialize (boost::archive::binary_oarchive&, IDataList&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, IDataInfo&, unsigned); -template void serialize (boost::archive::xml_iarchive&, IDataInfo&, unsigned); -template void serialize (boost::archive::binary_iarchive&, IDataInfo&, unsigned); -template void serialize (boost::archive::binary_oarchive&, IDataInfo&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, FinishMap&, unsigned); -template void serialize (boost::archive::xml_iarchive&, FinishMap&, unsigned); -template void serialize (boost::archive::binary_iarchive&, FinishMap&, unsigned); -template void serialize (boost::archive::binary_oarchive&, FinishMap&, unsigned); - -template void serialize (boost::archive::xml_oarchive&, NodesShuffling&, unsigned); -template void serialize (boost::archive::xml_iarchive&, NodesShuffling&, unsigned); -template void serialize (boost::archive::binary_iarchive&, NodesShuffling&, unsigned); -template void serialize (boost::archive::binary_oarchive&, NodesShuffling&, unsigned); +template void serialize (boost::archive::xml_oarchive&, BlockStatus&, unsigned); +template void serialize (boost::archive::xml_iarchive&, BlockStatus&, unsigned); +template void serialize (boost::archive::binary_iarchive&, BlockStatus&, unsigned); +template void serialize (boost::archive::binary_oarchive&, BlockStatus&, unsigned); + +template void serialize (boost::archive::xml_oarchive&, eclipse::BlockMetadata&, unsigned); +template void serialize (boost::archive::xml_iarchive&, eclipse::BlockMetadata&, unsigned); +template void serialize (boost::archive::binary_iarchive&, eclipse::BlockMetadata&, unsigned); +template void serialize (boost::archive::binary_oarchive&, eclipse::BlockMetadata&, unsigned); + +template void 
serialize (boost::archive::xml_oarchive&, IOoperation&, unsigned); +template void serialize (boost::archive::xml_iarchive&, IOoperation&, unsigned); +template void serialize (boost::archive::binary_iarchive&, IOoperation&, unsigned); +template void serialize (boost::archive::binary_oarchive&, IOoperation&, unsigned); } } -//! 4) Also here +// 4) Also here BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::Boundaries); BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::KeyValue); BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::OffsetKeyValue); BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::Control); BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::KeyRequest); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::Task); BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::FileInfo); BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::FileUpdate); BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::FileList); @@ -594,21 +377,6 @@ BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::BlockDel); BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::FormatRequest); BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::FileExist); BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::MetaData); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::Job); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::JobStatus); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::SubJob); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::SubJobStatus); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IDataKeys); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::KeyValueShuffle); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::FinishShuffle); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::TaskStatus); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IDataInsert); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IGroupInsert); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IBlockInsert); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IDataInfoRequest); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IGroupInfoRequest); 
-BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IBlockInfoRequest); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IDataList); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IDataInfo); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::FinishMap); -BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::NodesShuffling); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::BlockStatus); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::BlockMetadata); +BOOST_CLASS_EXPORT_IMPLEMENT(eclipse::messages::IOoperation); diff --git a/src/messages/boost_impl.hh b/src/messages/boost_impl.hh index 8e4539c..fbbe0fb 100644 --- a/src/messages/boost_impl.hh +++ b/src/messages/boost_impl.hh @@ -1,17 +1,16 @@ // -// This module implements the serialization using BOOST SERIALIZAITON -// For every new message class that you want to add you must add its -// functions here and its cc file. +//! @brief This module implements the serialization using BOOST SERIALIZAITON +//! For every new message class that you want to add you must add its +//! functions here and its cc file. // #pragma once -//! 
1) Add your class here +// 1) Add your class here #include "message.hh" #include "boundaries.hh" #include "keyvalue.hh" #include "control.hh" #include "keyrequest.hh" -#include "task.hh" #include "fileinfo.hh" #include "filelist.hh" #include "blockinfo.hh" @@ -27,27 +26,11 @@ #include "blockupdate.hh" #include "fileupdate.hh" #include "metadata.hh" -#include "finishmap.hh" -#include "nodes_shuffling.hh" - -// MapReduce messages -#include "../mapreduce/messages/idatalist.hh" -#include "../mapreduce/messages/idatainfo.hh" -#include "../mapreduce/messages/idatainsert.hh" -#include "../mapreduce/messages/igroupinsert.hh" -#include "../mapreduce/messages/iblockinsert.hh" -#include "../mapreduce/messages/idatainforequest.hh" -#include "../mapreduce/messages/igroupinforequest.hh" -#include "../mapreduce/messages/iblockinforequest.hh" -#include "../mapreduce/messages/key_value_shuffle.h" -#include "../mapreduce/messages/finish_shuffle.h" -#include "taskstatus.hh" +#include "blockstatus.hh" +#include "IOoperation.hh" #include "offsetkv.hh" -#include "job.hh" -#include "jobstatus.hh" -#include "subjob.hh" -#include "subjobstatus.hh" -#include "idatakeys.hh" +#include "../common/blockmetadata.hh" + #include #include @@ -60,7 +43,7 @@ #endif #define ECNS eclipse::messages -//! 
2) Also here +// 2) Also here namespace boost{ namespace serialization{ @@ -70,7 +53,6 @@ template void serialize (Archive&, eclipse::messages::KeyValu template void serialize (Archive&, eclipse::messages::OffsetKeyValue&, unsigned); template void serialize (Archive&, eclipse::messages::Control&, unsigned); template void serialize (Archive&, eclipse::messages::KeyRequest&, unsigned); -template void serialize (Archive&, eclipse::messages::Task&, unsigned); template void serialize (Archive&, eclipse::messages::FileInfo&, unsigned); template void serialize (Archive&, eclipse::messages::FileUpdate&, unsigned); template void serialize (Archive&, eclipse::messages::FileList&, unsigned); @@ -86,23 +68,9 @@ template void serialize (Archive&, eclipse::messages::BlockDe template void serialize (Archive&, eclipse::messages::FormatRequest&, unsigned); template void serialize (Archive&, eclipse::messages::FileExist&, unsigned); template void serialize (Archive&, eclipse::messages::MetaData&, unsigned); -template void serialize (Archive&, eclipse::messages::Job&, unsigned); -template void serialize (Archive&, eclipse::messages::JobStatus&, unsigned); -template void serialize (Archive&, eclipse::messages::SubJob&, unsigned); -template void serialize (Archive&, eclipse::messages::SubJobStatus&, unsigned); -template void serialize (Archive&, eclipse::messages::IDataKeys&, unsigned); -template void serialize (Archive&, eclipse::messages::KeyValueShuffle&, unsigned); -template void serialize (Archive&, eclipse::messages::FinishShuffle&, unsigned); -template void serialize (Archive&, eclipse::messages::TaskStatus&, unsigned); -template void serialize (Archive&, eclipse::messages::IDataInsert&, unsigned); -template void serialize (Archive&, eclipse::messages::IGroupInsert&, unsigned); -template void serialize (Archive&, eclipse::messages::IBlockInsert&, unsigned); -template void serialize (Archive&, eclipse::messages::IDataInfoRequest&, unsigned); -template void serialize (Archive&, 
eclipse::messages::IGroupInfoRequest&, unsigned); -template void serialize (Archive&, eclipse::messages::IBlockInfoRequest&, unsigned); -template void serialize (Archive&, eclipse::messages::IDataList&, unsigned); -template void serialize (Archive&, eclipse::messages::IDataInfo&, unsigned); -template void serialize (Archive&, eclipse::messages::NodesShuffling&, unsigned); +template void serialize (Archive&, eclipse::messages::BlockStatus&, unsigned); +template void serialize (Archive&, eclipse::BlockMetadata&, unsigned); +template void serialize (Archive&, eclipse::messages::IOoperation&, unsigned); } } @@ -110,13 +78,12 @@ template void serialize (Archive&, eclipse::messages::NodesSh BOOST_SERIALIZATION_ASSUME_ABSTRACT(ECNS::Message); #define TRACK_NEVER boost::serialization::track_never -//! 3) Also here +// 3) Also here BOOST_CLASS_EXPORT_KEY(eclipse::messages::Boundaries); BOOST_CLASS_EXPORT_KEY(eclipse::messages::KeyValue); BOOST_CLASS_EXPORT_KEY(eclipse::messages::OffsetKeyValue); BOOST_CLASS_EXPORT_KEY(eclipse::messages::Control); BOOST_CLASS_EXPORT_KEY(eclipse::messages::KeyRequest); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::Task); BOOST_CLASS_EXPORT_KEY(eclipse::messages::FileInfo); BOOST_CLASS_EXPORT_KEY(eclipse::messages::FileUpdate); BOOST_CLASS_EXPORT_KEY(eclipse::messages::FileList); @@ -132,26 +99,11 @@ BOOST_CLASS_EXPORT_KEY(eclipse::messages::BlockDel); BOOST_CLASS_EXPORT_KEY(eclipse::messages::FormatRequest); BOOST_CLASS_EXPORT_KEY(eclipse::messages::FileExist); BOOST_CLASS_EXPORT_KEY(eclipse::messages::MetaData); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::Job); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::JobStatus); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::SubJob); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::SubJobStatus); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::IDataKeys); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::KeyValueShuffle); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::FinishShuffle); 
-BOOST_CLASS_EXPORT_KEY(eclipse::messages::TaskStatus); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::IDataInsert); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::IGroupInsert); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::IBlockInsert); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::IDataInfoRequest); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::IGroupInfoRequest); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::IBlockInfoRequest); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::IDataList); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::IDataInfo); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::FinishMap); -BOOST_CLASS_EXPORT_KEY(eclipse::messages::NodesShuffling); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::BlockStatus); +BOOST_CLASS_EXPORT_KEY(eclipse::BlockMetadata); +BOOST_CLASS_EXPORT_KEY(eclipse::messages::IOoperation); -//! 4) and here +// 4) and here BOOST_CLASS_TRACKING(eclipse::messages::Message, boost::serialization::track_never); BOOST_CLASS_TRACKING(eclipse::messages::Boundaries, boost::serialization::track_never); BOOST_CLASS_TRACKING(eclipse::messages::KeyValue, boost::serialization::track_never); @@ -162,7 +114,6 @@ BOOST_CLASS_TRACKING(eclipse::messages::FileInfo, boost::serialization::track_ne BOOST_CLASS_TRACKING(eclipse::messages::FileUpdate, boost::serialization::track_never); BOOST_CLASS_TRACKING(eclipse::messages::BlockInfo, boost::serialization::track_never); BOOST_CLASS_TRACKING(eclipse::messages::BlockUpdate, boost::serialization::track_never); -BOOST_CLASS_TRACKING(eclipse::messages::Task, boost::serialization::track_never); BOOST_CLASS_TRACKING(eclipse::messages::FileList, boost::serialization::track_never); BOOST_CLASS_TRACKING(eclipse::messages::Reply, boost::serialization::track_never); BOOST_CLASS_TRACKING(eclipse::messages::CacheInfo, boost::serialization::track_never); @@ -173,23 +124,8 @@ BOOST_CLASS_TRACKING(eclipse::messages::FileDel, boost::serialization::track_nev BOOST_CLASS_TRACKING(eclipse::messages::BlockDel, boost::serialization::track_never); 
BOOST_CLASS_TRACKING(eclipse::messages::FormatRequest, boost::serialization::track_never); BOOST_CLASS_TRACKING(eclipse::messages::FileExist, boost::serialization::track_never); -BOOST_CLASS_TRACKING(ECNS::IDataInsert, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::IGroupInsert, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::IBlockInsert, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::IDataInfoRequest, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::IGroupInfoRequest, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::IBlockInfoRequest, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::KeyValueShuffle, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::FinishShuffle, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::TaskStatus, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::Job, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::JobStatus, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::SubJob, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::SubJobStatus, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::IDataKeys, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::IDataList, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::IDataInfo, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::FinishMap, TRACK_NEVER); -BOOST_CLASS_TRACKING(ECNS::NodesShuffling, TRACK_NEVER); +BOOST_CLASS_TRACKING(eclipse::messages::BlockStatus, boost::serialization::track_never); +BOOST_CLASS_TRACKING(eclipse::BlockMetadata, boost::serialization::track_never); +BOOST_CLASS_TRACKING(eclipse::messages::IOoperation, boost::serialization::track_never); #undef ECNS #undef TRACK_NEVER diff --git a/src/messages/factory.cc b/src/messages/factory.cc index 23f0002..89b3d96 100644 --- a/src/messages/factory.cc +++ b/src/messages/factory.cc @@ -2,6 +2,7 @@ #include "../common/context_singleton.hh" #include +#include #include #include #include @@ -52,6 +53,8 @@ std::string* save_message (Message* m) { void send_message(boost::asio::ip::tcp::socket* socket, eclipse::messages::Message* msg) { + boost::asio::ip::tcp::no_delay option(true); + socket->set_option(option); string* to_send = save_message(msg); 
socket->send(boost::asio::buffer(*to_send)); } diff --git a/src/messages/factory.hh b/src/messages/factory.hh index 4139d93..ed2da0a 100644 --- a/src/messages/factory.hh +++ b/src/messages/factory.hh @@ -13,7 +13,6 @@ Message* load_message (boost::asio::streambuf&); std::string* save_message (Message*); void send_message(boost::asio::ip::tcp::socket*, eclipse::messages::Message*); -//template auto read_reply(boost::asio::ip::tcp::socket*); template auto read_reply(boost::asio::ip::tcp::socket* socket) { @@ -25,10 +24,12 @@ auto read_reply(boost::asio::ip::tcp::socket* socket) { read(*socket, buffer(header, 16)); size_t size_of_msg = atoi(header); - read(*socket, buf, transfer_exactly(size_of_msg)); + size_t l = read(*socket, buf.prepare(size_of_msg)); Message* msg = nullptr; + buf.commit(l); msg = load_message(buf); + buf.consume(l); T* m = dynamic_cast(msg); return std::unique_ptr(m); } diff --git a/src/messages/filedescription.cc b/src/messages/filedescription.cc index b95d2e6..860e33b 100644 --- a/src/messages/filedescription.cc +++ b/src/messages/filedescription.cc @@ -2,4 +2,18 @@ using namespace eclipse::messages; +FileDescription& FileDescription::operator=(FileDescription& rhs) { + FileInfo::operator=(rhs); + + this->blocks.clear(); + this->hash_keys.clear(); + this->block_size.clear(); + + this->blocks = rhs.blocks; + this->hash_keys = rhs.hash_keys; + this->block_size = rhs.block_size; + + return *this; +} + std::string FileDescription::get_type() const { return "FileDescription"; } diff --git a/src/messages/filedescription.hh b/src/messages/filedescription.hh index 28845c9..e8b3a2e 100644 --- a/src/messages/filedescription.hh +++ b/src/messages/filedescription.hh @@ -1,23 +1,22 @@ #pragma once -#include "message.hh" +#include "fileinfo.hh" #include namespace eclipse { namespace messages { -struct FileDescription: public Message { +struct FileDescription: public FileInfo { FileDescription() = default; ~FileDescription() = default; + FileDescription& 
operator=(FileDescription&); + std::string get_type() const override; - std::string name; - uint64_t size; std::vector blocks; std::vector hash_keys; - std::vector block_size; - int replica; + std::vector block_size; }; } diff --git a/src/messages/fileinfo.hh b/src/messages/fileinfo.hh index e11385a..75a6a72 100644 --- a/src/messages/fileinfo.hh +++ b/src/messages/fileinfo.hh @@ -1,6 +1,8 @@ #pragma once #include "message.hh" +#include "../common/blockmetadata.hh" +#include namespace eclipse { namespace messages { @@ -19,6 +21,9 @@ struct FileInfo: public Message { unsigned int replica = 0; bool reducer_output = false; uint32_t job_id = 0; + int uploading = 1; + + std::vector blocks_metadata; }; } diff --git a/src/messages/fileupdate.hh b/src/messages/fileupdate.hh index 3288506..12d0482 100644 --- a/src/messages/fileupdate.hh +++ b/src/messages/fileupdate.hh @@ -1,6 +1,8 @@ #pragma once #include "message.hh" +#include "../common/blockmetadata.hh" +#include namespace eclipse { namespace messages { @@ -14,6 +16,9 @@ struct FileUpdate: public Message { std::string name; uint64_t size; unsigned int num_block; + bool is_append = false; + + std::vector blocks_metadata; }; } diff --git a/src/messages/idatalist.cc b/src/messages/idatalist.cc deleted file mode 100644 index 48d6fd4..0000000 --- a/src/messages/idatalist.cc +++ /dev/null @@ -1,10 +0,0 @@ -#include "idatalist.hh" - -namespace eclipse { -namespace messages { - -IDataList::IDataList (std::vector v) : data(v) { } -std::string IDataList::get_type() const {return "IDataList"; } - -} -} diff --git a/src/messages/idatalist.hh b/src/messages/idatalist.hh deleted file mode 100644 index 99435a3..0000000 --- a/src/messages/idatalist.hh +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once - -#include "message.hh" -#include -#include -#include -#include "../mapreduce/messages/idatainfo.hh" - -namespace eclipse { -namespace messages { - -struct IDataList: public Message { - IDataList() = default; - IDataList(std::vector); - - 
std::string get_type() const override; - std::vector data; -}; - -} -} diff --git a/src/messages/message.hh b/src/messages/message.hh index 0981943..4e45c0f 100644 --- a/src/messages/message.hh +++ b/src/messages/message.hh @@ -15,7 +15,7 @@ namespace messages { struct Message { Message() = default; Message(int, int); - virtual ~Message () {} + virtual ~Message () = default; virtual std::string get_type() const = 0; diff --git a/src/messages/subjob.cc b/src/messages/subjob.cc deleted file mode 100644 index 472e6b8..0000000 --- a/src/messages/subjob.cc +++ /dev/null @@ -1,9 +0,0 @@ -#include "subjob.hh" - -namespace eclipse { -namespace messages { - -std::string SubJob::get_type() const { return "SubJob"; } - -} -} diff --git a/src/messages/subjob.hh b/src/messages/subjob.hh deleted file mode 100644 index b3bcb7f..0000000 --- a/src/messages/subjob.hh +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include "message.hh" -#include - -namespace eclipse { -namespace messages { - -struct SubJob: public Message { - std::string get_type() const override; - uint32_t job_id = 0; - - std::string type; - std::string library; - std::string map_name; - std::string reduce_name; - std::string file; - std::string func_body; - std::string lang; -}; - -} -} diff --git a/src/messages/subjobstatus.cc b/src/messages/subjobstatus.cc deleted file mode 100644 index e92c8ef..0000000 --- a/src/messages/subjobstatus.cc +++ /dev/null @@ -1,7 +0,0 @@ -#include "subjobstatus.hh" - -namespace eclipse { -namespace messages { -std::string SubJobStatus::get_type() const { return "SubJobStatus"; } -} -} diff --git a/src/messages/subjobstatus.hh b/src/messages/subjobstatus.hh deleted file mode 100644 index 7e161d7..0000000 --- a/src/messages/subjobstatus.hh +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once -#include "message.hh" - -namespace eclipse { -namespace messages { - -struct SubJobStatus: public Message { - std::string get_type() const override; - - uint32_t job_id = 0; - uint32_t subjob_id = 0; - 
bool is_success = false; - std::string type; -}; - -} -} diff --git a/src/network/acceptor.cc b/src/network/acceptor.cc deleted file mode 100644 index 890d7de..0000000 --- a/src/network/acceptor.cc +++ /dev/null @@ -1,31 +0,0 @@ -#include "acceptor.hh" - -using namespace eclipse::network; - -// Constructor {{{ -Acceptor::Acceptor(int port_, NetObserver* o): - observer(o), - iosvc (context.io), - nodes (context.settings.get("network.nodes")), - port (port_) -{ } -// }}} -// listen {{{ -void Acceptor::listen () { - acceptor = std::make_unique (iosvc, - tcp::endpoint(tcp::v4(), port) ); - - spawn(iosvc, bind(&Acceptor::do_listen, this, _1)); -} -// }}} -// do_listen {{{ -void Acceptor::do_listen (boost::asio::yield_context yield) { - boost::system::error_code ec; - for (;;) { - auto socket = new tcp::socket(iosvc); - acceptor->async_accept (*socket, yield[ec]); - if (!ec) - observer->on_accept(socket); - } -} -// }}} diff --git a/src/network/acceptor.hh b/src/network/acceptor.hh deleted file mode 100644 index b2d1ea6..0000000 --- a/src/network/acceptor.hh +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include "netobserver.hh" -#include "../common/context_singleton.hh" -#include -#include -#include -#include -#include - -namespace eclipse { -namespace network { - -using boost::asio::ip::tcp; -using vec_str = std::vector; - -class Acceptor { - public: - Acceptor(int, NetObserver*); - void listen(); - - protected: - void do_listen(boost::asio::yield_context ); - - std::unique_ptr acceptor; - NetObserver* observer = nullptr; - boost::asio::io_service& iosvc; - vec_str nodes; - int port = 0; -}; - -} /* network */ -} diff --git a/src/network/asyncchannel.cc b/src/network/asyncchannel.cc index 86f18af..4e0f68e 100644 --- a/src/network/asyncchannel.cc +++ b/src/network/asyncchannel.cc @@ -1,5 +1,6 @@ #include "asyncchannel.hh" #include "../messages/factory.hh" +#include "../common/context_singleton.hh" #include #include #include @@ -22,25 +23,24 @@ using namespace 
boost::system; using namespace boost::archive; // constructor {{{ -AsyncChannel::AsyncChannel(tcp::socket* s, tcp::socket* r, NetObserver* node_, int i) : +AsyncChannel::AsyncChannel(NetObserver* node_) : node (node_), - sender(s), - receiver(r), - id(i) + socket(context.io), + iosvc(context.io) { - if (s == nullptr or r == nullptr) { - ERROR("NULL pointer passed to asyncchannel %p %p", s, r); - exit(EXIT_FAILURE); - } is_writing.store(false); } AsyncChannel::~AsyncChannel() { - if (receiver!= nullptr) { - receiver->close(); - delete receiver; - } +// INFO("Socket is destroyed to/from %s", host.c_str()); + socket.close(); + } // }}} +// get_socket {{{ +tcp::socket& AsyncChannel::get_socket() { + return socket; +} +//}}} // do_write str {{{ void AsyncChannel::do_write(std::shared_ptr& str_p) { messages_queue.push(str_p); @@ -59,21 +59,70 @@ void AsyncChannel::do_write(Message* m) { } } // }}} +// do_write_buffer {{{ +void AsyncChannel::do_write_buffer() { + do_write_impl(); +} +// }}} +// commit{{{ +void AsyncChannel::commit(std::shared_ptr& str_p) { + messages_queue.push(str_p); +} +//}}} // do_write_impl {{{ void AsyncChannel::do_write_impl() { auto to_write = messages_queue.front(); - async_write (*sender, buffer(*to_write), transfer_exactly(to_write->size()), - boost::bind (&AsyncChannel::on_write, this, ph::error, ph::bytes_transferred)); + host = socket.remote_endpoint().address().to_string().c_str(); + port = socket.remote_endpoint().port(); + async_write (socket, buffer(*to_write), transfer_exactly(to_write->size()), + boost::bind (&AsyncChannel::on_write, shared_from_this(), ph::error, ph::bytes_transferred)); } // }}} // on_write {{{ void AsyncChannel::on_write(const boost::system::error_code& ec, size_t s) { if (ec) { - INFO("Message could not reach err=%s", - ec.message().c_str()); + WARN("Message could not reach err=%s (transferred %lu B) host %s", + ec.message().c_str(), s, host.c_str()); + + if (ec == boost::asio::error::connection_reset) { + 
INFO("Reconnecting to %s %u", host.c_str(), port); + + try { + socket.close(); + tcp::resolver resolver (context.io); + tcp::resolver::query query (host, to_string(port)); + tcp::resolver::iterator it(resolver.resolve(query)); + auto ep = make_unique(*it); + boost::system::error_code ec; + auto self(shared_from_this()); + std::function connect_callback = + [&, self] (const boost::system::error_code& ec) { + + if (ec) { + if(ec == boost::asio::error::timed_out) { + self->get_socket().close(); + socket.async_connect(*ep, connect_callback); + } + + ERROR("Failed to reconnect err=%s host %s", + ec.message().c_str(), host.c_str()); + exit(EXIT_FAILURE); + + } else { + do_write_impl(); + } + }; + + socket.async_connect(*ep, connect_callback); + + } catch (exception& e) { + INFO("Mapper exception %s", e.what()); + } catch (boost::exception& e) { + INFO("Mapper exception %s", diagnostic_information(e).c_str()); + } + } - do_write_impl(); } else { messages_queue.pop(); @@ -88,49 +137,61 @@ void AsyncChannel::on_write(const boost::system::error_code& ec, // do_read {{{ void AsyncChannel::do_read () { DEBUG("Connection established, starting to read"); - spawn(iosvc, bind(&AsyncChannel::read_coroutine, this, _1)); + spawn(iosvc, boost::bind(&AsyncChannel::read_coroutine, shared_from_this(), _1)); } // }}} // read_coroutine {{{ +//! @note This is a coroutine, if you don't know what is it look it up before going crazy. +//! @todo Fix this exception hell. +//! @date February 9th, 2017 void AsyncChannel::read_coroutine (yield_context yield) { boost::asio::streambuf buf; boost::system::error_code ec; char header [header_size + 1] = {'\0'}; - header[16] = '\0'; - Message* msg = nullptr; - while (true) { - try { - size_t l = async_read(*receiver, buffer(header, header_size), yield[ec]); - if (l != (size_t)header_size) - throw std::runtime_error("header size"); + try { + while (true) { + auto keep_alive = shared_from_this(); + //! Read header of incoming message, we know its size. 
+ size_t recv = async_read(socket, buffer(header, header_size), yield[ec]); - if (ec) { - throw std::runtime_error("EC error"); - } + if (recv != (size_t)header_size or ec) + throw std::runtime_error("header error"); + + DEBUG("Header has arrived"); - size_t size = atoi(header); - l = read (*receiver, buf, transfer_exactly(size)); - if (l != size) { - throw std::runtime_error("body size"); + //! The header gives us the length of the incoming message. + //! Note, that buf.prepare is the fastest way to read. + size_t size = strtoul(header, NULL, 10); + try { + const auto& read_buffer = buf.prepare(size); + INFO("READING %lu", size); + recv = async_read(socket, read_buffer, yield[ec]); + } catch (std::bad_alloc& e) { + ERROR("Running out of memory"); } - } catch (std::exception& e) { - if (ec == boost::asio::error::eof) - INFO("AsyncChannel: Closing channel and socket"); + if (recv != size or ec) + throw std::runtime_error("body error"); - else - INFO("AsyncChannel: unformed header arrived from host %s, ex: %s", - receiver->remote_endpoint().address().to_string().c_str(), e.what()); + buf.commit(recv); + unique_ptr msg {load_message(buf)}; + buf.consume(recv); - node->on_disconnect(nullptr, id); - break; + DEBUG("Package has been deserialized"); + node->on_read(msg.get(), this); } + } catch (std::exception& e) { + if (ec == boost::asio::error::eof) + DEBUG("AsyncChannel: Closing server socket to client"); - msg = load_message(buf); - node->on_read(msg, id); - delete msg; - msg=nullptr; + else + ERROR("AsyncChannel: unformed message arrived from host %s, ex: %s", + socket.remote_endpoint().address().to_string().c_str(), e.what()); + + } catch (boost::exception& e) { + INFO("read exception %s", diagnostic_information(e).c_str()); } + } // }}} diff --git a/src/network/asyncchannel.hh b/src/network/asyncchannel.hh index f7ac640..1f5e0ac 100644 --- a/src/network/asyncchannel.hh +++ b/src/network/asyncchannel.hh @@ -1,36 +1,63 @@ #pragma once #include "channel.hh" -#include 
"asyncnode.hh" #include "../messages/message.hh" #include "netobserver.hh" + #include -#include #include #include +#include #include +#include +#include namespace eclipse { namespace network { -class AsyncChannel: public Channel { +using boost::asio::ip::tcp; +//! @brief Represent an opened channel between two endpoints. +//! +//! @attention It's intended to be used as a base class. +//! @attention Calling the constructor is not enought, +//! you should also call init function. +class AsyncChannel: public Channel, public std::enable_shared_from_this { public: - AsyncChannel(tcp::socket*, tcp::socket*, NetObserver*, int); - ~AsyncChannel(); - void do_write (messages::Message*) override; - void do_write (std::shared_ptr&); - void do_write_impl (); - void do_read (); + //! @param node_ Observer object + AsyncChannel(NetObserver* node_); + virtual ~AsyncChannel() ; + + //! @brief Write asynchronously the message. + void do_write(messages::Message*) override; + + //! @brief Write asynchronously the message sharing the payload. + void do_write(std::shared_ptr&); + + //! @brief Write asynchronously the message sharing the payload. + void do_write_buffer(); - protected: - void on_write (const boost::system::error_code&, size_t); + //! @brief This method implements the reading loop. + void do_read(); - void read_coroutine (boost::asio::yield_context); + //! @brief Get internal socket. + tcp::socket& get_socket(); + + //! @brief Add a message to the sending queue. 
+ void commit(std::shared_ptr&); + + private: + void on_write(const boost::system::error_code&, size_t); + void do_write_impl(); + + void read_coroutine(boost::asio::yield_context); NetObserver* node = nullptr; - tcp::socket *sender, *receiver; - int id; + tcp::socket socket; std::queue> messages_queue; std::atomic is_writing; + boost::asio::io_service& iosvc; + + std::string host; + uint32_t port; }; } diff --git a/src/network/asyncnetwork.hh b/src/network/asyncnetwork.hh deleted file mode 100644 index 675f741..0000000 --- a/src/network/asyncnetwork.hh +++ /dev/null @@ -1,221 +0,0 @@ -#pragma once -#include "network.hh" -#include "asyncnode.hh" -#include "netobserver.hh" -#include "acceptor.hh" -#include "../messages/factory.hh" -#include "connector.hh" -#include -#include -#include - -namespace eclipse { -namespace network { - -using vec_str = std::vector; -using boost::asio::ip::tcp; - -template -class AsyncNetwork: public Network, public NetObserver { - public: - AsyncNetwork(int); - ~AsyncNetwork (); - - bool establish() override; - bool close () override; - size_t size () override; - bool send(int, messages::Message*) override; - bool send_and_replicate(std::vector, messages::Message* m) override; - void attach(AsyncNode*) override; - - void on_accept(tcp::socket*) override; - void on_connect(tcp::socket*) override; - void on_disconnect(tcp::socket*, int) override; - void on_read(messages::Message*, int) override; - - - private: - int id_of(tcp::socket*); - void start_reading(); - bool is_completed_network(); - - AsyncNode* node; - vec_str nodes; - - Acceptor acceptor; - Connector connector; - int net_size = 0; - - std::map> sockets; - std::map> channels; - std::atomic accepted_size; - std::atomic connected_size; - std::mutex acceptor_mutex; -}; -// Constructor {{{ -template -AsyncNetwork::AsyncNetwork (int port): - nodes(context.settings.get ("network.nodes")), - acceptor(port, this), - connector(port, this), - accepted_size(0), - connected_size(0) -{ - if 
(TYPE::is_multiple()) - net_size = nodes.size() - 1; - - else - net_size = 1; -} -template -AsyncNetwork::~AsyncNetwork () { } -// }}} -// establish {{{ -template -bool AsyncNetwork::establish () { - acceptor.listen(); - - if (TYPE::is_multiple()) connector.establish(); - - return true; -} -// }}} -// close {{{ -template -bool AsyncNetwork::close () { - return true; -} -// }}} -// size {{{ -template -size_t AsyncNetwork::size () { - return channels.size(); -} -// }}} -// send_and_replicate {{{ -template -bool AsyncNetwork::send_and_replicate(std::vector node_indices, messages::Message* m) { - std::lock_guard lck (acceptor_mutex); - shared_ptr message_serialized (save_message(m)); - for (auto i : node_indices) { - channels[i]->do_write(message_serialized); - } - return true; -} -// }}} -// send {{{ -template -bool AsyncNetwork::send (int i, messages::Message* m) { - std::lock_guard lck (acceptor_mutex); - channels[i]->do_write(m); - return true; -} -// }}} -// on_accept {{{ -template -void AsyncNetwork::on_accept (tcp::socket* sock) { - std::lock_guard lck (acceptor_mutex); - if (not TYPE::is_multiple()) { - channels.emplace (accepted_size.load(), std::make_unique (sock, sock, this, accepted_size.load())); - accepted_size++; - channels[accepted_size.load() - 1]->do_read(); - - } else { - auto i = id_of (sock); - if (sockets.find(i) == sockets.end()) - sockets.insert({i, {nullptr, sock}}); - else - sockets[i].second = sock; - - accepted_size++; - - if (is_completed_network()) - start_reading(); - } -} -// }}} -// on_connect {{{ -template -void AsyncNetwork::on_connect (tcp::socket* sock) { - std::lock_guard lck (acceptor_mutex); - auto i = id_of (sock); - - if (sockets.find(i) == sockets.end()) - sockets.insert({i, {sock, nullptr}}); - else - sockets[i].first= sock; - - connected_size++; - - if (is_completed_network()) start_reading(); -} -// }}} -// on_disconnect {{{ -template -void AsyncNetwork::on_disconnect (tcp::socket* sock, int id) { - INFO("Socket 
disconnecting"); - if (TYPE::is_multiple()) - connected_size--; - - accepted_size--; - - std::lock_guard lck (acceptor_mutex); - channels.erase(id); -} -// }}} -// completed_network {{{ -template -bool AsyncNetwork::is_completed_network () { - if (TYPE::is_multiple() and accepted_size.load() >= net_size and connected_size.load() >= net_size) - return true; - - if (not TYPE::is_multiple() and accepted_size.load() >= 1) return true; - - return false; -} -// }}} -// id_of {{{ -template -int AsyncNetwork::id_of (tcp::socket* sock) { - auto ip = sock->remote_endpoint().address().to_string(); - - auto idx = std::find(nodes.begin(), nodes.end(), ip) - nodes.begin(); - return idx; -} -// }}} -// start_reading {{{ -template -void AsyncNetwork::start_reading () { - if (TYPE::is_multiple()) { - - for (auto& sp : sockets) { - if (channels.find(sp.first) != channels.end()) - channels.erase(sp.first); - - channels.emplace (sp.first, std::make_unique (sp.second.first, sp.second.second, this, sp.first)); - } - sockets.clear(); - for (auto& channel : channels) - channel.second->do_read(); - } - - if (not TYPE::is_multiple()) - channels[accepted_size.load() - 1]->do_read(); - - node->on_connect(); -} -// }}} -// on_read {{{ -template -void AsyncNetwork::on_read (messages::Message* m , int id) { - node->on_read(m, id); -} -// }}} -// attach {{{ -template -void AsyncNetwork::attach (AsyncNode* node_) { - node = node_; -} -// }}} -} -} /* eclipse */ diff --git a/src/network/asyncnode.hh b/src/network/asyncnode.hh deleted file mode 100644 index d59e7a2..0000000 --- a/src/network/asyncnode.hh +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#include "../messages/message.hh" - -namespace eclipse { - -class AsyncNode { - public: - virtual ~AsyncNode () { }; - virtual void on_connect() = 0; - virtual void on_disconnect(int) = 0; - virtual void on_read(messages::Message*, int) = 0; -}; - -} /* eclipse */ diff --git a/src/network/channel.cc b/src/network/channel.cc deleted file mode 100644 index 
b9482da..0000000 --- a/src/network/channel.cc +++ /dev/null @@ -1,9 +0,0 @@ -#include "channel.hh" - -using namespace eclipse::network; - -Channel::Channel () : - iosvc (context.io), - port (context.settings.get("network.ports.internal")), - logger (context.logger.get()) -{} diff --git a/src/network/channel.hh b/src/network/channel.hh index ed0ff30..48fe6e6 100644 --- a/src/network/channel.hh +++ b/src/network/channel.hh @@ -1,27 +1,18 @@ #pragma once #include "../messages/message.hh" -#include "../common/context_singleton.hh" -#include -#include namespace eclipse { namespace network { -using boost::asio::ip::tcp; const int header_size = 16; -class Channel { +class Channel { public: - Channel (); - ~Channel () = default; + Channel () = default; + virtual ~Channel () = default; virtual void do_write (messages::Message*) = 0; - - protected: - boost::asio::io_service& iosvc; - int port; - Logger* logger = nullptr; }; } diff --git a/src/network/client_handler.cc b/src/network/client_handler.cc new file mode 100644 index 0000000..021994a --- /dev/null +++ b/src/network/client_handler.cc @@ -0,0 +1,118 @@ +#include "client_handler.hh" +#include "../messages/factory.hh" +#include "../common/context_singleton.hh" +#include +#include +#include +#include +#include +#include + +using namespace eclipse::network; +using namespace std; + +mutex mut; +// Constructor {{{ +ClientHandler::ClientHandler (uint32_t p): + nodes(context.settings.get ("network.nodes")), + port(p) +{ +} +// }}} +// connect {{{ +void ClientHandler::connect(uint32_t i, shared_ptr server) { + spawn(context.io, [&, server_copy=server, node=nodes[i], p=this->port](boost::asio::yield_context yield) { + try { + shared_ptr s = server_copy; + boost::system::error_code ec; + tcp::resolver resolver (context.io); + tcp::resolver::query query (node, to_string(p)); + + auto it = resolver.async_resolve(query, yield[ec]); + if (ec) { + ERROR("Resolving %s:%u", node.c_str(), p); + return; + } + + tcp::endpoint ep (*it); + 
while (true) { + s->get_socket().async_connect(ep, yield[ec]); + if (ec) { + if(ec == boost::asio::error::timed_out) { + s->get_socket().close(); + WARN("Re-connecting to %s:%u", node.c_str(), p); + continue; + } + ERROR("Connecting %s:%u ec=%s", node.c_str(), p, ec.message().c_str()); + return; + } + break; + } + + boost::asio::ip::tcp::no_delay option(true); + s->get_socket().set_option(option); + +// current_servers.insert({i, s}); + s->do_write_buffer(); + + } catch (exception& e) { + INFO("Connect handler exception %s", e.what()); + } catch (boost::exception& e) { + INFO("Connect handler exception %s", diagnostic_information(e).c_str()); + } + }); +} +// }}} +// send {{{ +bool ClientHandler::send(uint32_t i, messages::Message* m) { + if (i >= nodes.size()) return false; + // mut.lock(); + + // If connection is still on. + //if (current_servers.find(i) != current_servers.end()) { + // shared_ptr ptr = current_servers[i].lock(); + // if (ptr) { + // ptr->do_write(m); + // return true; + // } + //} + + auto server = make_shared(node); + shared_ptr message_serialized (save_message(m)); + server->commit(message_serialized); + connect(i, server); + + //mut.unlock(); + + return true; +} +// }}} +// send str{{{ +bool ClientHandler::send(uint32_t i, shared_ptr str) { + if (i >= nodes.size()) return false; + + // If connection is still on. 
+ if (current_servers.find(i) != current_servers.end()) { + shared_ptr ptr = current_servers[i].lock(); + if (ptr) { + ptr->do_write(str); + return true; + } + } + + auto server = make_shared(node); + server->commit(str); + connect(i, server); + + return true; +} +// }}} +// send_and_replicate {{{ +bool ClientHandler::send_and_replicate(std::vector node_indices, messages::Message* m) { + shared_ptr message_serialized (save_message(m)); + for (auto i : node_indices) { + send(i, message_serialized); + } + return true; +} +// }}} diff --git a/src/network/client_handler.hh b/src/network/client_handler.hh new file mode 100644 index 0000000..3d8b69c --- /dev/null +++ b/src/network/client_handler.hh @@ -0,0 +1,35 @@ +#pragma once +#include "../messages/message.hh" +#include "netobserver.hh" +#include "server.hh" +#include + +namespace eclipse { +namespace network { + +using boost::asio::ip::tcp; +using vec_str = std::vector; + +class ClientHandler { + public: + ClientHandler(uint32_t port); + ~ClientHandler() = default; + + bool send(uint32_t i, messages::Message* m); + bool send_and_replicate(std::vector, messages::Message*); + + private: + bool send(uint32_t i, std::shared_ptr str); + void connect(uint32_t i, std::shared_ptr server); + + //! Only for internal network, IO_SERVER is owner of + //! it. It can be freed any time. 
+ std::map> current_servers; + + vec_str nodes; + NetObserver* node; + uint32_t port; +}; + +} +} /* eclipse */ diff --git a/src/network/connector.cc b/src/network/connector.cc deleted file mode 100644 index 7f6723d..0000000 --- a/src/network/connector.cc +++ /dev/null @@ -1,47 +0,0 @@ -#include "connector.hh" -#include - -using namespace std; -using namespace eclipse::network; -namespace ph = boost::asio::placeholders; - -Connector::Connector(int p, NetObserver* o) : - nodes (context.settings.get("network.nodes")), - ip_of_this(context.settings.getip()), - observer(o), - iosvc(context.io), - port(p) {} - -// establish {{{ -void Connector::establish () { - for (auto node : nodes) { - if (node != ip_of_this) - do_connect(node); - } -} -// }}} -// do_connect {{{ -void Connector::do_connect (std::string node) { - tcp::resolver resolver (iosvc); - tcp::resolver::query query (node, to_string(port)); - tcp::resolver::iterator it (resolver.resolve(query)); - auto ep = new tcp::endpoint (*it); - auto sock = new tcp::socket(iosvc); - - sock->async_connect (*ep, boost::bind (&Connector::on_connect, this, - ph::error, ep, sock)); -} -// }}} -// on_connect {{{ -void Connector::on_connect (const boost::system::error_code& ec, - tcp::endpoint* ep, tcp::socket* sock) { - - if(!ec) { - delete ep; - observer->on_connect(sock); - - } else { - sock->async_connect (*ep, bind (&Connector::on_connect, this, - ph::error, ep, sock)); - } -} diff --git a/src/network/connector.hh b/src/network/connector.hh deleted file mode 100644 index 325d4a9..0000000 --- a/src/network/connector.hh +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once -#include "netobserver.hh" -#include "../common/context_singleton.hh" -#include -#include -#include -#include -#include - -namespace eclipse { -namespace network { - -using vec_str = std::vector; - -class Connector { - public: - Connector(int, NetObserver*); - ~Connector() = default; - - void establish (); - - protected: - void do_connect (std::string); - void 
on_connect (const boost::system::error_code&, tcp::endpoint*, - tcp::socket*); - - vec_str nodes; - std::string ip_of_this; - NetObserver* observer = nullptr; - boost::asio::io_service& iosvc; - int port = 0; -}; - -} -} diff --git a/src/network/netobserver.hh b/src/network/netobserver.hh index df5cdb1..01c6bb1 100644 --- a/src/network/netobserver.hh +++ b/src/network/netobserver.hh @@ -1,18 +1,14 @@ #pragma once #include "../messages/message.hh" -#include +#include "channel.hh" namespace eclipse { namespace network { -using boost::asio::ip::tcp; struct NetObserver { virtual ~NetObserver() { }; - virtual void on_connect(tcp::socket*) = 0; - virtual void on_accept(tcp::socket*) = 0; - virtual void on_disconnect(tcp::socket*, int) = 0; - virtual void on_read (messages::Message*, int) = 0; + virtual void on_read (messages::Message*, Channel*) = 0; }; } diff --git a/src/network/network.hh b/src/network/network.hh deleted file mode 100644 index 63d27e5..0000000 --- a/src/network/network.hh +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once -#include "../messages/message.hh" -#include "asyncnode.hh" -#include - -namespace eclipse { -namespace network { - -class Network { - public: - virtual ~Network () { } - - virtual bool establish() = 0; - virtual bool close () = 0; - virtual size_t size () = 0; - virtual bool send(int, messages::Message*) = 0; - virtual bool send_and_replicate(std::vector, messages::Message*) = 0; - virtual void attach (AsyncNode*) = 0; -}; - -} -} /* eclipse */ diff --git a/src/network/p2p.cc b/src/network/p2p.cc deleted file mode 100644 index d516e70..0000000 --- a/src/network/p2p.cc +++ /dev/null @@ -1,12 +0,0 @@ -#include "p2p.hh" - -using namespace eclipse::network; - -// constructor {{{ -P2P::P2P(tcp::socket* a, tcp::socket* b, NetObserver* node_, int i) : - AsyncChannel(a, b, node_, i) -{ } -// }}} -// is_multiple {{{ -bool P2P::is_multiple () { return true; } -// }}} diff --git a/src/network/p2p.hh b/src/network/p2p.hh deleted file mode 100644 index 
cbf5f62..0000000 --- a/src/network/p2p.hh +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once -#include "asyncchannel.hh" - -namespace eclipse { -namespace network { - -// @pre server and client socket should be open and not null -class P2P: public AsyncChannel { - public: - P2P(tcp::socket*, tcp::socket*, NetObserver*, int); - static bool is_multiple(); -}; - -} -} diff --git a/src/network/router.hh b/src/network/router.hh new file mode 100644 index 0000000..c2b6197 --- /dev/null +++ b/src/network/router.hh @@ -0,0 +1,32 @@ +#pragma once +#include "../network/netobserver.hh" +#include +#include + +namespace eclipse { +using namespace eclipse::network; + +//! @addtogroup RouterDecorator +//! @{ +//! @brief Router \b component entity of Router decorator pattern. +//! +//! Router is intended to be used as a decorator component. +//! The reason of using decorator is to make possible add +//! multiple functionabilities in a flexible way. This is, +//! We might have multiple component which needs to be connected +//! to our network. +//! +//! Before adding your router, get familiar with decorator pattern. +class Router: public NetObserver { + public: + Router() = default; + virtual ~Router() = default; + + virtual void on_read(messages::Message*, Channel*) = 0; + + protected: + std::map> routing_table; +}; + +} /* eclipse */ +//! 
@} diff --git a/src/network/router_decorator.cc b/src/network/router_decorator.cc new file mode 100644 index 0000000..f58410f --- /dev/null +++ b/src/network/router_decorator.cc @@ -0,0 +1,14 @@ +#include "router_decorator.hh" + +using namespace eclipse; + +RouterDecorator::RouterDecorator(Router* _router) : router(_router) { } + +void RouterDecorator::on_read(messages::Message* m, Channel* c) { + auto type = m->get_type(); + if (routing_table.find(type) != routing_table.end()) { + routing_table[type](m, c); + } else { + router->on_read(m, c); + } +} diff --git a/src/network/router_decorator.hh b/src/network/router_decorator.hh new file mode 100644 index 0000000..4a7ad88 --- /dev/null +++ b/src/network/router_decorator.hh @@ -0,0 +1,19 @@ +#pragma once +#include "router.hh" +#include + +namespace eclipse { + +//! +class RouterDecorator: public Router { + public: + RouterDecorator(Router*); + virtual ~RouterDecorator() = default; + + virtual void on_read(messages::Message*, Channel*); + + protected: + std::unique_ptr router; +}; + +} /* eclipse */ diff --git a/src/network/server.cc b/src/network/server.cc index 54a7822..77b861b 100644 --- a/src/network/server.cc +++ b/src/network/server.cc @@ -1,11 +1,13 @@ #include "server.hh" using namespace eclipse::network; +using namespace std; // constructor {{{ -Server::Server(tcp::socket*, tcp::socket* s, NetObserver* node_, int i) : - AsyncChannel(s, s, node_, i) -{ } +Server::Server(NetObserver* node_) : + AsyncChannel(node_) +{ +} // }}} // is_multiple {{{ bool Server::is_multiple () { return false; } diff --git a/src/network/server.hh b/src/network/server.hh index 05b40f6..1bee3f3 100644 --- a/src/network/server.hh +++ b/src/network/server.hh @@ -6,7 +6,7 @@ namespace network { class Server: public AsyncChannel { public: - Server(tcp::socket*,tcp::socket*, NetObserver*, int); + Server(NetObserver*); static bool is_multiple(); }; diff --git a/src/network/server_handler.cc b/src/network/server_handler.cc new file mode 
100644 index 0000000..59668e6 --- /dev/null +++ b/src/network/server_handler.cc @@ -0,0 +1,63 @@ +#include "server_handler.hh" +#include "../common/context_singleton.hh" +#include "server.hh" +#include +#include +#include +#include +#include +#include +#include + +using namespace eclipse::network; +using namespace std; +using vec_str = std::vector; + +// Constructor {{{ +ServerHandler::ServerHandler (uint32_t p): + port(p) +{ +} +// }}} +// establish {{{ +bool ServerHandler::establish () { + auto& iosvc = context.io; + + spawn(iosvc,[&, p=this->port](boost::asio::yield_context yield) { + INFO("Listening at port %u", p); + tcp::acceptor acceptor (iosvc, tcp::endpoint(tcp::v4(), p) ); + acceptor.listen(1); + boost::system::error_code ec; + for (;;) { + try { + auto server = make_shared(node); + acceptor.async_accept(server->get_socket(), yield[ec]); + + DEBUG("Client accepted"); + if (!ec) { + boost::asio::ip::tcp::no_delay option(true); + server->get_socket().set_option(option); + server->do_read(); + } else + ERROR("ERROR in acceptor reason: %s", ec.message().c_str()); + + } catch (exception& e) { + INFO("Server exception %s", e.what()); + } catch (boost::exception& e) { + INFO("Acceptor exception %s", diagnostic_information(e).c_str()); + } + } + }); + return true; +} +// }}} +// close {{{ +bool ServerHandler::close () { + return true; +} +// }}} +// attach {{{ +void ServerHandler::attach (NetObserver* node_) { + node = node_; +} +// }}} diff --git a/src/network/server_handler.hh b/src/network/server_handler.hh new file mode 100644 index 0000000..8300c20 --- /dev/null +++ b/src/network/server_handler.hh @@ -0,0 +1,23 @@ +#pragma once +#include "netobserver.hh" + +namespace eclipse { +namespace network { + +class ServerHandler { + public: + ServerHandler(uint32_t port); + ~ServerHandler() = default; + void attach(NetObserver*); + + //! 
@brief It will start the accepting loop + bool establish(); + bool close(); + + private: + NetObserver* node; + uint32_t port; +}; + +} +} /* eclipse */ diff --git a/src/network/simple_router.cc b/src/network/simple_router.cc new file mode 100644 index 0000000..37795d6 --- /dev/null +++ b/src/network/simple_router.cc @@ -0,0 +1,13 @@ +#include "simple_router.hh" +#include "../common/context_singleton.hh" + +using namespace eclipse; +using namespace eclipse::messages; +using namespace std; + +// on_read {{{ +void SimpleRouter::on_read (Message* m, Channel* s) { + string type = m->get_type(); + ERROR("I could not find a handler for the message type: %s", type.c_str()); +} +// }}} diff --git a/src/network/simple_router.hh b/src/network/simple_router.hh new file mode 100644 index 0000000..6fe4608 --- /dev/null +++ b/src/network/simple_router.hh @@ -0,0 +1,12 @@ +#pragma once +#include "router.hh" + +namespace eclipse { + +// +class SimpleRouter: public Router { + public: + void on_read(messages::Message*, Channel*) override; +}; + +} /* eclipse */ diff --git a/src/nodes/block.hh b/src/nodes/block.hh new file mode 100644 index 0000000..f41ee7b --- /dev/null +++ b/src/nodes/block.hh @@ -0,0 +1,4 @@ +#pragma once +#include + +typedef std::pair Block; diff --git a/src/nodes/blockmetadata.hh b/src/nodes/blockmetadata.hh new file mode 100644 index 0000000..ad00ead --- /dev/null +++ b/src/nodes/blockmetadata.hh @@ -0,0 +1,21 @@ +#pragma once +#include +#include + +namespace eclipse { + +struct BlockMetadata { + std::string name; + std::string file_name; + unsigned int seq; + uint32_t hash_key; + uint32_t size; + unsigned int type; + int replica; + std::string node; + std::string l_node; + std::string r_node; + unsigned int is_committed; +}; + +} diff --git a/src/nodes/directory.cc b/src/nodes/directory.cc deleted file mode 100644 index 1641af7..0000000 --- a/src/nodes/directory.cc +++ /dev/null @@ -1,402 +0,0 @@ -#include "directory.hh" -using namespace std; - -namespace 
eclipse { - Directory::Directory() { - path = context.settings.get("path.metadata") + "/metadata.db"; - zErrMsg = 0; - } - - Directory::~Directory() { - } - - void Directory::open_db() { - mutex.lock(); - rc = sqlite3_open(path.c_str(), &db); - if (rc) { - context.logger->error("Can't open database: %s\n", sqlite3_errmsg(db)); - } else { - context.logger->debug("Opened database successfully\n"); - } - mutex.unlock(); - } - - int Directory::file_callback(void *file_info, int argc, char **argv, char **azColName) { - if(argc > 0) { - int i = 0; - auto file = reinterpret_cast(file_info); - file->name = argv[i++]; - file->hash_key = atoi(argv[i++]); - file->size = atoll(argv[i++]); - file->num_block = atoi(argv[i++]); - file->type = atoi(argv[i++]); - file->replica = atoi(argv[i]); - file->is_valid = true; - } - return 0; - } - - int Directory::block_callback(void *block_info, int argc, char **argv, char **azColName) { - if(argc > 0) { - int i = 0; - auto block = reinterpret_cast(block_info); - block->name = argv[i++]; - block->file_name = argv[i++]; - block->seq = atoi(argv[i++]); - block->hash_key = atoi(argv[i++]); - block->size = atoi(argv[i++]); - block->type = atoi(argv[i++]); - block->replica = atoi(argv[i++]); - block->node = argv[i++]; - block->l_node = argv[i] ? argv[i] : "NULL"; - i++; - block->r_node = argv[i] ? argv[i] : "NULL"; - i++; - block->is_committed = argv[i] ? atoi(argv[i]) : 0; - block->is_valid = true; - } - return 0; - } - - int Directory::display_callback(void *NotUsed, int argc, char **argv, char **azColName) { - for (int i=0; iinfo("%s = %s\n", azColName[i], argv[i] ? 
argv[i] : "NULL"); - printf("\n"); - return 0; - } - - int Directory::file_list_callback(void *list, int argc, char **argv, char **azColName) { - auto file_list = reinterpret_cast*>(list); - for (int i=0; ipush_back(tmp_file); - } - return 0; - } - - int Directory::block_list_callback(void *list, int argc, char **argv, char **azColName) { - auto block_list = reinterpret_cast*>(list); - for (int i=0; ipush_back(tmp_block); - } - return 0; - } - - int Directory::exist_callback(void *result, int argc, char **argv, char **azColName) { - *reinterpret_cast(result) = argv[0] ? true : false; - return 0; - } - - void Directory::init_db() { - open_db(); - mutex.lock(); - sprintf(sql, "CREATE TABLE file_table( \ - name TEXT NOT NULL, \ - hash_key INT NOT NULL, \ - size INT NOT NULL, \ - num_block INT NOT NULL, \ - type INT NOT NULL, \ - replica INT NOT NULL, \ - PRIMARY KEY (name));"); - - // Execute SQL statement - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if(rc != SQLITE_OK) - { - if (rc != SQLITE_ERROR) - context.logger->error("SQL error: %s\n", zErrMsg); - - sqlite3_free(zErrMsg); - } else { - context.logger->info("file_table created successfully\n"); - } - - sprintf(sql, "CREATE TABLE block_table( \ - name TEXT NOT NULL, \ - file_name TEXT NOT NULL, \ - seq INT NOT NULL, \ - hash_key INT NOT NULL, \ - size INT NOT NULL, \ - type INT NOT NULL, \ - replica INT NOT NULL, \ - node TEXT NOT NULL, \ - l_node TEXT , \ - r_node TEXT , \ - is_committed INT , \ - PRIMARY KEY (name)); \ - CREATE INDEX index_file_name_on_block_table \ - ON block_table(file_name); \ - CREATE INDEX index_file_name_seq_on_block_table \ - ON block_table(file_name, seq);"); - - // Execute SQL statement - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if(rc != SQLITE_OK) - { - if (rc != SQLITE_ERROR) - context.logger->error("SQL error: %s\n", zErrMsg); - - sqlite3_free(zErrMsg); - } else { - context.logger->info("block_table created successfully\n"); - } - sqlite3_close(db); - mutex.unlock(); - } 
- - void Directory::insert_file_metadata(FileInfo &file_info) { - open_db(); - sprintf(sql, "INSERT INTO file_table (\ - name, hash_key, size, num_block, type, replica)\ - VALUES('%s', %" PRIu32 ", %" PRIu64 ", %u, %u, %u);", - file_info.name.c_str(), - file_info.hash_key, - file_info.size, - file_info.num_block, - file_info.type, - file_info.replica); - - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("file_metadata inserted successfully\n"); - } - - sqlite3_close(db); - } - - void Directory::insert_block_metadata(BlockInfo* block_info) { - open_db(); - sprintf(sql, "INSERT INTO block_table (\ - name, file_name, seq, hash_key, size, type, replica, node, l_node, r_node, is_committed)\ - VALUES ('%s', '%s', %u, %" PRIu32 ", %" PRIu32 ", %u, %u, '%s', '%s', '%s', %u);", - block_info->name.c_str(), - block_info->file_name.c_str(), - block_info->seq, - block_info->hash_key, - block_info->size, - block_info->type, - block_info->replica, - block_info->node.c_str(), - block_info->l_node.c_str(), - block_info->r_node.c_str(), - block_info->is_committed); - - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("block_metadata inserted successfully\n"); - } - sqlite3_close(db); - } - - void Directory::select_file_metadata(string name, FileInfo *file_info) { - open_db(); - sprintf(sql, "SELECT * from file_table where name='%s';", name.c_str()); - rc = sqlite3_exec(db, sql, file_callback, (void*)file_info, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("file_metadata selected successfully\n"); - } - sqlite3_close(db); - } - - void Directory::select_block_metadata(string file_name, unsigned int block_seq, BlockInfo 
*block_info) { - open_db(); - sprintf(sql, "SELECT * from block_table where (file_name='%s') and \ - (seq=%u);", file_name.c_str(), block_seq); - rc = sqlite3_exec(db, sql, block_callback, (void*)block_info, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("block_metadata selected successfully\n"); - } - sqlite3_close(db); - } - - void Directory::select_all_file_metadata(vector &file_list) { - open_db(); - mutex.lock(); - sprintf(sql, "SELECT * from file_table;"); - rc = sqlite3_exec(db, sql, file_list_callback, (void*)&file_list, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("file_metadata selected successfully\n"); - } - - sqlite3_close(db); - mutex.unlock(); - } - - void Directory::select_all_block_metadata(vector &block_info) { - open_db(); - mutex.lock(); - sprintf(sql, "SELECT * from block_table;"); - rc = sqlite3_exec(db, sql, block_list_callback, (void*)&block_info, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("block_metadata selected successfully\n"); - } - - mutex.unlock(); - sqlite3_close(db); - } - - void Directory::update_file_metadata(FileUpdate &file_update) { - open_db(); - mutex.lock(); - sprintf(sql, "UPDATE file_table set \ - size=%" PRIu64 ", num_block=%u where name='%s';", - file_update.size, - file_update.num_block, - file_update.name.c_str()); - - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("file_metadata updated successfully\n"); - } - sqlite3_close(db); - mutex.unlock(); - } - - void Directory::update_block_metadata(BlockUpdate &block_update) { - open_db(); - sprintf(sql, "UPDATE block_table set \ - 
size=%" PRIu32 " where (file_name='%s') and (seq=%u);", - block_update.size, - block_update.file_name.c_str(), - block_update.seq); - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("block_metadata updated successfully\n"); - } - sqlite3_close(db); - } - - void Directory::delete_file_metadata(string name) { - open_db(); - sprintf(sql, "DELETE from file_table where name='%s';", name.c_str()); - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("file_metadata deleted successfully\n"); - } - sqlite3_close(db); - } - - void Directory::delete_block_metadata(string file_name, unsigned int seq) - { - open_db(); - mutex.lock(); - sprintf(sql, "DELETE from block_table where (file_name='%s') and (seq=%u);", file_name.c_str(), seq); - rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("block_metadata deleted successfully\n"); - } - sqlite3_close(db); - mutex.unlock(); - } - - void Directory::display_file_metadata() - { - open_db(); - sprintf(sql, "SELECT * from file_table"); - rc = sqlite3_exec(db, sql, display_callback, 0, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("file_metadata displayed successfully\n"); - } - sqlite3_close(db); - } - - void Directory::display_block_metadata() { - open_db(); - mutex.lock(); - sprintf(sql, "SELECT * from block_table"); - rc = sqlite3_exec(db, sql, display_callback, 0, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("block_metadata displayed 
successfully\n"); - } - sqlite3_close(db); - mutex.unlock(); - } - - bool Directory::file_exist(string name) { - bool result = false; - open_db(); - mutex.lock(); - sprintf(sql, "SELECT name from file_table where name='%s';", name.c_str()); - rc = sqlite3_exec(db, sql, exist_callback, &result, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("file_exist executed successfully\n"); - } - sqlite3_close(db); - mutex.unlock(); - return result; - } - - void Directory::select_last_block_metadata(string file_name, BlockInfo *block_info) { - open_db(); - mutex.lock(); - sprintf(sql, "SELECT * FROM block_table WHERE (file_name='%s') ORDER BY seq DESC LIMIT 1;", file_name.c_str()); - rc = sqlite3_exec(db, sql, block_callback, (void*)block_info, &zErrMsg); - if (rc != SQLITE_OK) { - context.logger->error("SQL error: %s\n", zErrMsg); - sqlite3_free(zErrMsg); - } else { - context.logger->info("the last block_metadata selected successfully\n"); - } - sqlite3_close(db); - mutex.unlock(); - } -} diff --git a/src/nodes/directory.hh b/src/nodes/directory.hh deleted file mode 100644 index d627d4d..0000000 --- a/src/nodes/directory.hh +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "../common/context_singleton.hh" -#include "../messages/blockinfo.hh" -#include "../messages/blockupdate.hh" -#include "../messages/fileinfo.hh" -#include "../messages/fileupdate.hh" - -namespace eclipse { - using namespace messages; - class Directory { - protected: - std::mutex mutex; - sqlite3 *db; - char *zErrMsg; - std::string path; - int rc; - char sql[512]; - static int file_callback(void *file_info, int argc, char **argv, char **azColName); - static int block_callback(void *block_info, int argc, char **argv, char **azColName); - static int display_callback(void *NotUsed, int argc, char **argv, char **azColName); - 
static int file_list_callback(void *list, int argc, char **argv, char **azColName); - static int block_list_callback(void *list, int argc, char **argv, char **azColName); - static int exist_callback(void *result, int argc, char **argv, char **azColName); - - public: - Directory(); - ~Directory(); - void open_db(); - void init_db(); - void insert_file_metadata(FileInfo &file_info); - void insert_block_metadata(BlockInfo* block_info); - void select_file_metadata(std::string file_name, FileInfo *file_info); - void select_block_metadata(std::string file_name, unsigned int block_seq, BlockInfo *block_info); - void select_all_file_metadata(std::vector &file_list); - void select_all_block_metadata(std::vector &block_list); - void update_file_metadata(FileUpdate &file_update); - void update_block_metadata(BlockUpdate &block_update); - void delete_file_metadata(std::string file_name); - void delete_block_metadata(std::string file_name, unsigned int block_seq); - void display_file_metadata(); - void display_block_metadata(); - bool file_exist(std::string file_name); - void select_last_block_metadata(std::string file_name, BlockInfo *block_info); - }; -} diff --git a/src/nodes/node.hh b/src/nodes/node.hh index 715d1a1..699f6dd 100644 --- a/src/nodes/node.hh +++ b/src/nodes/node.hh @@ -3,7 +3,7 @@ #include "machine.hh" #include "../common/context_singleton.hh" #include "../messages/message.hh" -#include "../network/network.hh" +#include "../network/client_handler.hh" #include #include @@ -19,7 +19,7 @@ class Node: public Machine { std::string get_ip () const override; protected: - network::Network* network; + network::ClientHandler* network; Logger* logger; std::string ip_of_this; int port; diff --git a/src/nodes/peerdfs.cc b/src/nodes/peerdfs.cc deleted file mode 100644 index d33aeae..0000000 --- a/src/nodes/peerdfs.cc +++ /dev/null @@ -1,392 +0,0 @@ -// includes & usings {{{ -#include "peerdfs.hh" -#include "../messages/factory.hh" -#include "../messages/boost_impl.hh" - 
-#include -#include -#include -#include - -using namespace eclipse; -using namespace eclipse::messages; -using namespace eclipse::network; -using namespace boost::asio; -using namespace std; - -// }}} - -namespace eclipse { -// Constructor & destructor {{{ -PeerDFS::PeerDFS (Network* net) : Node () { - network = net; - net->attach(this); - - network_size = context.settings.get("network.nodes").size(); - boundaries.reset( new Histogram {network_size, 0}); - boundaries->initialize(); - - directory.open_db(); -} - -PeerDFS::~PeerDFS() { } -// }}} -// insert {{{ -void PeerDFS::insert(uint32_t hash_key, std::string name, std::string& v) { - int which_node = boundaries->get_index(hash_key); - - if (which_node == id) { - INFO("[DFS] Saving locally KEY: %s", name.c_str()); - local_io.write(name, v); - - } else { - INFO("[DFS] Forwaring KEY: %s -> %d", name.c_str(), which_node); - KeyValue kv (hash_key, name, v); - network->send(which_node, &kv); - } -} -// }}} -// update {{{ -void PeerDFS::update(uint32_t hash_key, std::string name, std::string v, uint32_t p, uint32_t l) { - int which_node = boundaries->get_index(hash_key); - - if (which_node == id) { - INFO("[DFS] Updating locally KEY: %s", name.c_str()); - local_io.update(name, v, p, l); - - } else { - INFO("[DFS] Forwaring KEY: %s -> %d", name.c_str(), which_node); - OffsetKeyValue okv (hash_key, name, v, p, l); - network->send(which_node, &okv); - } -} -// }}} -// request {{{ -void PeerDFS::request(uint32_t key, string name , req_func f) { - int idx = boundaries->get_index(key); - - if (idx != id) { - KeyRequest k_req (name); - k_req.set_origin (id); - network->send (idx, &k_req); - requested_blocks.insert ({name, f}); - - } else { - string value = local_io.read(name); - f(name, value); - } -} -// }}} -// close {{{ -void PeerDFS::close() { exit(EXIT_SUCCESS); } -// }}} -// process (KeyValue* m) {{{ -template<> void PeerDFS::process(KeyValue* m) { - auto key = m->key; - auto name = m->name; - - int which_node = 
boundaries->get_index(key); - if (which_node == id or m->destination == id) { - INFO("Inserting key = %s", name.c_str()); - insert(key, m->name, m->value); - } - - if (requested_blocks.find(name) != requested_blocks.end()) { - INFO("Executing func"); - requested_blocks[name](name, m->value); - requested_blocks.erase(name); - } -} -// }}} -// process (OffsetKeyValue* m) {{{ -template<> void PeerDFS::process(OffsetKeyValue* m) { - auto key = m->key; - auto name = m->name; - - int which_node = boundaries->get_index(key); - if (which_node == id or m->destination == id) { - INFO("Update key = %s", name.c_str()); - update(key, m->name, m->value, m->pos, m->len); - } -} -// }}} -// process (KeyRequest* m) {{{ -template<> void PeerDFS::process (KeyRequest* m) { - INFO("Arrived req key = %s", m->key.c_str()); - string value = local_io.read(m->key); - - KeyValue kv (0, m->key, value); - kv.destination = m->origin; - network->send(m->origin, &kv); -} -// }}} -// process (Control* m) {{{ -template<> void PeerDFS::process(Control* m) { - switch (m->type) { - case messages::SHUTDOWN: - this->close(); - break; - - case messages::RESTART: - break; - } -} -// }}} -// process (MetaData* m) {{{ -template<> void PeerDFS::process(MetaData* m) { - std::string file_name = m->node + "_replica"; - local_io.write(file_name, m->content); -} -// }}} -// process (BlockInfo* m) {{{ -template<> void PeerDFS::process(BlockInfo* m) { - int which_node = h(m->file_name) % network_size; - - if (which_node == id) { - insert_block(m); - - } else { - local_io.write(m->name, m->content); - logger->info("ideal host = %s", m->node.c_str()); - logger->info("real host = %d", id); - } -} -// }}} -// process (BlockUpdate* m) {{{ -template<> void PeerDFS::process(BlockUpdate* m) { - local_io.update(m->name, m->content, m->pos, m->len); - logger->info("block update real host = %d", id); -} -// }}} -// process (BlockDel* m) {{{ -template<> void PeerDFS::process (BlockDel* m) { - local_io.remove(m->name); -} -// 
}}} -// process (FileInfo* m) {{{ -template<> void PeerDFS::process (FileInfo* m) { - int which_node = m->hash_key % network_size; - - if (which_node != id){ - network->send(which_node, m); - - } else { - insert_file(m); - } -} -// }}} -// on_read (Message*) {{{ -void PeerDFS::on_read (Message* m, int) { - string type = m->get_type(); - if (type == "KeyValue") { - auto m_ = dynamic_cast(m); - process(m_); - } else if (type == "OffsetKeyValue") { - auto m_ = dynamic_cast(m); - process(m_); - } else if (type == "Control") { - auto m_ = dynamic_cast(m); - process(m_); - } else if (type == "KeyRequest") { - auto m_ = dynamic_cast(m); - process(m_); - } else if (type == "BlockInfo") { - auto m_ = dynamic_cast(m); - process(m_); - } else if (type == "BlockUpdate") { - auto m_ = dynamic_cast(m); - process(m_); - } else if (type == "BlockDel") { - auto m_ = dynamic_cast(m); - process(m_); - } else if (type == "FileInfo") { - auto m_ = dynamic_cast(m); - process(m_); - } else if (type == "MetaData") { - auto m_ = dynamic_cast(m); - process(m_); - } -} -// }}} -// on_connect {{{ -void PeerDFS::on_connect () { - INFO("Network established id=%d", id); -} -// }}} -// on_disconnect {{{ -void PeerDFS::on_disconnect(int id) { -} -// }}} -// insert_file {{{ -bool PeerDFS::insert_file(messages::FileInfo* f) { - bool ret = directory.file_exist(f->name.c_str()); - - if (ret) { - INFO("File:%s exists in db, ret = %i", f->name.c_str(), ret); - return false; - } - - directory.insert_file_metadata(*f); - replicate_metadata(); - - logger->info("Saving to SQLite db"); - return true; -} -// }}} -// update_file {{{ -bool PeerDFS::update_file(messages::FileUpdate* f) { - bool ret = directory.file_exist(f->name.c_str()); - - if (ret) { - directory.update_file_metadata(*f); - logger->info("Updating to SQLite db"); - return true; - } - INFO("File:%s doesn't exist in db, ret = %i", f->name.c_str(), ret); - return false; - -} -// }}} -// insert_block {{{ -bool 
PeerDFS::insert_block(messages::BlockInfo* m) { - directory.insert_block_metadata(m); - int which_node = boundaries->get_index(m->hash_key); - vector nodes; - - if (which_node == id) { - for (int i=1; ireplica; i++) { - if(i%2 == 1) { - nodes.push_back ((which_node + (i+1)/2 + network_size) % network_size); - } else { - nodes.push_back ((which_node - i/2 + network_size) % network_size); - } - } - - INFO("[DFS] Saving locally KEY: %s", m->name.c_str()); - local_io.write(m->name, m->content); - network->send_and_replicate(nodes, m); - - } else { - insert(m->hash_key, m->name, m->content); - } - replicate_metadata(); - INFO("Block inserted"); - return true; -} -// }}} -// update_block {{{ -bool PeerDFS::update_block(messages::BlockUpdate* m) { - directory.update_block_metadata(*m); - int which_node = boundaries->get_index(m->hash_key); - int tmp_node; - for (int i=0; ireplica; i++) { - if(i%2 == 1) { - tmp_node = (which_node + (i+1)/2 + network_size) % network_size; - } else { - tmp_node = (which_node - i/2 + network_size) % network_size; - } - uint32_t tmp_hash_key = boundaries->random_within_boundaries(tmp_node); - update(tmp_hash_key, m->name, m->content, m->pos, m->len); - } - return true; -} -// }}} -// delete_block {{{ -bool PeerDFS::delete_block(messages::BlockDel* m) { - directory.delete_block_metadata(m->file_name, m->seq); - int which_node = boundaries->get_index(m->hash_key); - int tmp_node; - - for (int i=0; ireplica; i++) { - if (i%2 == 1) { - tmp_node = (which_node + (i+1)/2 + network_size) % network_size; - } else { - tmp_node = (which_node - i/2 + network_size) % network_size; - } - if (id == tmp_node) { - string block_name = m->name; - local_io.remove(block_name); - } else { - network->send(tmp_node, m); - } - } - replicate_metadata(); - return true; -} -// }}} -// delete_file {{{ -bool PeerDFS::delete_file (messages::FileDel* f) { - bool ret = directory.file_exist(f->name.c_str()); - if (!ret) { - INFO("File:%s doesn't exist in db, ret = %i", 
f->name.c_str(), - ret); - return false; - } - directory.delete_file_metadata(f->name); - replicate_metadata(); - INFO("Removing from SQLite db"); - return true; -} -// }}} -// request_file {{{ -FileDescription PeerDFS::request_file (messages::FileRequest* m) { - string file_name = m->name; - - FileInfo fi; - fi.num_block = 0; - FileDescription fd; - fd.name = file_name; - - directory.select_file_metadata(file_name, &fi); - fd.replica = fi.replica; - fd.size = fi.size; - - int num_blocks = fi.num_block; - for (int i = 0; i< num_blocks; i++) { - BlockInfo bi; - directory.select_block_metadata (file_name, i, &bi); - string block_name = bi.name; - fd.blocks.push_back(block_name); - fd.hash_keys.push_back(bi.hash_key); - fd.block_size.push_back(bi.size); - } - - return fd; -} -// }}} -// list {{{ -bool PeerDFS::list (messages::FileList* m) { - directory.select_all_file_metadata(m->data); - return true; -} -// }}} -// format {{{ -bool PeerDFS::format () { - INFO("Formating DFS"); - local_io.format(); - directory.init_db(); - return true; -} -// }}} -// file_exist {{{ -bool PeerDFS::file_exist (std::string file_name) { - return directory.file_exist(file_name.c_str()); -} -// }}} -// replicate_metadata {{{ -// This function replicates to its right and left neighbor -// node the metadata db. This function is intended to be -// invoked whenever the metadata db is modified. -void PeerDFS::replicate_metadata() { - MetaData md; - md.node = context.settings.getip(); - md.content = local_io.read_metadata(); - - int left_node = ((id - 1) < 0) ? network_size - 1: id - 1; - int right_node = ((id + 1) == network_size) ? 
0 : id + 1; - - network->send(left_node, &md); - network->send(right_node, &md); -} -// }}} -} diff --git a/src/nodes/peerdfs.hh b/src/nodes/peerdfs.hh deleted file mode 100644 index 946d3e1..0000000 --- a/src/nodes/peerdfs.hh +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include "node.hh" -#include "local_io.hh" -#include "../network/asyncnode.hh" -#include "../messages/blockinfo.hh" -#include "../messages/blockupdate.hh" -#include "../messages/fileinfo.hh" -#include "../messages/fileupdate.hh" -#include "../messages/keyrequest.hh" -#include "../messages/filerequest.hh" -#include "../messages/filedescription.hh" -#include "../messages/filelist.hh" -#include "../messages/filedel.hh" -#include "../messages/blockdel.hh" -#include "../messages/fileexist.hh" -#include "directory.hh" -#include "../common/histogram.hh" - -#include -#include - -namespace eclipse { - -using vec_str = std::vector; -typedef std::function req_func; - -class PeerDFS: public Node, public AsyncNode { - public: - PeerDFS (network::Network*); - ~PeerDFS (); - - void on_read (messages::Message*, int) override; - void on_connect () override; - void on_disconnect(int) override; - - virtual void insert (uint32_t, std::string, std::string&); - virtual void update (uint32_t, std::string, std::string, uint32_t, uint32_t); - virtual void request (uint32_t, std::string, req_func); - - void close (); - virtual bool insert_block (messages::BlockInfo*); - bool update_block (messages::BlockUpdate*); - virtual bool insert_file (messages::FileInfo*); - bool update_file (messages::FileUpdate*); - bool delete_block (messages::BlockDel*); - bool delete_file (messages::FileDel*); - bool list (messages::FileList*); - virtual bool format (); - FileDescription request_file (messages::FileRequest*); - bool file_exist (std::string); - template void process (T); - - protected: - void replicate_metadata(); - - Directory directory; - Local_io local_io; - std::unique_ptr boundaries; - std::map requested_blocks; - int 
network_size; - -}; - -} diff --git a/src/nodes/remotedfs.cc b/src/nodes/remotedfs.cc deleted file mode 100644 index e473199..0000000 --- a/src/nodes/remotedfs.cc +++ /dev/null @@ -1,205 +0,0 @@ -#include "remotedfs.hh" -#include -#include - -using namespace std; -using namespace eclipse; -namespace ph = std::placeholders; - -// Constructor {{{ -RemoteDFS::RemoteDFS (PeerDFS* p, network::Network* net) : Router(net) { - peer_dfs = p; - - using namespace std::placeholders; - using std::placeholders::_1; - using std::placeholders::_2; - auto& rt = routing_table; - rt.insert({"BlockInfo", bind(&RemoteDFS::insert_block, this, _1, _2)}); - rt.insert({"BlockUpdate", bind(&RemoteDFS::update_block, this, _1, _2)}); - rt.insert({"FileInfo", bind(&RemoteDFS::insert_file, this, _1, _2)}); - rt.insert({"FileUpdate", bind(&RemoteDFS::update_file, this, _1, _2)}); - rt.insert({"FileRequest", bind(&RemoteDFS::request_file, this, _1, _2)}); - rt.insert({"BlockRequest", bind(&RemoteDFS::request_block, this, _1, _2)}); - rt.insert({"FileList", bind(&RemoteDFS::request_ls, this, _1, _2)}); - rt.insert({"BlockDel", bind(&RemoteDFS::delete_block, this, _1, _2)}); - rt.insert({"FileDel", bind(&RemoteDFS::delete_file, this, _1, _2)}); - rt.insert({"FormatRequest", bind(&RemoteDFS::request_format, this, _1, _2)}); - rt.insert({"FileExist", bind(&RemoteDFS::file_exist, this, _1, _2)}); -} -// }}} -// BlockInfo {{{ -void RemoteDFS::insert_block (messages::Message* m_, int n_channel) { - auto m = dynamic_cast (m_); - logger->info ("BlockInfo received"); - bool ret = peer_dfs->insert_block(m); - Reply reply; - - if (ret) { - reply.message = "OK"; - - } else { - reply.message = "FAIL"; - reply.details = "Block already exists"; - } - network->send(n_channel, &reply); -} -// }}} -// BlockUpdate {{{ -void RemoteDFS::update_block (messages::Message* m_, int n_channel) { - auto m = dynamic_cast (m_); - logger->info ("BlockUpdate received"); - - bool ret = peer_dfs->update_block(m); - Reply reply; - 
- if (ret) { - reply.message = "OK"; - - } else { - reply.message = "FAIL"; - reply.details = "Block update failed"; - } - - network->send(n_channel, &reply); -} -// }}} -// delete_block {{{ -void RemoteDFS::delete_block (messages::Message* m_, int n_channel) { - auto m = dynamic_cast (m_); - logger->info ("BlockDel received"); - - bool ret = peer_dfs->delete_block(m); - - Reply reply; - - if (ret) { - reply.message = "OK"; - } else { - reply.message = "FAIL"; - reply.details = "Block doesn't exist"; - } - - network->send(n_channel, &reply); -} -// }}} -// FileInfo* {{{ -void RemoteDFS::insert_file (messages::Message* m_, int n_channel) { - auto m = dynamic_cast (m_); - logger->info ("FileInfo received"); - - bool ret = peer_dfs->insert_file (m); - Reply reply; - - if (ret) { - reply.message = "OK"; - - } else { - reply.message = "FAIL"; - reply.details = "File already exists"; - } - - network->send(n_channel, &reply); -} -// }}} -// FileUpdate* {{{ -void RemoteDFS::update_file (messages::Message* m_, int n_channel) { - auto m = dynamic_cast (m_); - logger->info ("FileUpdate received"); - - bool ret = peer_dfs->update_file (m); - Reply reply; - - if (ret) { - reply.message = "OK"; - - } else { - reply.message = "FAIL"; - reply.details = "File doesn't exist"; - } - - network->send(n_channel, &reply); -} -// }}} -// {{{ FileDel -void RemoteDFS::delete_file (messages::Message* m_, int n_channel) { - auto m = dynamic_cast (m_); - logger->info ("FileDel received"); - - bool ret = peer_dfs->delete_file (m); - Reply reply; - - if (ret) { - reply.message = "OK"; - } else { - reply.message = "FAIL"; - reply.details = "File doesn't exist"; - } - - network->send(n_channel, &reply); -} -// }}} -// request_file {{{ -void RemoteDFS::request_file (messages::Message* m_, int n_channel) { - auto m = dynamic_cast (m_); - logger->info ("File Info received %s", m->name.c_str()); - - auto fd = peer_dfs->request_file (m); - network->send(n_channel, &fd); -} -// }}} -// request_block {{{ 
-void RemoteDFS::request_block (messages::Message* m_, int n_channel) { - auto m = dynamic_cast (m_); - auto key = m->hash_key; - auto name= m->name; - peer_dfs->request(key, name, std::bind(&RemoteDFS::send_block, this, - ph::_1, ph::_2, n_channel)); -} -// }}} -// request_ls {{{ -void RemoteDFS::request_ls (messages::Message* m_, int n_channel) { - auto m = dynamic_cast (m_); - peer_dfs->list(m); - network->send(n_channel, m); -} -// }}} -// send_block {{{ -void RemoteDFS::send_block (std::string k, std::string v, int n_channel) { - logger->info ("Sending Block %s", k.c_str()); - BlockInfo bi; - bi.name = k; - bi.content = v; - - network->send(n_channel, &bi); -} -// }}} -// request_format {{{ -void RemoteDFS::request_format (messages::Message* m_, int n_channel) { - bool ret = peer_dfs->format(); - Reply reply; - - if (ret) { - reply.message = "OK"; - - } else { - reply.message = "FAIL"; - } - - network->send(n_channel, &reply); -} -// }}} -// file_exist {{{ -void RemoteDFS::file_exist (messages::Message* m_, int n_channel) { - auto m = dynamic_cast (m_); - bool ret = peer_dfs->file_exist(m->name); - Reply reply; - - if (ret) { - reply.message = "TRUE"; - - } else { - reply.message = "FALSE"; - } - network->send(n_channel, &reply); -} -// }}} diff --git a/src/nodes/remotedfs.hh b/src/nodes/remotedfs.hh deleted file mode 100644 index 260c1c7..0000000 --- a/src/nodes/remotedfs.hh +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once -#include "peerdfs.hh" -#include "router.hh" -#include "../messages/boost_impl.hh" - -namespace eclipse { - -using boost::system::error_code; -using boost::asio::ip::tcp; - -class RemoteDFS: public Router { - public: - RemoteDFS (PeerDFS*, network::Network*); - ~RemoteDFS () = default; - - void insert_block (messages::Message*, int); - void update_block (messages::Message*, int); - void insert_file (messages::Message*, int); - void update_file (messages::Message*, int); - void request_file (messages::Message*, int); - void request_block 
(messages::Message*, int); - void request_ls (messages::Message*, int); - void delete_file (messages::Message*, int); - void delete_block (messages::Message*, int); - void send_block (std::string, std::string, int); - void request_format (messages::Message*, int); - void file_exist (messages::Message*, int); - - protected: - PeerDFS* peer_dfs = nullptr; -}; - -} /* eclipse */ diff --git a/src/nodes/router.cc b/src/nodes/router.cc deleted file mode 100644 index 8e0ea2a..0000000 --- a/src/nodes/router.cc +++ /dev/null @@ -1,36 +0,0 @@ -#include "router.hh" -#include "../messages/factory.hh" - -using namespace eclipse; -using namespace eclipse::messages; -using namespace std; - -namespace eclipse { -// Constructor {{{ -Router::Router(network::Network* net) : Node () { - network = net; - net->attach(this); -} - -Router::~Router() { } -// }}} -// on_read {{{ -void Router::on_read (Message* m, int n_channel) { - string type = m->get_type(); - try { - routing_table[type](m, n_channel); - } catch (std::exception& e) { - ERROR("Can not find message type(ROUTER) : %s : E: %s ", type.c_str(), e.what()); - } -} -// }}} -// on_disconnect {{{ -void Router::on_disconnect (int id) { -} -// }}} -// on_connect() {{{ -void Router::on_connect () { - logger->info("Client connected to executor #%d", id); -} -// }}} -} /* eclipse */ diff --git a/src/nodes/router.hh b/src/nodes/router.hh deleted file mode 100644 index ded0c97..0000000 --- a/src/nodes/router.hh +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once -#include "../nodes/node.hh" -#include "../messages/boost_impl.hh" -#include - -namespace eclipse { -using namespace eclipse::network; - -/** - * This class handle every incomming message. 
- * It inherits from Node and implements AsyncNode - */ -class Router: public Node, public AsyncNode { - public: - Router (network::Network*); - ~Router (); - - void on_connect() override; - void on_disconnect(int) override; - void on_read(messages::Message*, int) override; - - protected: - std::map> routing_table; - std::unique_ptr peer; - int port; -}; - -} /* eclipse */ diff --git a/src/targets/client.cc b/src/targets/client.cc new file mode 100644 index 0000000..93706e9 --- /dev/null +++ b/src/targets/client.cc @@ -0,0 +1,11 @@ +#include "../client/cli_driver.hh" +#include + +using namespace velox; +using namespace std; + +int main(int argc, char** argv) { + cli_driver cli; + cli.parse_args(argc, argv); + return EXIT_SUCCESS; +} diff --git a/src/targets/kmeans.cc b/src/targets/kmeans.cc index 71201e0..8b792ed 100644 --- a/src/targets/kmeans.cc +++ b/src/targets/kmeans.cc @@ -15,7 +15,7 @@ #define INPUT_NAME "kmeans.input" #define OUTPUT_NAME "kmeans.output" #define CENTROID_NAME "kmeans_centroids.data" -#define LOCAL_CENTROID_PATH "/home/deukyeon/EclipseMR/data/kmeans_centroids.data" +#define LOCAL_CENTROID_PATH "/home/vicente/velox_test/kmeans_centroids.data" #define ITERATIONS 5 #define NUM_CLUSTERS 25 @@ -26,7 +26,7 @@ extern "C" { void before_map(std::unordered_map&); void after_map(std::unordered_map&); void mymapper(std::string&, velox::OutputCollection&, std::unordered_map&); - void myreducer(std::string&, std::list&, OutputCollection&); + void myreducer(std::string&, std::vector&, OutputCollection&); } class Point { @@ -129,7 +129,7 @@ void mymapper(std::string& input, velox::OutputCollection& mapper_results, std:: mapper_results.insert(nearest_centroid.to_string(), p.to_string()); } -void myreducer(std::string& key, std::list& values, OutputCollection& output) { +void myreducer(std::string& key, std::vector& values, OutputCollection& output) { if(values.size() == 0) return; double sumX = 0, sumY = 0; @@ -149,6 +149,11 @@ void myreducer(std::string& 
key, std::list& values, OutputCollectio Point centroid((sumX / count), (sumY / count)); output.insert(centroid.to_string(), value_string); + + std::ofstream os; + os.open(LOCAL_CENTROID_PATH, ios::app); + os << centroid.to_string() << endl; + os.close(); } int main (int argc, char** argv) { @@ -189,25 +194,10 @@ int main (int argc, char** argv) { output_name = "kmeans.output-" + to_string(i); A.map("mymapper"); + std::remove(LOCAL_CENTROID_PATH); A.reduce("myreducer", output_name); - - if(i < ITERATIONS - 1) { - // parse output and make centroid file updated - file output_file = cloud.open(output_name); - std::istringstream stream(output_file.get()); - std::string output_line; - - os.open(LOCAL_CENTROID_PATH); - while(getline(stream, output_line)) { - std::string::size_type pos = output_line.find(':'); - std::string centroid_string = output_line.substr(0, pos) + "\n"; - os.write(centroid_string.c_str(), centroid_string.size()); - } - os.close(); - } } - std::remove(LOCAL_CENTROID_PATH); std::cout << "FINISH k-means clusering" << std::endl; @@ -215,18 +205,16 @@ int main (int argc, char** argv) { std::cout << "========================" << std::endl; std::cout << "Centroids" << std::endl; - file output_file = cloud.open(output_name); - std::istringstream stream(output_file.get()); + ifstream fs; + fs.open(LOCAL_CENTROID_PATH); std::string output_line; - int cnt = 0; - while(getline(stream, output_line)) { - std::string::size_type pos = output_line.find(':'); - std::string centroid_string = output_line.substr(0, pos) + "\n"; - std::cout << centroid_string; + while(getline(fs, output_line)) { + cout << output_line << endl; cnt++; } + std::remove(LOCAL_CENTROID_PATH); std::cout << "Total # of centroids:" << cnt << std::endl; return 0; diff --git a/src/targets/node_main.cc b/src/targets/node_main.cc index 704850b..503a0e7 100644 --- a/src/targets/node_main.cc +++ b/src/targets/node_main.cc @@ -1,23 +1,28 @@ -#include #include -#include -#include -#include +#include 
+#include +#include +#include +#include #include -#include using namespace eclipse; +using namespace std; int main (int argc, char ** argv) { - int in_port = context.settings.get("network.ports.internal"); - int ex_port = context.settings.get("network.ports.client"); + uint32_t ex_port = GET_INT("network.ports.client"); - auto internal_net = make_unique> (in_port); - PeerDFS peer (internal_net.get()); - internal_net->establish(); + auto internal_net = make_unique (ex_port); + auto external_net = make_unique (ex_port); + + FileLeader file_leader(internal_net.get()); + BlockNode block_node(internal_net.get()); + + // Decorator pattern: I want FileLeader and Block node on the same network + auto router = make_unique(&file_leader, new BlockNodeRouter(&block_node, new SimpleRouter())); + + external_net->attach(router.get()); - auto external_net = make_unique> (ex_port); - RemoteDFS remote (&peer, external_net.get()); external_net->establish(); context.join(); diff --git a/src/targets/node_main_mr.cc b/src/targets/node_main_mr.cc index 78e8370..c47c6e3 100644 --- a/src/targets/node_main_mr.cc +++ b/src/targets/node_main_mr.cc @@ -1,27 +1,91 @@ -#include -#include +#define MALLOC_CHECK_ 3 #include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include +#include +#include +#include +#include using namespace eclipse; int main (int argc, char ** argv) { - int in_port = context.settings.get("network.ports.internal"); - int ex_port = context.settings.get("network.ports.client"); + context.io.notify_fork(boost::asio::io_service::fork_prepare); - auto internal_net = make_unique> (in_port); - PeerMR peer (internal_net.get()); - internal_net->establish(); + pid_t pid = fork(); + if (pid != 0) { + try { + context.io.notify_fork(boost::asio::io_service::fork_parent); + context.run(); - auto external_net = make_unique> (ex_port); - RemoteMR remote (&peer, external_net.get()); - external_net->establish(); + 
struct rlimit limit; - context.join(); + limit.rlim_cur = 4000; + limit.rlim_max = 4096; + if (setrlimit(RLIMIT_NOFILE, &limit) != 0) { + ERROR("setrlimit() failed with errno=%d\n", errno); + return 1; + } + struct rlimit core_limits; + core_limits.rlim_cur = core_limits.rlim_max = RLIM_INFINITY; + setrlimit(RLIMIT_CORE, &core_limits); + + + sleep(2); + + uint32_t ex_port = GET_INT("network.ports.mapreduce"); + auto internal_net = make_unique (ex_port); + auto external_net = make_unique (ex_port); + + TaskExecutor executor(internal_net.get()); + + auto router = make_unique(&executor, new SimpleRouter()); + + external_net->attach(router.get()); + + external_net->establish(); + + context.join(); + } catch (std::exception& e) { + ERROR("GENERAL exception at %s", e.what()); + } + catch (boost::exception& e) { + INFO("GENERAL exception %s", diagnostic_information(e).c_str()); + } + + + wait(NULL); + + } else { + context.io.notify_fork(boost::asio::io_service::fork_child); + context.run(); + + uint32_t ex_port = GET_INT("network.ports.client"); + + auto internal_net = make_unique (ex_port); + auto external_net = make_unique (ex_port); + + FileLeader file_leader(internal_net.get()); + BlockNode block_node(internal_net.get()); + + // Decorator pattern: I want FileLeader and Block node on the same network + auto router = make_unique(&file_leader, + new BlockNodeRouter(&block_node, new SimpleRouter())); + + external_net->attach(router.get()); + + external_net->establish(); + + context.join(); + } return EXIT_SUCCESS; } diff --git a/src/targets/veloxmr_mapreduce.cc b/src/targets/veloxmr_mapreduce.cc index d25cd95..524ff2b 100644 --- a/src/targets/veloxmr_mapreduce.cc +++ b/src/targets/veloxmr_mapreduce.cc @@ -4,6 +4,7 @@ #include #include #include +#include using namespace velox; using namespace std; diff --git a/tests/Makefile.am b/tests/Makefile.am index 78f6a82..5a6f161 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -2,16 +2,14 @@ AM_CPPFLAGS_T = $(AM_CPPFLAGS) 
-include unittest++/UnitTest++.h LDADD_T = $(LDADD) -l:libUnitTest++.a -ldl -lsqlite3 # Input your tests files here -check_PROGRAMS = nodes units metadata +check_PROGRAMS = units metadata +check_SCRIPTS = integration -TESTS = units -check_SCRIPTS = tests/nodes_executor.in \ - tests/eclipse_debug.in +TESTS = units check_DATA = $(top_srcdir)/doc/eclipse.json CLEANFILES = eclipse.json - # Input the dependencies of the test files here units_LDADD = $(LDADD_T) units_CPPFLAGS = $(AM_CPPFLAGS_T) @@ -20,34 +18,20 @@ units_SOURCES = $(messages_files) \ tests/messages_test.cc \ tests/dl_test.cc \ src/common/dl_loader.cc \ - src/network/channel.cc \ src/network/asyncchannel.cc \ - src/network/p2p.cc \ - src/network/acceptor.cc \ - src/network/connector.cc \ + src/network/server.cc \ src/nodes/machine.cc \ src/nodes/node.cc -nodes_LDADD = $(LDADD_T) -nodes_CPPFLAGS = $(AM_CPPFLAGS_T) -nodes_SOURCES = tests/nodes.cc \ - $(messages_files) \ - src/network/channel.cc \ - src/network/asyncchannel.cc \ - src/network/p2p.cc \ - src/network/server.cc \ - src/network/acceptor.cc \ - src/network/connector.cc \ - src/nodes/machine.cc \ - src/nodes/peerdfs.cc \ - src/nodes/remotedfs.cc \ - src/nodes/router.cc \ - src/nodes/local_io.cc \ - src/nodes/directory.cc \ - src/nodes/node.cc - metadata_LDADD = $(LDADD_T) metadata_CPPFLAGS = $(AM_CPPFLAGS_T) metadata_SOURCES = tests/metadata_test.cc \ - src/nodes/directory.cc \ + src/fileleader/directory.cc \ $(messages_files) + +check-integration: integration + bash integration.sh + +integration: + cp $(top_srcdir)/tests/integration_test.sh integration.sh + chmod +x integration.sh diff --git a/tests/eclipse_debug.in b/tests/eclipse_debug.in deleted file mode 100644 index 58d323a..0000000 --- a/tests/eclipse_debug.in +++ /dev/null @@ -1,135 +0,0 @@ -#!/bin/env ruby -# vim: ft=ruby : fileencoding=utf-8 : foldmethod=marker : set autoindent - -require 'json' # -require 'optparse' # For parsing the options - -module EclipseDaemon - CONFPATH = 
"@sysconfdirfull@/eclipse.json" - # find_confpath {{{ - def find_confpath - home = "#{ENV['HOME']}/.eclipse.json" - etc = "/etc/.eclipse.json" - - if File.exists? home - return home - elsif File.exists? etc - return etc - else - return CONFPATH - end - end - # }}} - - class Core - include EclipseDaemon - # Initialize {{{ - def initialize (file, range) - @configfile = File.open(find_confpath) { |f| JSON.parse(f.read) } - @nodelist = @configfile['network']['nodes'] - @nodelist = @nodelist[0..(range - 1)] - @configfile['network']['nodes'] = @nodelist - @verbose = false - @file = file - @range = range - end - - # }}} - # launch {{{ - def launch - @nodelist.each do |node| - cmd = "ssh #{node} 'nohup #{@file} #{JSON.generate(@configfile).dump} /dev/null & exit'" - puts cmd - system cmd - end - end - - #}}} - # debug {{{ - def debug - @nodelist.drop(1).each do |node| - cmd = "ssh #{node} 'nohup #{@file} #{JSON.generate(@configfile).dump} /dev/null & exit'" - puts cmd - system cmd - end - exec "gdb --args #{@file} #{JSON.generate(@configfile).dump}" - end - - #}}} - # debug_at {{{ - def debug_at(index) - i = 0 - @nodelist.each do |node| - if i != index.to_i then - cmd = "ssh #{node} 'nohup #{@file} #{JSON.generate(@configfile).dump} /dev/null & exit'" - puts cmd - system cmd - end - i = i + 1 - end - cmd = "ssh #{@nodelist[index.to_i]} \'gdb --args #{@file} #{JSON.generate(@configfile).dump}\'" - puts cmd - exec cmd - end - #}}} - # unique {{{ - def unique - @nodelist.each do |node| - cmd = "#{@file} #{JSON.generate(@configfile).dump}" - puts cmd - system cmd - end - end - - #}}} - # close {{{ - def close - @nodelist.each do |node| - `ssh #{node} pkill -u #{`whoami`.chomp} #{File.basename(@file)}` - end - end #}}} - # kill {{{ - def kill(input) - @nodelist.each do |node| - cmd = "ssh #{node} \'pkill -u #{`whoami`.chomp} #{input.join}\'" - puts cmd if @verbose - system cmd - end - end #}}} - # pry {{{ - def pry - require 'pry' - binding.pry - end #}}} - end - - class 
CLI_driver < Core - def initialize file:, range:, input: #{{{ - @options = {} - super(file, range) - OptionParser.new do |opts| - opts.banner = "ecfs (Eclipse FileSystem) is an script to manage the fs\n" + - "Usage: ecfs [options] [FILE]..." - opts.version = 1.0 - opts.program_name = "Eclipse Launcher" - opts.separator "Core actions" - opts.separator " launch Create new Eclipse network" - opts.separator " close Close the network" - opts.separator " status Check the status of the network" - opts.separator " submit Submit application" - opts.separator " kill kill application in each node" - end.parse! input - - case input.shift - when 'launch' then launch - when 'unique' then unique - when 'debug' then debug - when 'debug_at' then debug_at input[0] - when 'close' then close - when 'status' then show - when 'kill' then kill input - else raise "Not action given" - end - end #}}} - end -end diff --git a/tests/executor.cc b/tests/executor.cc deleted file mode 100644 index 894bbd5..0000000 --- a/tests/executor.cc +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -#include - -using namespace eclipse; - -int main (int argc, char ** argv) { - - string input = argv[1]; - - Context context (input); - context.run (); - - Executor exec (context); - exec.establish (); - - return context.join (); -} diff --git a/tests/integration_test.sh b/tests/integration_test.sh new file mode 100644 index 0000000..4e52440 --- /dev/null +++ b/tests/integration_test.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# Simple integration tests to test dfs put | get +# 1. Get generate a file ~20MiB +# 2. we upload it . +# 3. we downloaded and compare to the original file. +# +# We just use velox network with a single node for +# constrictions of the Travis CI plataform. 
+ +trap cleanup SIGHUP SIGINT SIGTERM EXIT # Always call cleanup + +: ${FILE_SIZE:=20M} + +node_pid=0 +root_dir=`pwd` + +die() { echo "$@" 1>&2 ; exit 1; } + +function setup() { + #create scratch folder + [ -d scratch ] || mkdir scratch + [ -d tmp ] || mkdir tmp + + # Setup mandatory variables + export NETWORK_NODES=127.0.0.1 + export PATH_SCRATCH=`readlink -f scratch` + export PATH_IDATA=$PATH_SCRATCH + export PATH_METADATA=$PATH_SCRATCH + export PATH_APPLICATIONS=$PATH_SCRATCH + export NETWORK_PORTS_CLIENT=4333 + export NETWORK_IFACE=lo + + #Generate sample file + dd if=/dev/urandom bs=$FILE_SIZE count=1 | tr -dc 'a-z0-9' | fold -w 255 > sample.txt + + #Make sure I am the only instance + pgrep eclipse_node &> /dev/null && pkill eclipse_node +} + +function cleanup() { + cd $root_dir + rm sample.txt + rm -rf tmp + rm -rf scratch + + kill $node_pid +} + + +setup + +./eclipse_node & +node_pid=$! + +# Let it setup +sleep 1 + +./dfs put sample.txt || die "Put file problem" + +file scratch/sample.txt_0 &>/dev/null || die "DFS did not recieved the file" + +cd tmp +../dfs get sample.txt || die "Get file problem" + +diff -s sample.txt ../sample.txt || die "Files are not identical" diff --git a/tests/metadata_test.cc b/tests/metadata_test.cc index 9a02262..7b88e88 100644 --- a/tests/metadata_test.cc +++ b/tests/metadata_test.cc @@ -1,7 +1,7 @@ #include #include "messages/fileinfo.hh" #include "messages/blockinfo.hh" -#include "nodes/directory.hh" +#include "fileleader/directory.hh" #include "common/context_singleton.hh" using namespace eclipse; @@ -12,7 +12,7 @@ int main() { context.logger->info("==========Start dir test=========="); // Basic metadata io example - dir.init_db(); + dir.create_tables(); FileInfo file_info; BlockInfo block_info; @@ -35,14 +35,13 @@ int main() { block_info.r_node = "1.1.1.2"; block_info.is_committed = 3; - dir.insert_file_metadata(file_info); - dir.insert_block_metadata(&block_info); + dir.file_table_insert(file_info); file_info.name = 
"test2.txt"; - dir.insert_file_metadata(file_info); + dir.file_table_insert(file_info); file_info.name = "test3.txt"; - dir.insert_file_metadata(file_info); + dir.file_table_insert(file_info); //dir.display_file_metadata(); //dir.display_block_metadata(); diff --git a/tests/nodes.cc b/tests/nodes.cc deleted file mode 100644 index 015762f..0000000 --- a/tests/nodes.cc +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -using namespace eclipse; -using namespace eclipse::network; - -int main (int argc, char ** argv) { - int in_port = context.settings.get("network.ports.internal"); - int ex_port = context.settings.get("network.ports.client"); - - network::Network* internal_net = new network::AsyncNetwork(in_port); - PeerDFS peer (internal_net); - internal_net->establish(); - - network::Network* external_net = new network::AsyncNetwork(ex_port); - RemoteDFS remote (&peer, external_net); - external_net->establish(); - - context.join(); - delete internal_net; - delete external_net; - - return EXIT_SUCCESS; -} diff --git a/tests/nodes_executor.in b/tests/nodes_executor.in deleted file mode 100644 index c5c0608..0000000 --- a/tests/nodes_executor.in +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env ruby -# vim: ft=ruby : fileencoding=utf-8 : foldmethod=marker : set autoindent -load 'tests/eclipse_debug' - -RANGE = 3 -PATH = "#{Dir.pwd}/nodes" - -EclipseDaemon::CLI_driver.new file: PATH, range: RANGE, input: ARGV diff --git a/tests/nodes_test.cc b/tests/nodes_test.cc deleted file mode 100644 index ab503bd..0000000 --- a/tests/nodes_test.cc +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include - -using namespace eclipse; - -SUITE(NODES) { - TEST(BASIC) { - Context context; - RemoteDFS nl; - - sleep (1); - } -} - diff --git a/tests/vdfs.cc b/tests/vdfs.cc new file mode 100644 index 0000000..5cff8fb --- /dev/null +++ b/tests/vdfs.cc @@ -0,0 +1,62 @@ +#include +#include + +#include + +using namespace velox; +using namespace std; + 
+std::ostream& operator<<(std::ostream& os, const velox::model::metadata& md) { + os << "------------------------------------------" << std::endl; + os << "|name\t\t|hash_key\t|size\t|num_block\t|type\t|replica\t|"<< std::endl; + os << "|" << md.name << "\t|" << md.hash_key << "\t|" << md.size << "\t|" << md.num_block << "\t\t|" << md.type << "\t|" << md.replica << "\t\t|"<< std::endl; + return os; +} + +int main () { + vdfs cloud; + long fd = cloud.open_file("test.txt"); + cout << "test.txt is opened." << endl; + + int write_length = 13; + char input[write_length]; + char abc[3] = { 'a', 'b', 'c' }; + + //for(int i=0; i<3; i++) { + // model::metadata md = cloud.get_metadata(fd); + + // memset(input, abc[i], 3); + // cloud.write(fd, input, md.size, write_length); + //} + + model::metadata md = cloud.get_metadata(fd); + + strncpy(input, abc, 3); + cloud.write(fd, input, md.size, 3); + cloud.append("test.txt", " THIS LINE GOES AT LAST"); + + cout << "write" << endl; + + int read_length = 3; + char str[read_length]; + cloud.read(fd, str, 0, read_length); + + cout << "read: " << str << endl; + + DFS dfs; + cout << "read all: " << dfs.read_all("test.txt") << endl; + + md = cloud.get_metadata(fd); + + cout << md << endl; + + cloud.close(fd); + + cout << "close" << endl; + + cloud.rm("test.txt"); + + + + return 0; +} From e40bd8e6b0e91ce0f4d249899b13d64df44a2649 Mon Sep 17 00:00:00 2001 From: Wonbae Kim Date: Tue, 18 Apr 2017 16:55:41 +0900 Subject: [PATCH 2/8] Iwriter lock (#45) * Modified locking and the termination condition of writing thread. * Modified locking and the termination condition of writing thread. 2 * Modified locking and the termination condition of writing thread. 3 * Modified locking and the termination condition of writing thread. 4 * Modified locking and the termination condition of writing thread. 
5 --- src/mapreduce/fs/iwriter.cc | 17 ++++++++++++++--- src/mapreduce/fs/iwriter.h | 3 ++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/mapreduce/fs/iwriter.cc b/src/mapreduce/fs/iwriter.cc index f46b941..ef4cdb4 100644 --- a/src/mapreduce/fs/iwriter.cc +++ b/src/mapreduce/fs/iwriter.cc @@ -43,6 +43,7 @@ IWriter::IWriter() { scratch_path_ = context.settings.get("path.idata"); is_write_start_ = false; is_write_finish_ = false; + copy_phase_done_ = false; index_counter_ = 0; writing_index_ = -1; write_buf_size_ = context.settings.get("mapreduce.write_buf_size"); @@ -86,6 +87,7 @@ void IWriter::finalize() { } } is_write_start_ = true; + copy_phase_done_ = true; writer_thread_->join(); for (uint32_t i = 0; i < reduce_slot_; ++i) { @@ -113,18 +115,27 @@ void IWriter::seek_writable_block() { while(!is_write_finish_) { // Check if there is any block that should be written to disk. // And if it's true, write it onto disk. + std::shared_ptr> writing_block = nullptr; + int reducer_id = -1; mutex.lock(); for (uint32_t i = 0; i < reduce_slot_; ++i) { if (kmv_blocks_[i].size() > 0 && is_write_ready_[i].back()) { - auto writing_block = kmv_blocks_[i].back(); + writing_block = kmv_blocks_[i].back(); kmv_blocks_[i].pop_back(); is_write_ready_[i].pop_back(); - write_block(writing_block, i); + reducer_id = i; + break; } } + mutex.unlock(); + + if (writing_block != nullptr) { + write_block(writing_block, reducer_id); + } // Check if there are no more incoming key value pairs. 
- if(is_write_start_) { + mutex.lock(); + if(copy_phase_done_) { uint32_t finish_counter = 0; for (uint32_t i = 0; i < reduce_slot_; ++i) { if(kmv_blocks_[i].size() == 0) { diff --git a/src/mapreduce/fs/iwriter.h b/src/mapreduce/fs/iwriter.h index d4b7d41..126b81a 100644 --- a/src/mapreduce/fs/iwriter.h +++ b/src/mapreduce/fs/iwriter.h @@ -12,7 +12,7 @@ #include #include #include "iwriter_interface.hh" -#include "../../common/context.hh" +#include "../../common/context_singleton.hh" #include "../../messages/message.hh" #include "../../messages/reply.hh" #include "../fs/directorymr.hh" @@ -65,6 +65,7 @@ class IWriter: public IWriter_interface { string scratch_path_; bool is_write_start_; bool is_write_finish_; + bool copy_phase_done_; uint32_t index_counter_; uint32_t writing_index_; uint32_t write_buf_size_; From 34fd287c3e1a83c97be7ce79355212ba032c9a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicente=20Adolfo=20Bolea=20S=C3=A1nchez?= Date: Wed, 26 Apr 2017 11:40:13 +0900 Subject: [PATCH 3/8] Fixed several memory leaks - Added dl_loader.close() - Memory fragmentation happens --- Makefile.am | 2 +- data/kmeans.input | 1000 -------------------- data/point_generator.ruby | 9 - src/client/dfs.cc | 2 +- src/common/context.cc | 5 +- src/common/dl_loader.cc | 2 + src/mapreduce/executor.cc | 190 ++-- src/mapreduce/fs/iwriter.cc | 33 +- src/mapreduce/fs/iwriter.h | 2 + src/mapreduce/messages/key_value_shuffle.h | 4 +- src/mapreduce/output_collection.cc | 56 +- src/mapreduce/output_collection.hh | 14 +- src/mapreduce/task_executor.cc | 61 +- src/mapreduce/task_executor.hh | 6 +- src/mapreduce/task_executor_router.cc | 5 + src/messages/factory.cc | 16 + src/messages/factory.hh | 1 + src/network/asyncchannel.cc | 68 +- src/network/asyncchannel.hh | 3 + src/network/client_handler.cc | 162 ++-- src/network/client_handler.hh | 15 +- src/targets/node_main_mr.cc | 55 +- 22 files changed, 405 insertions(+), 1306 deletions(-) delete mode 100644 data/kmeans.input delete mode 100644 
data/point_generator.ruby diff --git a/Makefile.am b/Makefile.am index 7f34827..8f1a462 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,6 +1,6 @@ include tests/Makefile.am -AM_CPPFLAGS = -I@srcdir@/src/common -I@srcdir@/src -include ./config.h $(BOOST_CPPFLAGS) +AM_CPPFLAGS = $(CPPFLAGS) -I@srcdir@/src/common -I@srcdir@/src -include ./config.h $(BOOST_CPPFLAGS) AM_CXXFLAGS = $(CXXFLAGS) -Wall bin_PROGRAMS = eclipse_node dfs __velox_mapreduce diff --git a/data/kmeans.input b/data/kmeans.input deleted file mode 100644 index dafa99b..0000000 --- a/data/kmeans.input +++ /dev/null @@ -1,1000 +0,0 @@ -55.69-19.73 -60.03-68.02 -61.68-72.47 -80.35-11.54 -25.94-68.35 -78.83-5.62 -21.05-97.78 -77.69-70.89 -56.69-77.61 -61.39-5.05 -73.23-73.51 -82.8-46.1 -89.34-3.38 -83.12-72.01 -53.48-85.41 -55.56-82.78 -34.24-77.72 -21.21-29.1 -33.69-91.7 -35.56-23.93 -68.96-43.77 -83.96-9.19 -84.91-50.65 -12.53-21.59 -58.99-38.17 -14.44-12.6 -1.98-93.02 -0.49-37.08 -85.29-0.99 -68.49-40.37 -95.8-41.25 -60.77-61.68 -21.34-30.55 -29.86-60.9 -47.0-56.73 -36.38-44.35 -28.95-74.29 -86.79-98.99 -32.03-68.61 -67.72-92.02 -12.16-20.88 -8.23-91.03 -75.52-35.31 -28.13-66.84 -25.54-61.68 -26.07-66.95 -10.77-99.63 -23.87-10.57 -25.0-25.73 -1.58-79.13 -68.62-91.85 -16.03-21.26 -60.86-96.36 -22.2-43.31 -25.2-60.69 -28.13-26.18 -53.9-53.36 -28.18-67.34 -45.85-60.35 -38.68-42.23 -31.47-94.18 -16.21-77.22 -57.58-26.66 -3.1-77.37 -34.75-96.18 -59.15-10.77 -0.88-10.49 -56.25-19.78 -53.59-5.43 -44.47-90.12 -29.57-5.77 -58.57-93.19 -38.4-51.09 -46.25-67.74 -73.63-61.72 -43.66-56.42 -62.48-55.37 -10.47-57.27 -53.68-24.58 -83.92-27.1 -55.71-99.51 -91.12-1.29 -41.0-43.16 -52.79-90.68 -9.58-15.9 -53.13-67.38 -56.02-15.3 -77.08-98.53 -7.38-56.98 -22.39-83.29 -24.54-0.29 -29.77-45.55 -18.82-41.47 -91.65-50.23 -94.59-50.82 -72.83-46.5 -38.28-9.42 -51.34-20.47 -91.41-32.54 -85.39-13.46 -66.77-0.4 -67.23-41.39 -28.55-3.22 -81.2-80.39 -82.39-78.95 -24.66-42.9 -11.72-69.98 -46.2-50.62 -53.63-74.84 -68.34-63.38 
-96.53-21.17 -54.64-72.95 -44.67-64.59 -67.6-36.08 -13.8-25.77 -27.46-94.57 -4.75-70.72 -7.93-26.01 -83.63-11.9 -58.31-50.07 -35.08-46.67 -6.2-35.13 -43.02-95.74 -9.88-26.99 -47.66-17.51 -25.94-72.05 -57.78-67.85 -1.64-87.62 -74.97-48.88 -81.92-54.18 -33.01-81.83 -32.24-1.66 -1.83-61.28 -47.05-95.65 -81.28-74.34 -96.67-91.35 -91.03-9.68 -47.01-67.27 -60.67-79.9 -25.07-31.16 -16.19-48.43 -33.48-67.28 -47.0-29.19 -34.79-60.95 -56.34-56.47 -84.07-2.49 -67.43-91.42 -21.29-11.18 -29.2-80.38 -53.14-19.65 -82.48-81.77 -24.79-83.05 -7.96-82.36 -73.83-90.97 -26.55-41.9 -6.73-50.97 -23.86-57.21 -84.92-10.16 -30.94-55.19 -88.78-55.56 -75.67-8.48 -23.1-34.74 -21.0-7.99 -30.04-84.36 -13.61-80.52 -80.9-70.77 -64.23-86.62 -78.13-43.22 -92.74-95.45 -98.15-12.63 -37.05-8.44 -86.12-62.59 -85.58-47.02 -3.95-69.21 -5.97-12.56 -31.57-74.18 -66.0-82.43 -18.22-30.54 -48.88-89.46 -58.5-4.05 -47.06-53.23 -83.79-27.36 -76.86-12.47 -54.26-48.22 -3.87-51.59 -91.54-2.78 -34.57-40.21 -63.6-67.65 -46.2-63.53 -78.48-46.36 -43.3-30.65 -52.35-60.19 -95.26-70.55 -99.57-80.53 -78.66-65.41 -52.27-37.51 -37.69-19.19 -16.63-19.06 -32.26-22.36 -6.29-21.74 -51.67-39.77 -61.42-57.07 -6.13-8.0 -92.68-53.29 -99.72-74.95 -41.07-12.51 -45.53-87.26 -58.84-84.03 -49.48-87.04 -53.48-1.97 -77.83-32.32 -4.16-93.4 -71.59-15.22 -27.33-92.0 -56.31-64.62 -81.26-58.22 -26.23-29.12 -18.98-39.19 -24.25-51.06 -83.27-40.56 -31.62-91.16 -30.13-61.82 -10.86-67.53 -5.02-53.84 -69.5-95.87 -68.03-51.87 -29.8-16.01 -96.54-34.02 -98.46-60.48 -8.74-50.72 -54.7-63.03 -20.0-19.08 -83.72-7.39 -13.19-44.84 -14.93-41.45 -45.45-45.26 -89.79-5.54 -43.05-94.17 -39.99-12.07 -27.36-80.87 -60.39-28.78 -6.56-9.94 -87.34-7.36 -80.98-90.57 -72.41-51.06 -47.6-12.54 -93.97-69.98 -16.16-22.67 -53.91-87.78 -62.3-97.73 -91.29-16.86 -83.02-74.83 -32.28-48.33 -88.15-86.89 -70.27-94.26 -39.73-81.35 -98.47-61.52 -36.39-92.82 -58.26-49.62 -95.66-89.84 -18.02-8.44 -71.47-71.03 -7.37-28.06 -73.67-10.43 -63.32-13.34 -36.57-27.31 -81.2-27.27 -52.65-51.75 
-9.03-53.81 -9.21-4.47 -23.87-45.2 -54.05-59.6 -71.79-6.7 -78.73-0.63 -95.68-31.08 -26.82-67.95 -13.19-87.53 -13.92-27.98 -30.03-68.96 -60.76-39.94 -96.15-65.93 -53.66-71.39 -93.84-11.32 -5.89-95.62 -64.07-7.51 -8.72-66.2 -96.48-30.32 -37.01-13.0 -69.42-76.53 -91.93-96.58 -4.63-69.87 -2.53-86.18 -45.92-27.09 -62.75-54.35 -45.65-27.77 -93.05-59.29 -41.98-38.95 -22.14-87.83 -12.57-87.05 -6.64-89.59 -3.05-12.57 -28.8-23.73 -25.15-48.38 -24.64-74.51 -62.28-63.92 -66.16-20.81 -69.29-21.39 -44.07-63.8 -74.64-2.73 -20.04-72.42 -93.38-30.54 -41.64-43.57 -35.53-58.83 -10.89-1.52 -30.38-5.75 -82.22-46.8 -82.97-50.48 -34.17-63.77 -30.84-29.66 -81.21-24.23 -10.67-55.46 -54.65-34.76 -36.38-51.93 -13.12-45.21 -19.51-25.72 -42.78-87.76 -63.46-52.53 -29.71-2.85 -77.22-86.05 -80.17-25.35 -94.25-58.93 -45.85-71.03 -51.82-63.33 -52.91-80.13 -59.0-21.75 -75.75-26.89 -92.5-34.74 -24.47-98.57 -88.4-49.92 -26.03-44.53 -94.65-26.98 -67.27-56.71 -21.62-47.97 -26.69-90.1 -99.43-86.1 -81.43-14.29 -2.8-39.8 -26.98-61.14 -92.84-72.97 -8.39-72.76 -47.74-11.34 -99.84-75.14 -1.23-71.92 -45.41-14.48 -50.19-25.64 -49.28-64.76 -77.78-69.7 -13.1-84.91 -78.2-8.15 -97.61-25.38 -65.4-71.48 -70.54-25.15 -52.45-51.85 -0.71-9.33 -73.33-29.51 -25.22-49.44 -96.74-23.71 -8.44-12.86 -16.26-55.34 -77.28-16.92 -45.08-64.97 -5.62-56.7 -40.09-58.76 -57.6-51.16 -19.23-44.72 -25.29-73.94 -34.6-31.15 -66.35-76.88 -64.13-19.81 -2.72-48.1 -33.31-78.96 -37.19-83.78 -20.37-73.44 -58.34-59.13 -6.52-73.56 -62.98-12.54 -77.05-77.65 -58.1-91.98 -53.04-82.28 -26.15-7.57 -40.19-96.74 -73.37-66.13 -97.18-19.68 -71.57-91.14 -54.49-65.58 -32.31-70.15 -91.32-40.01 -44.31-34.2 -44.88-51.26 -28.52-85.02 -7.98-18.74 -61.42-56.45 -11.95-15.53 -6.67-75.33 -10.36-95.43 -43.09-34.77 -95.16-3.92 -51.43-12.82 -12.58-21.55 -83.23-4.07 -10.96-52.57 -38.48-97.81 -99.88-23.64 -16.94-59.32 -68.01-87.39 -28.93-5.5 -22.65-5.43 -29.06-60.3 -67.91-5.81 -74.64-12.65 -41.59-81.5 -54.62-92.06 -80.41-44.58 -31.59-98.08 -72.2-92.59 -99.04-82.89 
-34.78-5.82 -56.82-18.92 -34.87-11.06 -42.22-94.98 -60.65-21.07 -49.54-93.25 -66.99-35.02 -96.03-3.45 -79.41-69.51 -51.58-60.15 -87.29-50.95 -5.84-57.42 -89.23-79.26 -50.33-11.23 -86.18-29.51 -0.71-48.08 -89.83-35.77 -72.1-78.08 -47.67-85.12 -95.52-35.54 -97.66-64.96 -57.19-87.12 -83.86-11.4 -38.97-28.2 -88.48-48.05 -55.8-37.2 -67.81-33.7 -92.62-35.03 -83.63-3.32 -37.29-33.23 -50.57-24.12 -90.94-35.15 -55.71-56.76 -98.41-87.95 -30.36-92.88 -61.17-47.36 -42.35-77.4 -72.22-72.49 -52.44-46.18 -69.29-63.94 -68.84-16.83 -5.2-44.3 -28.15-58.33 -35.0-52.62 -9.23-63.41 -19.06-62.66 -8.98-46.24 -80.92-86.72 -86.69-35.35 -65.91-54.76 -83.08-67.59 -12.97-88.8 -78.11-49.21 -53.18-56.22 -85.89-90.65 -57.64-33.32 -43.03-88.8 -72.89-42.31 -96.19-92.21 -29.99-91.49 -22.85-54.61 -42.86-93.63 -98.26-23.52 -17.89-31.34 -93.98-92.37 -8.83-78.19 -76.01-38.23 -24.68-91.93 -87.46-98.43 -88.59-89.32 -81.54-3.46 -34.36-44.63 -37.01-21.91 -43.05-91.16 -11.29-23.41 -3.92-31.22 -28.6-83.27 -64.51-39.54 -88.22-33.44 -88.62-1.88 -97.04-0.48 -82.09-20.68 -24.43-45.12 -83.56-98.54 -11.53-95.16 -72.29-75.17 -38.35-93.78 -3.63-79.33 -82.53-35.56 -65.58-58.38 -30.96-31.89 -67.53-41.89 -1.52-43.82 -50.62-91.15 -25.09-92.28 -94.11-36.44 -15.76-99.23 -62.01-35.05 -26.93-52.75 -11.51-18.0 -23.71-26.81 -17.3-26.49 -39.08-41.44 -33.62-48.11 -77.24-89.25 -6.45-38.92 -85.83-1.87 -28.22-18.81 -30.39-89.55 -37.54-46.18 -72.32-71.09 -95.47-46.4 -82.72-76.17 -35.18-58.17 -35.31-68.65 -37.64-55.29 -23.71-54.56 -30.62-67.35 -59.3-46.95 -63.18-90.61 -76.91-89.02 -92.5-51.24 -48.0-54.64 -15.63-67.39 -90.57-43.81 -37.83-83.01 -63.66-17.88 -58.51-43.69 -29.16-74.36 -98.54-87.15 -96.6-87.26 -54.25-38.64 -11.65-0.43 -48.26-43.78 -6.76-44.89 -35.42-79.81 -68.8-15.09 -84.74-72.37 -1.12-93.23 -35.33-86.62 -15.09-24.23 -25.52-54.96 -97.63-23.68 -17.96-11.92 -45.36-80.73 -76.74-81.69 -92.76-83.95 -51.36-65.16 -95.49-61.3 -92.78-11.81 -73.02-59.49 -26.33-13.36 -1.21-55.46 -72.4-54.02 -5.31-93.8 -9.75-16.51 -37.45-23.43 
-67.87-57.8 -19.67-93.9 -32.04-20.63 -21.75-98.69 -44.49-28.05 -37.16-1.11 -72.48-9.87 -92.31-38.07 -26.19-28.89 -57.75-83.09 -43.81-64.72 -91.59-58.73 -92.94-65.24 -93.71-39.32 -74.29-58.5 -55.1-73.01 -51.85-1.5 -69.51-23.7 -26.64-21.22 -24.92-30.78 -44.32-75.45 -19.73-77.12 -54.1-92.33 -68.48-10.78 -68.39-37.75 -68.79-82.5 -31.97-21.96 -87.76-94.24 -39.07-86.69 -95.93-30.4 -75.26-29.22 -39.36-66.15 -77.84-84.5 -60.31-26.64 -17.72-43.21 -38.87-83.21 -77.8-46.21 -61.21-98.98 -4.76-7.45 -82.48-62.23 -17.86-89.73 -92.4-39.5 -88.25-16.79 -3.83-50.46 -67.1-57.97 -97.37-70.42 -39.52-86.2 -55.84-21.46 -0.27-29.52 -79.52-52.17 -94.49-60.21 -86.91-84.22 -18.75-81.92 -57.62-34.47 -87.41-37.53 -93.49-0.25 -26.78-45.66 -97.16-0.27 -96.63-71.95 -39.66-32.35 -71.9-46.77 -93.0-8.84 -75.01-58.46 -10.22-99.49 -71.31-16.53 -31.99-40.04 -51.19-15.95 -88.04-99.98 -3.82-72.53 -24.74-44.89 -52.89-99.2 -56.19-37.74 -20.48-70.19 -16.56-74.66 -7.88-29.6 -84.66-37.13 -30.15-74.92 -22.63-38.42 -44.28-94.1 -16.67-72.36 -21.87-79.84 -86.24-40.59 -53.68-56.44 -58.98-39.28 -82.61-63.77 -44.8-9.04 -33.42-59.24 -89.58-30.76 -30.06-93.31 -68.05-24.26 -87.42-59.72 -94.1-57.33 -83.59-25.29 -6.24-93.9 -48.52-87.63 -1.38-34.99 -0.01-4.09 -94.07-13.85 -41.41-20.29 -92.01-50.64 -69.61-41.46 -91.51-95.89 -27.37-37.26 -7.13-77.23 -99.91-87.79 -44.95-6.65 -78.46-64.02 -59.76-19.68 -87.49-48.89 -51.83-74.51 -6.38-81.62 -88.7-52.17 -60.12-57.98 -71.43-51.05 -26.96-12.31 -100.0-66.53 -91.77-33.26 -79.76-67.9 -24.74-64.79 -42.86-60.59 -39.94-54.5 -54.14-18.12 -36.0-46.75 -93.09-56.05 -0.92-86.63 -91.54-60.78 -89.72-79.23 -44.79-49.18 -95.86-79.71 -60.54-34.16 -58.15-52.54 -42.18-98.49 -13.82-74.19 -65.31-23.1 -89.76-58.57 -24.35-19.51 -10.11-8.55 -44.85-8.03 -39.86-58.89 -25.58-31.41 -51.69-65.52 -80.91-69.96 -65.38-8.46 -49.15-96.46 -21.38-46.82 -34.7-25.8 -64.62-7.09 -88.72-4.11 -68.52-13.02 -31.27-33.99 -57.61-31.57 -84.76-75.65 -13.51-16.43 -60.0-23.35 -18.88-66.67 -98.89-61.4 -27.14-45.74 -59.59-80.75 
-78.49-61.74 -3.57-82.5 -18.35-90.46 -41.31-83.95 -57.58-36.94 -72.89-96.65 -82.51-20.47 -58.52-52.31 -1.93-18.59 -1.9-86.77 -60.46-35.97 -45.52-43.73 -94.03-99.57 -43.03-79.32 -16.85-19.86 -74.3-60.26 -72.81-19.12 -4.99-39.41 -16.01-53.42 -90.7-22.22 -14.51-70.05 -73.84-21.07 -36.46-83.42 -75.7-52.83 -47.56-57.73 -1.04-47.44 -43.11-21.96 -6.5-96.89 -77.98-73.31 -20.01-90.97 -27.35-23.71 -20.33-63.8 -14.47-87.91 -43.38-30.46 -13.95-84.72 -20.0-40.11 -69.31-45.18 -10.07-53.19 -54.27-33.34 -15.77-59.53 -62.88-52.58 -76.93-58.47 -30.52-78.81 -20.1-51.86 -88.87-35.14 -73.92-62.96 -61.04-58.32 -84.98-58.02 -20.42-29.64 -39.55-14.8 -68.28-19.58 -97.23-79.62 -46.29-57.63 -12.43-11.02 -18.59-70.17 -94.32-58.63 -82.77-69.46 -9.68-46.8 -39.87-54.58 -73.37-68.55 -88.87-13.34 -17.64-63.83 -45.92-25.25 -45.64-84.88 -62.04-34.9 -93.79-35.53 -78.9-76.74 -82.13-93.71 -26.21-42.4 -41.41-52.91 -99.44-71.7 -32.32-25.34 -71.25-1.15 -16.83-70.73 -38.05-75.14 -48.93-68.08 -52.47-96.67 -70.36-8.87 -58.66-77.62 -70.21-38.38 -19.7-17.56 -0.73-33.87 -32.74-92.84 -13.98-88.24 -96.77-19.2 -39.73-24.26 -20.4-90.91 -89.44-1.29 -77.85-96.69 -40.41-49.5 -41.25-7.58 -72.89-61.08 -42.17-76.08 -34.37-55.35 -68.57-3.98 -53.5-51.06 -69.68-33.38 -63.31-83.99 -92.28-4.05 -40.89-97.32 -98.86-91.4 -34.11-92.9 -40.41-4.47 -7.99-90.22 -62.48-0.12 -14.99-0.64 -96.41-24.71 -39.82-31.99 -61.67-29.58 -4.21-65.56 -86.49-28.77 -88.71-0.06 -89.58-76.09 -65.05-62.63 -18.21-75.76 -16.73-87.36 -28.39-80.46 -94.12-35.99 -6.14-60.03 -77.75-63.74 -51.25-76.62 -51.76-25.65 -88.4-18.06 -6.36-1.6 -21.04-20.22 -89.89-16.93 -59.17-10.53 -99.04-49.52 -25.17-93.75 -67.35-35.1 -55.85-51.26 -69.18-21.84 -87.82-8.42 -83.46-80.31 -25.45-38.57 -61.68-52.2 -58.53-35.61 -46.23-66.82 -71.12-54.66 -24.28-97.66 -3.41-52.82 -26.45-72.61 -0.96-73.29 -60.91-63.49 -18.57-14.54 -15.36-22.29 -59.19-79.62 -39.89-81.13 -35.35-1.56 -39.57-85.85 -34.99-5.78 -58.89-33.21 -91.14-94.32 -19.48-62.29 -19.46-80.08 -43.2-49.18 -65.05-69.5 -37.32-29.29 
-51.67-92.07 -45.23-52.85 -91.0-13.83 -82.27-27.41 -28.84-62.33 -43.01-5.84 -70.88-84.23 -50.59-49.19 -21.32-59.53 -10.74-34.73 -44.69-55.72 -10.1-3.41 -82.79-23.9 -29.26-86.16 -35.3-61.19 -40.05-75.88 -29.58-88.54 -73.57-76.2 -54.98-50.45 -53.37-17.33 -26.99-32.0 -33.99-42.91 -90.76-29.35 -81.19-48.88 -67.54-14.7 -10.9-57.04 -25.2-22.35 -6.41-79.15 -67.01-57.97 -48.21-58.96 -66.74-91.48 -49.19-49.78 -70.66-74.27 -39.22-67.67 -26.02-60.99 -70.94-97.96 -35.9-57.09 -13.58-69.41 -32.19-94.94 -99.79-94.64 -8.9-53.94 -57.95-21.7 -64.07-72.75 -10.13-86.57 -41.89-95.94 -28.79-35.06 -7.45-6.4 -2.83-4.75 -92.95-99.19 -63.92-47.82 -77.41-89.58 -69.53-34.04 -25.51-28.36 -2.12-50.47 -4.67-22.16 -47.38-36.5 -41.99-61.07 -5.65-0.13 -41.81-32.52 -38.4-83.39 -82.65-58.53 -32.99-63.05 -89.55-38.04 -34.79-26.22 -90.83-56.76 -17.52-46.09 -56.84-11.88 -20.55-0.15 -19.47-81.68 -68.06-87.09 -73.6-49.1 -86.04-35.54 -88.52-17.22 -52.41-74.96 -58.74-5.47 -51.84-52.01 -13.84-72.09 -17.35-87.4 -81.08-78.37 -99.15-96.61 -79.7-32.0 -43.82-37.89 -70.86-6.6 -16.47-10.34 -18.07-16.99 -20.21-44.44 -70.94-0.2 -81.26-54.9 -9.77-52.15 -93.72-0.97 -95.47-38.08 -10.0-49.57 -7.5-17.81 -74.55-73.51 -61.95-76.37 -16.22-59.44 -73.32-49.54 -95.28-52.53 -12.19-46.13 -41.6-11.41 -40.81-86.18 diff --git a/data/point_generator.ruby b/data/point_generator.ruby deleted file mode 100644 index ac4971c..0000000 --- a/data/point_generator.ruby +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/ruby - -i = 0 - -while (i < ARGV[0].to_i) do - puts "#{Random.rand(0.0...100.0).round(2)},#{Random.rand(0.00...100.00).round(2)}" - - i += 1 -end diff --git a/src/client/dfs.cc b/src/client/dfs.cc index c49bb4b..df11c90 100644 --- a/src/client/dfs.cc +++ b/src/client/dfs.cc @@ -95,7 +95,7 @@ unique_ptr get_file_description static bool file_exists_local(std::string filename) { ifstream ifile(filename); - return ifile; + return ifile.good(); } // }}} // Constructors and misc {{{ diff --git a/src/common/context.cc b/src/common/context.cc 
index b7ade6c..694a026 100644 --- a/src/common/context.cc +++ b/src/common/context.cc @@ -48,8 +48,9 @@ void Context::run (){ int concurrency = settings.get ("cache.concurrency"); for (int i = 0; i < concurrency; i++ ) { auto t = new std::thread ( [this] { - try { - this->io.run(); + try { + this->io.run(); + } catch (exception& e) { logger->error("iosvc exception %s", e.what()); } catch (boost::exception& e) { diff --git a/src/common/dl_loader.cc b/src/common/dl_loader.cc index 9858401..9e0f320 100644 --- a/src/common/dl_loader.cc +++ b/src/common/dl_loader.cc @@ -8,6 +8,8 @@ using namespace std; DL_loader::DL_loader(string lib): lib_name(lib){ } DL_loader::~DL_loader() { + if(!lib) + close(); } // }}} // init_executor {{{ diff --git a/src/mapreduce/executor.cc b/src/mapreduce/executor.cc index e377a5e..3c34892 100644 --- a/src/mapreduce/executor.cc +++ b/src/mapreduce/executor.cc @@ -4,7 +4,7 @@ #include "../common/hash.hh" #include "../mapreduce/output_collection.hh" #include "../mapreduce/fs/ireader.h" -#include "../messages/keyvalue.hh" +#include "messages/key_value_shuffle.h" #include "../client/dfs.hh" #include @@ -12,10 +12,12 @@ #include #include #include +#include #include #include #include #include + #define MAP_MAX_LINE 10000 using namespace eclipse; @@ -28,10 +30,14 @@ Executor::~Executor() { } // }}} // run_map {{{ bool Executor::run_map (messages::Task* m) { + std::unordered_map options; + vector keys_blocks; + queue threads; + mutex mut; auto path_lib = GET_STR("path.applications"); auto network_size = GET_VEC_STR("network.nodes").size(); - auto mappers = GET_INT("mapreduce.mappers"); + size_t mappers = GET_INT("mapreduce.mappers"); path_lib += ("/" + m->library); DL_loader loader (path_lib); @@ -42,118 +48,134 @@ bool Executor::run_map (messages::Task* m) { } before_map_t _before_map_ = loader.load_function_before_map("before_map"); + after_map_t _after_map_ = loader.load_function_after_map("after_map"); + mapper_t _map_ = 
loader.load_function(m->func_name); - vector keys_blocks; + if(_before_map_ != nullptr) + _before_map_(options); - vector threads; - mutex mut; - INFO("LAunching mapper with %i threads", mappers); - for (int reducer_id = 0; reducer_id < m->blocks.size(); reducer_id++) { + INFO("LAunching mapper with %i threads", m->blocks.size()); + try { + for (size_t map_id = 0; map_id < m->blocks.size(); map_id++) { // Make sure we only execute 'mappers' threads at that time if (threads.size() >= mappers) { threads.front().join(); - threads.erase(threads.begin(), threads.begin()+1); + threads.pop(); } - threads.emplace_back(std::thread([&] (int id) { - std::mt19937 rng; - rng.seed(std::random_device()()); - std::uniform_int_distribution dist(0, INT_MAX); - - std::unordered_map options; - if(_before_map_ != nullptr) - _before_map_(options); - - mapper_t _map_ = loader.load_function(m->func_name); + threads.emplace([&, this] (size_t id) { + try { - const string block_name = m->blocks[id].second; - INFO("Executing map on block: %s", block_name.c_str()); - Local_io local_io; - string input = local_io.read(block_name); - stringstream ss (input); + { + auto* kv_blocks = new map(); - char next_line[MAP_MAX_LINE]; - velox::OutputCollection results; + { + const string block_name = m->blocks[id].second; + Local_io local_io; + string input = local_io.read(block_name); + istringstream ss (std::move(input)); - while (!ss.eof()) { - bzero(next_line, MAP_MAX_LINE); - ss.getline (next_line, MAP_MAX_LINE); - if (strnlen(next_line, MAP_MAX_LINE) == 0) - continue; + velox::OutputCollection results; + char* next_line = new char[MAP_MAX_LINE]; - std::string line(next_line); - _map_ (line, results, options); - } - - map kv_blocks; - try { + while (!ss.eof()) { + bzero(next_line, MAP_MAX_LINE); + ss.getline (next_line, MAP_MAX_LINE); + if (strnlen(next_line, MAP_MAX_LINE) == 0) + continue; - auto run_block = [&mut, &m, &kv_blocks, &keys_blocks, network_size, &dist, &rng](std::string key, std::vector* 
value) mutable { - int node = h(key) % network_size; - auto it = kv_blocks.find(node); - if (it == kv_blocks.end()) { - it = kv_blocks.insert({node, {}}).first; - - it->second.node_id = node; - it->second.job_id_ = m->job_id; - it->second.map_id_ = 0; - - uint32_t random_id = dist(rng); - mut.lock(); - keys_blocks.push_back(node); - mut.unlock(); - it->second.kv_id = random_id; + std::string line(next_line); + _map_ (line, results, options); } + delete[] next_line; + + for (auto& pair : results) { + auto& key = pair.first; + auto& value = pair.second; + uint32_t node = h(key) % network_size; + auto it = kv_blocks->find(node); + if (it == kv_blocks->end()) { + it = kv_blocks->insert({node, {}}).first; + + it->second.node_id = node; + it->second.job_id_ = m->job_id; + it->second.map_id_ = 0; + it->second.origin_id = context.id; + } + + it->second.kv_pairs.insert({key, std::move(value)}); + } + } - it->second.kv_pairs.insert({key, std::move(*value)}); - }; - results.travel(run_block); + mut.lock(); + for (auto it: *kv_blocks) + keys_blocks.push_back(it.first); + mut.unlock(); vector shuffled_array; - for (int i = 0; i < network_size; i++) - shuffled_array.push_back(i); + { + for (int i = 0; i < network_size; i++) + shuffled_array.push_back(i); - auto engine = std::default_random_engine{}; - std::shuffle(shuffled_array.begin(), shuffled_array.end(), engine); - - - for (auto& index: shuffled_array) { mut.lock(); - auto it = kv_blocks.find(index); - if (it != kv_blocks.end()) { - peer->insert_key_value(&(it->second)); - } + unsigned seed1 = std::chrono::system_clock::now().time_since_epoch().count(); + auto engine = std::default_random_engine{seed1}; + std::shuffle(shuffled_array.begin(), shuffled_array.end(), engine); mut.unlock(); } + for (auto& index : shuffled_array) { + auto it = kv_blocks->find(index); + if (it != kv_blocks->end()) { + KeyValueShuffle* kv = &(it->second); + peer->insert_key_value(kv); + } + } - after_map_t _after_map_ = 
loader.load_function_after_map("after_map"); - if(_after_map_ != nullptr) - _after_map_(options); - INFO("MAP thread finishing"); + //auto it = kv_blocks->begin(); + //while (it != kv_blocks->end()) { + // peer->insert_key_value(it->second.get()); + // ++it; + //} - } catch (exception& e) { - INFO("Mapper exception %s", e.what()); - } catch (boost::exception& e) { - INFO("Mapper exception %s", diagnostic_information(e).c_str()); +// kv_blocks->clear(); + delete kv_blocks; } + INFO("Finishing map threads"); + } catch (exception& e) { + ERROR("Mapper exception %s", e.what()); + } catch (boost::exception& e) { + ERROR("Mapper exception %s", diagnostic_information(e).c_str()); + } - }, reducer_id)); + }, map_id); } - try { - for (auto& thread : threads) - thread.join(); + while (!threads.empty()) { + threads.front().join(); + threads.pop(); + } + + + if(_after_map_ != nullptr) { + INFO("CALLING AFTER MAP"); + _after_map_(options); + } + + loader.close(); peer->notify_map_is_finished(m->job_id, keys_blocks); + keys_blocks.clear(); + } catch (exception& e) { - INFO("Mapper parent exception %s", e.what()); - } catch (boost::exception& e) { - INFO("Mapper parent exception %s", diagnostic_information(e).c_str()); - } + ERROR("Mapper parent exception %s", e.what()); + } catch (boost::exception& e) { + ERROR("Mapper parent exception %s", diagnostic_information(e).c_str()); + } + return true; } // }}} @@ -186,7 +208,7 @@ bool Executor::run_reduce (messages::Task* task) { mutex mut; DEBUG("LAunching reducer with %i threads", reducer_slot); for (int reducer_id = 0; reducer_id < reducer_slot; reducer_id++) { - threads.push_back(std::thread([&] (int id) { + threads.push_back(std::thread([&, this] (int id) { DEBUG("%i %i", task->job_id, id); IReader ireader; @@ -232,9 +254,9 @@ bool Executor::run_reduce (messages::Task* task) { std::string current_block_content = ""; - auto make_block_content = [&] (std::string key, std::vector* values) mutable { + auto make_block_content = [&] 
(std::string key, std::vector values) mutable { - for(std::string& value : *values) { + for(std::string& value : values) { current_block_content += key + ": " + value + "\n"; num_keys++; } diff --git a/src/mapreduce/fs/iwriter.cc b/src/mapreduce/fs/iwriter.cc index ef4cdb4..3af75d0 100644 --- a/src/mapreduce/fs/iwriter.cc +++ b/src/mapreduce/fs/iwriter.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "../../common/context_singleton.hh" #include "../../messages/message.hh" @@ -43,7 +44,6 @@ IWriter::IWriter() { scratch_path_ = context.settings.get("path.idata"); is_write_start_ = false; is_write_finish_ = false; - copy_phase_done_ = false; index_counter_ = 0; writing_index_ = -1; write_buf_size_ = context.settings.get("mapreduce.write_buf_size"); @@ -52,6 +52,8 @@ IWriter::IWriter() { write_pos_ = write_buf_; kmv_blocks_.resize(reduce_slot_); is_write_ready_.resize(reduce_slot_); + mutex_end_thread.lock(); + mutex_start_thread.lock(); for (uint32_t i = 0; i < reduce_slot_; ++i) { block_size_.emplace_back(0); write_count_.emplace_back(0); @@ -87,7 +89,8 @@ void IWriter::finalize() { } } is_write_start_ = true; - copy_phase_done_ = true; + mutex_start_thread.unlock(); + mutex_end_thread.unlock(); writer_thread_->join(); for (uint32_t i = 0; i < reduce_slot_; ++i) { @@ -106,36 +109,32 @@ void IWriter::finalize() { } void IWriter::run(IWriter *obj) { - obj->seek_writable_block(); + try { + obj->seek_writable_block(); + } catch (std::exception& e) { + ERROR("exeception in iwriter worker thread %s", e.what()); + } } void IWriter::seek_writable_block() { // while loops should be changed to lock + mutex_start_thread.lock(); while(!is_write_start_); while(!is_write_finish_) { // Check if there is any block that should be written to disk. // And if it's true, write it onto disk. 
- std::shared_ptr> writing_block = nullptr; - int reducer_id = -1; mutex.lock(); for (uint32_t i = 0; i < reduce_slot_; ++i) { if (kmv_blocks_[i].size() > 0 && is_write_ready_[i].back()) { - writing_block = kmv_blocks_[i].back(); + auto writing_block = kmv_blocks_[i].back(); kmv_blocks_[i].pop_back(); is_write_ready_[i].pop_back(); - reducer_id = i; - break; + write_block(writing_block, i); } } - mutex.unlock(); - - if (writing_block != nullptr) { - write_block(writing_block, reducer_id); - } // Check if there are no more incoming key value pairs. - mutex.lock(); - if(copy_phase_done_) { + if(is_write_start_) { uint32_t finish_counter = 0; for (uint32_t i = 0; i < reduce_slot_; ++i) { if(kmv_blocks_[i].size() == 0) { @@ -148,6 +147,8 @@ void IWriter::seek_writable_block() { } mutex.unlock(); } + + mutex_end_thread.lock(); } void IWriter::add_key_value(const string &key, const string &value) { deb.insert(key); @@ -172,6 +173,7 @@ void IWriter::add_key_value(const string &key, const string &value) { if (new_size > iblock_size_) { is_write_ready_[index].front() = true; is_write_start_ = true; + mutex_start_thread.unlock(); } mutex.unlock(); } @@ -243,6 +245,7 @@ void IWriter::write_block(std::shared_ptr> block, } i++; } + block.reset(); flush_buffer(); file_.close(); messages::IBlockInsert iblock_insert; diff --git a/src/mapreduce/fs/iwriter.h b/src/mapreduce/fs/iwriter.h index 126b81a..6073c02 100644 --- a/src/mapreduce/fs/iwriter.h +++ b/src/mapreduce/fs/iwriter.h @@ -79,6 +79,8 @@ class IWriter: public IWriter_interface { unordered_map key_index_; // index of key std::ofstream file_; std::mutex mutex; + std::mutex mutex_start_thread; + std::mutex mutex_end_thread; std::set deb; }; diff --git a/src/mapreduce/messages/key_value_shuffle.h b/src/mapreduce/messages/key_value_shuffle.h index 59c6371..4661be8 100644 --- a/src/mapreduce/messages/key_value_shuffle.h +++ b/src/mapreduce/messages/key_value_shuffle.h @@ -17,8 +17,8 @@ struct KeyValueShuffle: public Message { 
std::map> kv_pairs; uint32_t number_of_keys = 0; uint32_t kv_id = 0; - int node_id = 0; - int origin_id = 0; + uint32_t node_id = 0; + uint32_t origin_id = 0; }; diff --git a/src/mapreduce/output_collection.cc b/src/mapreduce/output_collection.cc index fb1d615..b31dbc9 100644 --- a/src/mapreduce/output_collection.cc +++ b/src/mapreduce/output_collection.cc @@ -1,57 +1,39 @@ #include "output_collection.hh" #include +#include -namespace velox { -OutputCollection::OutputCollection() { - collection_ = nullptr; -} +using std::make_shared; -OutputCollection::~OutputCollection() { - if(collection_ != nullptr) { - for(auto iter = collection_->begin(); iter != collection_->end(); ++iter) - delete iter->second; +namespace velox { - delete collection_; - } -} +OutputCollection::OutputCollection() { } +OutputCollection::~OutputCollection() { } bool OutputCollection::insert(std::string key, std::string value) { - this->check_or_alloc_collection(); - auto collection_item = collection_->find(key); + auto collection_item = collection_.find(key); + + if (collection_item == collection_.end()) { + collection_item = collection_.insert({key, {}}).first; + collection_item->second.reserve(2048); + } - if(collection_item != collection_->end()) - (reinterpret_cast(collection_item->second))->push_back(value); - else - collection_->insert(std::pair(key, new std::vector(1, value))); + collection_item->second.push_back(value); return true; }; -void OutputCollection::check_or_alloc_collection() { - if(collection_ == nullptr) - collection_ = new std::map(); -} - -auto OutputCollection::begin() { - this->check_or_alloc_collection(); - return collection_->begin(); -} +void OutputCollection::check_or_alloc_collection() { } -auto OutputCollection::end() { - this->check_or_alloc_collection(); - return collection_->end(); -} +std::map::iterator OutputCollection::begin() { return collection_.begin(); } +std::map::iterator OutputCollection::end() { return collection_.end(); } -void 
OutputCollection::travel(std::function*)> run_block_with_kv) { - for(auto key_values = this->begin(); key_values != this->end(); ++key_values) { +void OutputCollection::travel( + std::function run_block_with_kv) { + for(auto key_values = begin(); key_values != end(); ++key_values) { run_block_with_kv(key_values->first, key_values->second); } } -void OutputCollection::print_all() { -// travel([](std::string k, std::string v) { -// std::cout << "<" << k << ", " << v << ">" << std::endl; -// }); -} +void OutputCollection::print_all() { } } diff --git a/src/mapreduce/output_collection.hh b/src/mapreduce/output_collection.hh index ea63756..535879a 100644 --- a/src/mapreduce/output_collection.hh +++ b/src/mapreduce/output_collection.hh @@ -2,10 +2,14 @@ #include #include +#include #include #include namespace velox { + + using key_t = std::string; + using value_t = std::vector; class OutputCollection { public: OutputCollection(); @@ -14,19 +18,17 @@ namespace velox { /* TODO: arguments for any types */ bool insert(std::string, std::string); - auto begin(); - auto end(); + std::map::iterator begin(); + std::map::iterator end(); /* TODO: arguments for any types */ - void travel(std::function*)>); + void travel(std::function)>); void print_all(); private: - using key_t = std::string; - using value_t = std::vector*; - std::map* collection_; + std::map collection_; void check_or_alloc_collection(); }; diff --git a/src/mapreduce/task_executor.cc b/src/mapreduce/task_executor.cc index c91201d..d21fa37 100644 --- a/src/mapreduce/task_executor.cc +++ b/src/mapreduce/task_executor.cc @@ -4,6 +4,11 @@ #include "../common/histogram.hh" #include "messages/idatalist.hh" #include "messages/finish_shuffle.h" +#include "messages/key_value_shuffle.h" +#include "messages/nodes_shuffling.hh" +#include "messages/taskstatus.hh" +#include "messages/job.hh" +#include "messages/task.hh" #include "executor.hh" #include "py_executor.hh" #include "fs/iwriter.h" @@ -23,8 +28,6 @@ using namespace std; 
using namespace velox; -mutex local_mut; -mutex local_mut2; namespace eclipse { // Constructors {{{ @@ -43,6 +46,7 @@ void TaskExecutor::job_accept(messages::Job* m, std::function fn) { jobs_callback[m->job_id] = fn; tasks_remaining[m->job_id] = 0; + INFO("JOB recieved"); if (m->type == "MAP") { std::map>>map_nodes; @@ -157,19 +161,21 @@ void TaskExecutor::task_accept_status(TaskStatus* m) { // key_value_store {{{ void TaskExecutor::key_value_store(KeyValueShuffle *kv) { INFO("KVshuffle KV_ID=%lu, ID=%i, DST=%i", kv->kv_id, id, kv->node_id); - + if (kv->node_id == id) { std::thread([&, this] (KeyValueShuffle kv) { - write_key_value(&kv); + try { + write_key_value(&kv); + } catch (exception& e) { + ERROR("Error in key_value_store routine ex:%s", e.what()); + } NodesShuffling fs; fs.job_id = kv.job_id_; fs.id = id; - local_mut2.lock(); network->send(kv.origin_id, &fs); - local_mut2.unlock(); - }, *kv).detach(); + }, std::move(*kv)).detach(); } else { DEBUG("Forwarding KVS to another node"); @@ -195,9 +201,11 @@ void TaskExecutor::write_key_value(messages::KeyValueShuffle *kv_shuffle) { const std::string& key = pair.first; auto& values = pair.second; + local_mut2.lock(); for (auto& v : values) { iwriter->add_key_value(key, v); } + local_mut2.unlock(); } } // }}} @@ -216,11 +224,14 @@ void TaskExecutor::write_key_value(messages::KeyValueShuffle *kv_shuffle) { void TaskExecutor::request_local_map (messages::Task* task) { if (task->lang == "C++") { - std::thread([&](Task task) { + Task stask = *task; + + std::thread([&, this](Task task) { Executor exec(this); exec.run_map(&task); - INFO("MAP has finished"); - }, *task).detach(); + }, stask).detach(); + + sleep(1); } else if (task->lang == "Python") { PYexecutor exec(this); @@ -244,7 +255,7 @@ void TaskExecutor::notify_task_leader(int leader, uint32_t job_id, string type) } // }}} // notify_map_is_finished {{{ -void TaskExecutor::notify_map_is_finished(uint32_t job_id, +void TaskExecutor::notify_map_is_finished(uint32_t 
job_id, std::vector nodes) { FinishMap ts; @@ -261,10 +272,10 @@ void TaskExecutor::notify_map_is_finished(uint32_t job_id, // }}} // insert_key_value {{{ void TaskExecutor::insert_key_value(KeyValueShuffle *kv) { - kv->origin_id = id; local_mut.lock(); tasker_remaining_nodes_shuffling.insert(kv->node_id); local_mut.unlock(); + network->send(kv->node_id, kv); } // }}} @@ -273,9 +284,13 @@ void TaskExecutor::try_finish_map(uint32_t job_id) { local_mut.lock(); if (tasker_remaining_job.find(job_id) != tasker_remaining_job.end()) { if (tasker_remaining_nodes_shuffling.empty()) { - auto& ts = tasker_remaining_job[job_id]; + auto ts = tasker_remaining_job[job_id]; + tasker_remaining_job.erase(job_id); + local_mut.unlock(); + uint32_t leader = job_id % network_size; network->send(leader, &ts); + return; } } local_mut.unlock(); @@ -285,9 +300,9 @@ void TaskExecutor::try_finish_map(uint32_t job_id) { void TaskExecutor::shuffle_is_done(uint32_t job_id, uint32_t id) { local_mut.lock(); auto itr = tasker_remaining_nodes_shuffling.find(id); - if(itr!=tasker_remaining_nodes_shuffling.end()){ - tasker_remaining_nodes_shuffling.erase(itr); - } + if(itr!=tasker_remaining_nodes_shuffling.end()){ + tasker_remaining_nodes_shuffling.erase(itr); + } local_mut.unlock(); try_finish_map(job_id); @@ -299,7 +314,7 @@ void TaskExecutor::schedule_reduce(messages::Job* m) { auto reduce_nodes = nodes_shuffling; tasks_remaining[m->job_id] = reduce_nodes.size(); - logger->info("JOB LEADER %i Processing REDUCE %i jobs", id, reduce_nodes.size()); + INFO("JOB LEADER %i Processing REDUCE %i jobs", id, reduce_nodes.size()); if (dfs.exists(m->file_output)) dfs.remove(m->file_output); @@ -317,9 +332,6 @@ void TaskExecutor::schedule_reduce(messages::Job* m) { task.func_body = m->func_body; task.lang = m->lang; - //if (which_node == id) - // process(&task); - //else network->send(which_node, &task); } } @@ -348,16 +360,9 @@ void TaskExecutor::request_local_reduce (messages::Task* m) { } else if (m->lang == 
"Python") { PYexecutor exec(this); - Reply reply; - - if (exec.run_reduce(m)) - reply.message = "MAPDONE"; - else - reply.message = "MAPFAILED"; + exec.run_reduce(m); } } - - //notify_task_leader(m->leader, m->job_id, m->job_id, "REDUCE"); } // }}} } // namespace eclipse diff --git a/src/mapreduce/task_executor.hh b/src/mapreduce/task_executor.hh index bea35d8..6f76598 100644 --- a/src/mapreduce/task_executor.hh +++ b/src/mapreduce/task_executor.hh @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -51,8 +52,11 @@ class TaskExecutor : public Node { std::unordered_map> iwriters_; std::set nodes_shuffling; - std::multiset tasker_remaining_nodes_shuffling; std::map tasker_remaining_job; + std::multiset tasker_remaining_nodes_shuffling; + + std::mutex local_mut; + std::mutex local_mut2; uint32_t current_nodes_shuffling = 0; uint32_t network_size; diff --git a/src/mapreduce/task_executor_router.cc b/src/mapreduce/task_executor_router.cc index 8d58b4e..cadee16 100644 --- a/src/mapreduce/task_executor_router.cc +++ b/src/mapreduce/task_executor_router.cc @@ -1,7 +1,12 @@ #include "task_executor_router.hh" #include "../common/context_singleton.hh" #include "../messages/boost_impl.hh" +#include "messages/task.hh" +#include "messages/job.hh" +#include "messages/taskstatus.hh" +#include "messages/nodes_shuffling.hh" #include "messages/finish_shuffle.h" +#include "messages/key_value_shuffle.h" #include #include diff --git a/src/messages/factory.cc b/src/messages/factory.cc index 89b3d96..65c17d7 100644 --- a/src/messages/factory.cc +++ b/src/messages/factory.cc @@ -15,6 +15,21 @@ using namespace std; namespace eclipse { namespace messages { +Message* load_message (std::string& str) { + Message* m; + if (GET_STR("network.serialization") == "xml") { + std::istringstream ist (str); + xml_iarchive is (ist); + is >> BOOST_SERIALIZATION_NVP(m); + + } else { + std::istringstream ist (str); + binary_iarchive is (ist); + is >> BOOST_SERIALIZATION_NVP(m); + } + return m; 
+} + Message* load_message (boost::asio::streambuf& buf) { Message* m; if (GET_STR("network.serialization") == "xml") { @@ -57,6 +72,7 @@ void send_message(boost::asio::ip::tcp::socket* socket, socket->set_option(option); string* to_send = save_message(msg); socket->send(boost::asio::buffer(*to_send)); + delete to_send; } diff --git a/src/messages/factory.hh b/src/messages/factory.hh index ed2da0a..d929cbd 100644 --- a/src/messages/factory.hh +++ b/src/messages/factory.hh @@ -10,6 +10,7 @@ namespace eclipse { namespace messages { Message* load_message (boost::asio::streambuf&); +Message* load_message (std::string&); std::string* save_message (Message*); void send_message(boost::asio::ip::tcp::socket*, eclipse::messages::Message*); diff --git a/src/network/asyncchannel.cc b/src/network/asyncchannel.cc index 4e0f68e..cb255a5 100644 --- a/src/network/asyncchannel.cc +++ b/src/network/asyncchannel.cc @@ -43,7 +43,9 @@ tcp::socket& AsyncChannel::get_socket() { //}}} // do_write str {{{ void AsyncChannel::do_write(std::shared_ptr& str_p) { + queue_mutex.lock(); messages_queue.push(str_p); + queue_mutex.unlock(); if (!is_writing.exchange(true)) { do_write_impl (); } @@ -51,9 +53,12 @@ void AsyncChannel::do_write(std::shared_ptr& str_p) { //}}} // do_write {{{ void AsyncChannel::do_write(Message* m) { + string* str = save_message(m); + queue_mutex.lock(); messages_queue.push(shared_ptr(str)); + queue_mutex.unlock(); if (!is_writing.exchange(true)) { do_write_impl (); } @@ -61,17 +66,25 @@ void AsyncChannel::do_write(Message* m) { // }}} // do_write_buffer {{{ void AsyncChannel::do_write_buffer() { - do_write_impl(); + if (!is_writing.exchange(true)) { + do_write_impl(); + } } // }}} // commit{{{ void AsyncChannel::commit(std::shared_ptr& str_p) { + queue_mutex.lock(); messages_queue.push(str_p); + queue_mutex.unlock(); } //}}} // do_write_impl {{{ void AsyncChannel::do_write_impl() { - auto to_write = messages_queue.front(); + shared_ptr to_write; + queue_mutex.lock(); + 
to_write = messages_queue.front(); + queue_mutex.unlock(); + host = socket.remote_endpoint().address().to_string().c_str(); port = socket.remote_endpoint().port(); async_write (socket, buffer(*to_write), transfer_exactly(to_write->size()), @@ -86,7 +99,7 @@ void AsyncChannel::on_write(const boost::system::error_code& ec, ec.message().c_str(), s, host.c_str()); if (ec == boost::asio::error::connection_reset) { - INFO("Reconnecting to %s %u", host.c_str(), port); + WARN("Reconnecting to %s %u", host.c_str(), port); try { socket.close(); @@ -117,18 +130,26 @@ void AsyncChannel::on_write(const boost::system::error_code& ec, socket.async_connect(*ep, connect_callback); } catch (exception& e) { - INFO("Mapper exception %s", e.what()); + ERROR("Mapper exception %s", e.what()); } catch (boost::exception& e) { - INFO("Mapper exception %s", diagnostic_information(e).c_str()); + ERROR("Mapper exception %s", diagnostic_information(e).c_str()); } } } else { + + queue_mutex.lock(); messages_queue.pop(); + queue_mutex.unlock(); if (!messages_queue.empty()) { do_write_impl (); } else { + // Keep it alive for 10s + auto t = std::make_shared(context.io, boost::posix_time::seconds(10)); + auto self(shared_from_this()); + t->async_wait([t, self] (const boost::system::error_code& ec) { }); + is_writing.exchange(false); } } @@ -137,7 +158,7 @@ void AsyncChannel::on_write(const boost::system::error_code& ec, // do_read {{{ void AsyncChannel::do_read () { DEBUG("Connection established, starting to read"); - spawn(iosvc, boost::bind(&AsyncChannel::read_coroutine, shared_from_this(), _1)); + spawn(context.io, boost::bind(&AsyncChannel::read_coroutine, shared_from_this(), _1)); } // }}} // read_coroutine {{{ @@ -147,29 +168,29 @@ void AsyncChannel::do_read () { void AsyncChannel::read_coroutine (yield_context yield) { boost::asio::streambuf buf; boost::system::error_code ec; - char header [header_size + 1] = {'\0'}; + char* header = new char[header_size + 1]; // Extra element for strtoul + 
bzero(header, header_size + 1); try { while (true) { - auto keep_alive = shared_from_this(); + auto self(shared_from_this()); //! Read header of incoming message, we know its size. + //auto& header_buffer = ; size_t recv = async_read(socket, buffer(header, header_size), yield[ec]); + if (ec == boost::asio::error::eof) + break; if (recv != (size_t)header_size or ec) throw std::runtime_error("header error"); - DEBUG("Header has arrived"); - //! The header gives us the length of the incoming message. //! Note, that buf.prepare is the fastest way to read. size_t size = strtoul(header, NULL, 10); - try { const auto& read_buffer = buf.prepare(size); - INFO("READING %lu", size); + recv = async_read(socket, read_buffer, yield[ec]); - } catch (std::bad_alloc& e) { - ERROR("Running out of memory"); - } + if (ec == boost::asio::error::eof) + break; if (recv != size or ec) throw std::runtime_error("body error"); @@ -178,20 +199,19 @@ void AsyncChannel::read_coroutine (yield_context yield) { unique_ptr msg {load_message(buf)}; buf.consume(recv); - DEBUG("Package has been deserialized"); - node->on_read(msg.get(), this); + node->on_read(msg.get(), self.get()); } - } catch (std::exception& e) { - if (ec == boost::asio::error::eof) - DEBUG("AsyncChannel: Closing server socket to client"); - else - ERROR("AsyncChannel: unformed message arrived from host %s, ex: %s", + } catch (std::bad_alloc& e) { + ERROR("Running out of memory"); + + } catch (std::exception& e) { + ERROR("AsyncChannel: unformed message arrived from host %s, ex: %s", socket.remote_endpoint().address().to_string().c_str(), e.what()); } catch (boost::exception& e) { - INFO("read exception %s", diagnostic_information(e).c_str()); + ERROR("read exception %s", diagnostic_information(e).c_str()); } - + delete[] header; } // }}} diff --git a/src/network/asyncchannel.hh b/src/network/asyncchannel.hh index 1f5e0ac..f016f41 100644 --- a/src/network/asyncchannel.hh +++ b/src/network/asyncchannel.hh @@ -10,6 +10,7 @@ #include 
#include #include +#include namespace eclipse { namespace network { @@ -56,6 +57,8 @@ class AsyncChannel: public Channel, public std::enable_shared_from_this is_writing; boost::asio::io_service& iosvc; + std::mutex queue_mutex; + std::string host; uint32_t port; }; diff --git a/src/network/client_handler.cc b/src/network/client_handler.cc index 021994a..94dc919 100644 --- a/src/network/client_handler.cc +++ b/src/network/client_handler.cc @@ -6,83 +6,124 @@ #include #include #include -#include +#include +#include using namespace eclipse::network; using namespace std; +using boost::scoped_ptr; +using boost::asio::ip::tcp; +using vec_str = std::vector; -mutex mut; // Constructor {{{ ClientHandler::ClientHandler (uint32_t p): nodes(context.settings.get ("network.nodes")), - port(p) + port(p), + id(context.id) { } +void ClientHandler::attach(NetObserver* n) { + local_router = n; +} // }}} // connect {{{ void ClientHandler::connect(uint32_t i, shared_ptr server) { - spawn(context.io, [&, server_copy=server, node=nodes[i], p=this->port](boost::asio::yield_context yield) { - try { - shared_ptr s = server_copy; - boost::system::error_code ec; - tcp::resolver resolver (context.io); - tcp::resolver::query query (node, to_string(p)); - - auto it = resolver.async_resolve(query, yield[ec]); - if (ec) { - ERROR("Resolving %s:%u", node.c_str(), p); - return; - } - - tcp::endpoint ep (*it); - while (true) { - s->get_socket().async_connect(ep, yield[ec]); - if (ec) { - if(ec == boost::asio::error::timed_out) { + spawn(context.io, [this, index = i, server_copy=server, node=nodes[i]] + (boost::asio::yield_context yield) { + try { + shared_ptr s = server_copy; + boost::system::error_code ec; + tcp::resolver resolver (context.io); + tcp::resolver::query query (node, to_string(port)); + + auto it = resolver.async_resolve(query, yield[ec]); + if (ec) + BOOST_THROW_EXCEPTION(std::runtime_error("Resolving")); + + tcp::endpoint ep (*it); + + s->get_socket().async_connect(ep, yield[ec]); + 
while (ec == boost::asio::error::timed_out) { s->get_socket().close(); - WARN("Re-connecting to %s:%u", node.c_str(), p); - continue; - } - ERROR("Connecting %s:%u ec=%s", node.c_str(), p, ec.message().c_str()); - return; - } - break; - } + WARN("Re-connecting to %s:%u", node.c_str(), port); + s->get_socket().async_connect(ep, yield[ec]); + } - boost::asio::ip::tcp::no_delay option(true); - s->get_socket().set_option(option); + if (ec) + BOOST_THROW_EXCEPTION(std::runtime_error("Connecting")); -// current_servers.insert({i, s}); - s->do_write_buffer(); + tcp::no_delay option(true); + s->get_socket().set_option(option); - } catch (exception& e) { - INFO("Connect handler exception %s", e.what()); - } catch (boost::exception& e) { - INFO("Connect handler exception %s", diagnostic_information(e).c_str()); - } + rw_lock.lock(); + current_servers.insert({index, s}); + rw_lock.unlock(); + + s->do_write_buffer(); + + } catch (exception& e) { + ERROR("Connect coroutine exception %s", e.what()); + throw; + + } catch (boost::exception& e) { + ERROR("Connect corourine exception %s", diagnostic_information(e).c_str()); + throw; + } }); } // }}} +// try_reuse_client {{{ +bool ClientHandler::try_reuse_client(uint32_t i, shared_ptr str) { + // If connection is still on. + rw_lock.lock_shared(); + auto it = current_servers.find(i); + rw_lock.unlock_shared(); + + if (it != current_servers.end()) { + shared_ptr ptr = current_servers[i].lock(); + if (ptr) { + DEBUG("REUSING SOCKET"); + ptr->do_write(str); + return true; + + } else { + rw_lock.lock(); + current_servers.erase(i); + rw_lock.unlock(); + } + } + return false; +} +// }}} // send {{{ bool ClientHandler::send(uint32_t i, messages::Message* m) { if (i >= nodes.size()) return false; - // mut.lock(); - // If connection is still on. 
- //if (current_servers.find(i) != current_servers.end()) { - // shared_ptr ptr = current_servers[i].lock(); - // if (ptr) { - // ptr->do_write(m); - // return true; - // } - //} - - auto server = make_shared(node); shared_ptr message_serialized (save_message(m)); - server->commit(message_serialized); - connect(i, server); - //mut.unlock(); + //if (i == id and local_router != nullptr) { + // INFO("Message len=%lu", message_serialized->length()); + // // Dispatch to local + // context.io.post([message_serialized, this] () { + // try { + // INFO("Message len=%lu", message_serialized->length()); + // scoped_ptr msg {messages::load_message(*message_serialized)}; + // local_router->on_read(msg.get(), nullptr); + + // } catch (exception& e) { + // INFO("Mapper exception %s", e.what()); + // } catch (boost::exception& e) { + // INFO("Mapper exception %s", diagnostic_information(e).c_str()); + // } + // }); + // return true; + //} + + if (!try_reuse_client(i, message_serialized)) { + auto server = make_shared(local_router); + server->commit(message_serialized); + connect(i, server); + } return true; } @@ -91,18 +132,11 @@ bool ClientHandler::send(uint32_t i, messages::Message* m) { bool ClientHandler::send(uint32_t i, shared_ptr str) { if (i >= nodes.size()) return false; - // If connection is still on. 
- if (current_servers.find(i) != current_servers.end()) { - shared_ptr ptr = current_servers[i].lock(); - if (ptr) { - ptr->do_write(str); - return true; - } - } - - auto server = make_shared(node); - server->commit(str); - connect(i, server); + if (!try_reuse_client(i, str)) { + auto server = make_shared(local_router); + server->commit(str); + connect(i, server); + } return true; } diff --git a/src/network/client_handler.hh b/src/network/client_handler.hh index 3d8b69c..3f5aac6 100644 --- a/src/network/client_handler.hh +++ b/src/network/client_handler.hh @@ -2,23 +2,24 @@ #include "../messages/message.hh" #include "netobserver.hh" #include "server.hh" + #include +#include namespace eclipse { namespace network { -using boost::asio::ip::tcp; -using vec_str = std::vector; - class ClientHandler { public: ClientHandler(uint32_t port); ~ClientHandler() = default; + void attach(NetObserver*); bool send(uint32_t i, messages::Message* m); bool send_and_replicate(std::vector, messages::Message*); private: + bool try_reuse_client(uint32_t i, std::shared_ptr); bool send(uint32_t i, std::shared_ptr str); void connect(uint32_t i, std::shared_ptr server); @@ -26,9 +27,13 @@ class ClientHandler { //! it. It can be freed any time. std::map> current_servers; - vec_str nodes; - NetObserver* node; + //! 
Reader/Writer lock for current_servers map + boost::shared_mutex rw_lock; + + std::vector nodes; + NetObserver* local_router = nullptr; uint32_t port; + uint32_t id; }; } diff --git a/src/targets/node_main_mr.cc b/src/targets/node_main_mr.cc index c47c6e3..0fec610 100644 --- a/src/targets/node_main_mr.cc +++ b/src/targets/node_main_mr.cc @@ -1,4 +1,3 @@ -#define MALLOC_CHECK_ 3 #include #include #include @@ -23,50 +22,51 @@ int main (int argc, char ** argv) { pid_t pid = fork(); if (pid != 0) { try { - context.io.notify_fork(boost::asio::io_service::fork_parent); - context.run(); + context.io.notify_fork(boost::asio::io_service::fork_parent); - struct rlimit limit; + //struct rlimit limit; - limit.rlim_cur = 4000; - limit.rlim_max = 4096; - if (setrlimit(RLIMIT_NOFILE, &limit) != 0) { - ERROR("setrlimit() failed with errno=%d\n", errno); - return 1; - } - struct rlimit core_limits; - core_limits.rlim_cur = core_limits.rlim_max = RLIM_INFINITY; - setrlimit(RLIMIT_CORE, &core_limits); + //limit.rlim_cur = 4000; + //limit.rlim_max = 4096; + //if (setrlimit(RLIMIT_NOFILE, &limit) != 0) { + // ERROR("setrlimit() failed with errno=%d\n", errno); + // return 1; + //} + //struct rlimit core_limits; + //core_limits.rlim_cur = core_limits.rlim_max = RLIM_INFINITY; + //setrlimit(RLIMIT_CORE, &core_limits); - sleep(2); + sleep(2); - uint32_t ex_port = GET_INT("network.ports.mapreduce"); - auto internal_net = make_unique (ex_port); - auto external_net = make_unique (ex_port); - - TaskExecutor executor(internal_net.get()); + uint32_t ex_port = GET_INT("network.ports.mapreduce"); + auto internal_net = make_unique (ex_port); + auto external_net = make_unique (ex_port); - auto router = make_unique(&executor, new SimpleRouter()); + TaskExecutor executor(internal_net.get()); - external_net->attach(router.get()); + auto router = make_unique(&executor, new SimpleRouter()); - external_net->establish(); + external_net->attach(router.get()); + internal_net->attach(router.get()); + + 
external_net->establish(); + + context.run(); + context.join(); - context.join(); } catch (std::exception& e) { ERROR("GENERAL exception at %s", e.what()); + + } catch (boost::exception& e) { + ERROR("GENERAL exception %s", diagnostic_information(e).c_str()); } - catch (boost::exception& e) { - INFO("GENERAL exception %s", diagnostic_information(e).c_str()); - } wait(NULL); } else { context.io.notify_fork(boost::asio::io_service::fork_child); - context.run(); uint32_t ex_port = GET_INT("network.ports.client"); @@ -84,6 +84,7 @@ int main (int argc, char ** argv) { external_net->establish(); + context.run(); context.join(); } From 79596e144f26316431807dbf94bf9c3227b85d87 Mon Sep 17 00:00:00 2001 From: Vicente Adolfo Bolea Sanchez Date: Tue, 23 May 2017 00:40:52 +0900 Subject: [PATCH 4/8] Added python support, changed python API --- Makefile.am | 4 +- src/client/veloxmr.in | 46 ------ src/client/veloxmr_lib.py | 21 ++- src/mapreduce/executor.cc | 53 +++---- src/mapreduce/messages/boost_impl.cc | 2 + src/mapreduce/messages/task.hh | 2 +- src/mapreduce/py_executor.hh | 3 +- src/mapreduce/task_cxx.cc | 67 +++++++++ src/mapreduce/task_cxx.hh | 31 +++++ src/mapreduce/task_executor.cc | 40 ++---- src/mapreduce/task_handler.cc | 5 + src/mapreduce/task_handler.hh | 26 ++++ src/mapreduce/task_python.cc | 200 +++++++++++++++++++++++++++ src/mapreduce/task_python.hh | 29 ++++ src/targets/veloxmr_mapreduce.cc | 21 ++- 15 files changed, 436 insertions(+), 114 deletions(-) create mode 100644 src/mapreduce/task_cxx.cc create mode 100644 src/mapreduce/task_cxx.hh create mode 100644 src/mapreduce/task_handler.cc create mode 100644 src/mapreduce/task_handler.hh create mode 100644 src/mapreduce/task_python.cc create mode 100644 src/mapreduce/task_python.hh diff --git a/Makefile.am b/Makefile.am index 8f1a462..709b27c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -102,7 +102,9 @@ eclipse_node_SOURCES = src/targets/node_main_mr.cc \ src/mapreduce/fs/iwriter.cc \ 
src/mapreduce/fs/ireader.cc \ src/mapreduce/executor.cc \ - src/mapreduce/py_executor.cc + src/mapreduce/task_handler.cc \ + src/mapreduce/task_cxx.cc \ + src/mapreduce/task_python.cc eclipse_node_LDADD = libvmr.la $(LDADD) -lpython2.7 diff --git a/src/client/veloxmr.in b/src/client/veloxmr.in index be563bd..2395cec 100755 --- a/src/client/veloxmr.in +++ b/src/client/veloxmr.in @@ -3,50 +3,4 @@ from veloxmr_lib import * from subprocess import call from IPython import embed -#code.interact(local=locals()) embed() -#@staticmethod -#def parse_argv(opt,arg): -# if opt == 'put': -# velox.put(arg) -# elif opt == 'rm': -# velox.rm(arg) -# -# elif opt == 'cat': -# velox.cat(arg) -# -# elif opt == 'ls': -# velox.ls() -# -# elif opt == 'format': -# velox.formating() -# -# elif opt == 'shell': -# velox.shell() - - -# @staticmethod -# def shell(): -# while True: -# try: -# var = raw_input("VeloxMR>") -# opts = var.split() -# opt = opts[0] -# -# if (opt == 'mr'): -# mapreduce(opts[1], opts[2], opts[3], opts[4]) -# -# else: -# arg = '' -# if len(opts) == 2: -# arg= opts[1] -# velox.parse_argv(opt, arg) -# except (EOFError): -# break -# -#opt = sys.argv[1] -#arg = '' -#if len(sys.argv) == 3: -# arg= sys.argv[2] -# -#velox.parse_argv(opt, arg) diff --git a/src/client/veloxmr_lib.py b/src/client/veloxmr_lib.py index 5c24bf7..27a50f3 100644 --- a/src/client/veloxmr_lib.py +++ b/src/client/veloxmr_lib.py @@ -24,9 +24,26 @@ def format(): def show(file_name): call(["dfs", "show", file_name]) -def mapreduce(file_name, mapf, redf, output): +def mapreduce(file_name, mapf, redf, output, pmap = "", amap = ""): map_src = inspect.getsource(mapf) red_src = inspect.getsource(redf) map_src.replace(' ', '|') map_src.replace('\n', '~') - call(["__velox_mapreduce", file_name, map_src, red_src, output]) + + red_src.replace(' ', '|') + red_src.replace('\n', '~') + + pmap_src = "NULL" + amap_src = "NULL" + if pmap is not "": + pmap_src = inspect.getsource(pmap) + pmap_src.replace(' ', '|') + 
pmap_src.replace('\n', '~') + + if amap is not "": + amap_src = inspect.getsource(amap) + amap_src.replace(' ', '|') + amap_src.replace('\n', '~') + + + call(["__velox_mapreduce", file_name, map_src, red_src, output, pmap_src, amap_src]) diff --git a/src/mapreduce/executor.cc b/src/mapreduce/executor.cc index 3c34892..9d68855 100644 --- a/src/mapreduce/executor.cc +++ b/src/mapreduce/executor.cc @@ -6,6 +6,8 @@ #include "../mapreduce/fs/ireader.h" #include "messages/key_value_shuffle.h" #include "../client/dfs.hh" +#include "task_cxx.hh" +#include "task_python.hh" #include #include @@ -35,26 +37,22 @@ bool Executor::run_map (messages::Task* m) { queue threads; mutex mut; - auto path_lib = GET_STR("path.applications"); auto network_size = GET_VEC_STR("network.nodes").size(); size_t mappers = GET_INT("mapreduce.mappers"); - path_lib += ("/" + m->library); - DL_loader loader (path_lib); - try { - loader.init_lib(); - } catch (std::exception& e) { - context.logger->error ("Not found library path[%s]", path_lib.c_str()); + task_handler* task_execution = nullptr; + + if (m->lang == "C++") { + task_execution = new task_cxx(m->library, m->func_name); + } else { + task_execution = new task_python(m->func_body, m->pre_map, m->after_map); } + task_execution->setup(true); - before_map_t _before_map_ = loader.load_function_before_map("before_map"); - after_map_t _after_map_ = loader.load_function_after_map("after_map"); - mapper_t _map_ = loader.load_function(m->func_name); + INFO("Launching mapper with %i threads", m->blocks.size()); - if(_before_map_ != nullptr) - _before_map_(options); + task_execution->pre_map(options); - INFO("LAunching mapper with %i threads", m->blocks.size()); try { for (size_t map_id = 0; map_id < m->blocks.size(); map_id++) { @@ -86,7 +84,7 @@ bool Executor::run_map (messages::Task* m) { continue; std::string line(next_line); - _map_ (line, results, options); + task_execution->map(line, results, options); } delete[] next_line; @@ -159,13 +157,8 @@ 
bool Executor::run_map (messages::Task* m) { threads.pop(); } - - if(_after_map_ != nullptr) { - INFO("CALLING AFTER MAP"); - _after_map_(options); - } - - loader.close(); + task_execution->after_map(options); + delete task_execution; peer->notify_map_is_finished(m->job_id, keys_blocks); keys_blocks.clear(); @@ -181,24 +174,22 @@ bool Executor::run_map (messages::Task* m) { // }}} // run_reduce {{{ bool Executor::run_reduce (messages::Task* task) { - auto path_lib = context.settings.get("path.applications"); auto block_size = GET_INT("filesystem.block"); //auto reducer_slot = GET_INT("mapreduce.reduce_slot"); - path_lib += ("/" + task->library); - DL_loader loader (path_lib); auto network_size = GET_VEC_STR("network.nodes").size(); Histogram boundaries(network_size, 100); boundaries.initialize(); velox::model::metadata metadata; - try { - loader.init_lib(); - } catch (std::exception& e) { - context.logger->error ("Not found library path[%s]", path_lib.c_str()); - } + task_handler* task_execution = nullptr; - reducer_t _reducer_ = loader.load_function_reduce(task->func_name); + if (task->lang == "C++") { + task_execution = new task_cxx(task->library, task->func_name); + } else { + task_execution = new task_python(task->func_body, task->pre_map, task->after_map); + } + task_execution->setup(false); uint32_t total_size = 0; uint32_t num_keys = 0; @@ -243,7 +234,7 @@ bool Executor::run_reduce (messages::Task* task) { if(values.size() > 0) { try { - _reducer_ (key, values, output); + task_execution->reduce(key, values, output); } catch (std::exception& e) { ERROR("Error in the executer: %s", e.what()); diff --git a/src/mapreduce/messages/boost_impl.cc b/src/mapreduce/messages/boost_impl.cc index a36e907..0f9a585 100644 --- a/src/mapreduce/messages/boost_impl.cc +++ b/src/mapreduce/messages/boost_impl.cc @@ -30,6 +30,8 @@ template ar & BOOST_SERIALIZATION_NVP(c.leader); ar & BOOST_SERIALIZATION_NVP(c.func_body); ar & BOOST_SERIALIZATION_NVP(c.lang); + ar & 
BOOST_SERIALIZATION_NVP(c.pre_map); + ar & BOOST_SERIALIZATION_NVP(c.after_map); } template diff --git a/src/mapreduce/messages/task.hh b/src/mapreduce/messages/task.hh index 9a9b10d..48e9de9 100644 --- a/src/mapreduce/messages/task.hh +++ b/src/mapreduce/messages/task.hh @@ -15,7 +15,7 @@ struct Task: public Message { std::string lang; std::string library, func_name, input_path; std::string file_output; - std::string func_body; + std::string func_body, pre_map, after_map; std::vector> blocks; uint32_t subjob_id = 0; uint32_t job_id = 0; diff --git a/src/mapreduce/py_executor.hh b/src/mapreduce/py_executor.hh index f423f1a..18efef0 100644 --- a/src/mapreduce/py_executor.hh +++ b/src/mapreduce/py_executor.hh @@ -16,7 +16,8 @@ #include #include #include -#include + +extern "C" typedef struct _object PyObject; namespace eclipse { diff --git a/src/mapreduce/task_cxx.cc b/src/mapreduce/task_cxx.cc new file mode 100644 index 0000000..e523322 --- /dev/null +++ b/src/mapreduce/task_cxx.cc @@ -0,0 +1,67 @@ +#include "task_cxx.hh" +#include "../common/context_singleton.hh" + +#include +#include +#include + +using namespace eclipse; +using namespace std; + +// Constructor {{{ +task_cxx::task_cxx(std::string library, std::string func_name) : + library_path(library), + func_name(func_name) +{ + std::string path_lib; + try { + path_lib = GET_STR("path.applications"); + path_lib += ("/" + library); + loader.reset(new DL_loader(path_lib)); + + loader->init_lib(); + + } catch (std::exception& e) { + ERROR("Not found library path[%s]", path_lib.c_str()); + } +} + +task_cxx::~task_cxx() { + +} +// }}} +// setup {{{ +void task_cxx::setup(bool is_map) { + + if (is_map) { + before_map_f = loader->load_function_before_map("before_map"); + after_map_f = loader->load_function_after_map("after_map"); + mapper = loader->load_function(func_name); + + } else { + reducer = loader->load_function_reduce(func_name); + } +} +// }}} +// pre_map {{{ +void task_cxx::pre_map(TaskOptions& options) { + 
if(before_map_f != nullptr) + before_map_f(options); +} +//}}} +// after_map {{{ +void task_cxx::after_map(TaskOptions& options) { + if(after_map_f != nullptr) + after_map_f(options); +} +// }}} +// map {{{ +void task_cxx::map(std::string& line, TaskOutput& out, TaskOptions& options) { + mapper(line, out, options); +} +// }}} +// reduce {{{ +void task_cxx::reduce(std::string& key, vec_str& values, TaskOutput& out) { + reducer(key, values, out); +} +// }}} diff --git a/src/mapreduce/task_cxx.hh b/src/mapreduce/task_cxx.hh new file mode 100644 index 0000000..0272429 --- /dev/null +++ b/src/mapreduce/task_cxx.hh @@ -0,0 +1,31 @@ +#pragma once +#include "task_handler.hh" +#include "../common/dl_loader.hh" +#include + +namespace eclipse { + +class task_cxx : public task_handler { + public: + task_cxx(std::string library, std::string func_name); + ~task_cxx(); + + void setup(bool is_map) override; + void pre_map(TaskOptions&) override; + void after_map(TaskOptions&) override; + void map(std::string&, TaskOutput&, TaskOptions&) override; + void reduce(std::string&, vec_str&, TaskOutput&) override; + + private: + before_map_t before_map_f; + after_map_t after_map_f; + mapper_t mapper; + reducer_t reducer; + + std::string library_path; + std::string func_name; + + std::unique_ptr loader; +}; + +} diff --git a/src/mapreduce/task_executor.cc b/src/mapreduce/task_executor.cc index d21fa37..8d83205 100644 --- a/src/mapreduce/task_executor.cc +++ b/src/mapreduce/task_executor.cc @@ -10,7 +10,6 @@ #include "messages/job.hh" #include "messages/task.hh" #include "executor.hh" -#include "py_executor.hh" #include "fs/iwriter.h" #include #include @@ -222,24 +221,12 @@ void TaskExecutor::write_key_value(messages::KeyValueShuffle *kv_shuffle) { // }}} // request_local_map {{{ void TaskExecutor::request_local_map (messages::Task* task) { - if (task->lang == "C++") { + Task stask = *task; - Task stask = *task; - - std::thread([&, this](Task task) { - Executor exec(this); - 
exec.run_map(&task); + std::thread([&, this](Task task) { + Executor exec(this); + exec.run_map(&task); }, stask).detach(); - - sleep(1); - - } else if (task->lang == "Python") { - PYexecutor exec(this); - for (auto& block : task->blocks) { - string block_str = local_io.read(block.second); - exec.run_map(task, block_str); - } - } } // }}} // notify_task_leader {{{ @@ -349,19 +336,12 @@ void TaskExecutor::request_local_reduce (messages::Task* m) { directory.select_idata_metadata(job_id, map_id, &di); if (di.num_reducer > 0) { //! Perform reduce operation - if (m->lang == "C++") { - - std::async(std::launch::async, [&]() { - logger->info("Performing reduce operation"); - Executor exec(this); - Task copy_task = *m; - exec.run_reduce(©_task); - }); - - } else if (m->lang == "Python") { - PYexecutor exec(this); - exec.run_reduce(m); - } + std::async(std::launch::async, [&]() { + logger->info("Performing reduce operation"); + Executor exec(this); + Task copy_task = *m; + exec.run_reduce(©_task); + }); } } // }}} diff --git a/src/mapreduce/task_handler.cc b/src/mapreduce/task_handler.cc new file mode 100644 index 0000000..f343915 --- /dev/null +++ b/src/mapreduce/task_handler.cc @@ -0,0 +1,5 @@ +#include "task_handler.hh" + +using namespace eclipse; + +task_handler::task_handler() { } diff --git a/src/mapreduce/task_handler.hh b/src/mapreduce/task_handler.hh new file mode 100644 index 0000000..ea3db51 --- /dev/null +++ b/src/mapreduce/task_handler.hh @@ -0,0 +1,26 @@ +#pragma once +#include "output_collection.hh" +#include +#include +#include + +namespace eclipse { + +typedef velox::OutputCollection TaskOutput; +typedef std::vector vec_str; +typedef std::unordered_map TaskOptions; + +class task_handler { + public: + + task_handler(); + virtual ~task_handler() = default; + + virtual void setup(bool is_map) = 0; + virtual void pre_map(TaskOptions&) = 0; + virtual void after_map(TaskOptions&) = 0; + virtual void map(std::string&, TaskOutput&, TaskOptions&) = 0; + virtual 
void reduce(std::string&, vec_str&, TaskOutput&) = 0; +}; + +} diff --git a/src/mapreduce/task_python.cc b/src/mapreduce/task_python.cc new file mode 100644 index 0000000..512873d --- /dev/null +++ b/src/mapreduce/task_python.cc @@ -0,0 +1,200 @@ +#include "task_python.hh" +#include "../common/context_singleton.hh" + +#include +#include +#include +#include + +using namespace eclipse; +using namespace std; + +// Constructor {{{ +task_python::task_python(std::string function_body, + std::string pre_map = "", + std::string after_map = "") : + function_body_f(function_body), + pre_map_f(pre_map), + after_map_f(after_map) +{ } + +task_python::~task_python() { + Py_XDECREF(python_module); + Py_Finalize(); +} +// }}} +// setup {{{ +void task_python::setup(bool is_map) { + Py_Initialize(); + + PyObject *pCompiledFn, *pModule; + char module_name[] = "VELOXMR_BACKEND"; + + pCompiledFn = Py_CompileString(function_body_f.c_str(), "", Py_file_input); + + if (pCompiledFn == NULL) + ERROR("[PY interpreter] I am not able to parse your function"); + + pModule = PyImport_ExecCodeModule(module_name, pCompiledFn); + + if (pre_map_f != "") { + pCompiledFn = Py_CompileString(pre_map_f.c_str(), "", Py_file_input); + + if (pCompiledFn == NULL) + ERROR("[PY interpreter] I am not able to parse your function"); + + pModule = PyImport_ExecCodeModule(module_name, pCompiledFn); + } + + if (after_map_f != "") { + pCompiledFn = Py_CompileString(after_map_f.c_str(), "", Py_file_input); + + if (pCompiledFn == NULL) + ERROR("[PY interpreter] I am not able to parse your function"); + + pModule = PyImport_ExecCodeModule(module_name, pCompiledFn); + } + + if (pModule == NULL) + ERROR("[PY interpreter] I am not able to create a module for your function"); + + python_module = pModule; +} +// }}} +// pre_map {{{ +void task_python::pre_map(TaskOptions& options) { + if (pre_map_f == "") + return; + + PyObject *key = NULL, *value = NULL, *pFunc = NULL; + Py_ssize_t pos = 0; + PyObject* pOptions = 
PyDict_New(); + + pFunc = PyObject_GetAttrString(python_module, "pre_map" ) ; + PyObject_CallFunctionObjArgs(pFunc, pOptions); + + if (pOptions == NULL) { + ERROR("Python map did not return anything :( "); + } + + // Save dictionary values as a PyObject pointer + while (PyDict_Next(pOptions, &pos, &key, &value)) { + string k = PyString_AsString(key); + Py_INCREF(value); + + if (options.find(k) != options.end()) { + options[k] = value; + + } else { + options.insert({k, value}); + } + } + + Py_DECREF(pOptions); +} +//}}} +// after_map {{{ +void task_python::after_map(TaskOptions& options) { + if(after_map_f != "") + return; + + PyObject *pFunc = NULL; + PyObject* pOptions = PyDict_New(); + + for (auto kv_pair : options) { + PyObject* current_key = PyString_FromString(kv_pair.first.c_str()); + PyDict_SetItem(pOptions, current_key, static_cast(kv_pair.second)); + } + + pFunc = PyObject_GetAttrString(python_module, "after_map" ) ; + PyObject_CallFunctionObjArgs(pFunc, pOptions); + + if (pOptions == NULL) { + ERROR("Python map did not return anything :( "); + } + + Py_DECREF(pOptions); +} +// }}} +// map {{{ +// @brief it runs the map function in python +// +// The python map function returns a dictionary +// we iterate the dictionary to return all the +// key pair functions +// +// @todo multiples key values +void task_python::map(std::string& line, TaskOutput& out, TaskOptions& options) { + PyObject *key = NULL, *value = NULL, *pFunc = NULL; + Py_ssize_t pos = 0; + + PyObject* pOptions = PyDict_New(); + PyObject* pOutput = PyDict_New(); + + for (auto kv_pair : options) { + PyObject* current_key = PyString_FromString(kv_pair.first.c_str()); + PyDict_SetItem(pOptions, current_key, static_cast(kv_pair.second)); + } + + pFunc = PyObject_GetAttrString(python_module, "map" ) ; + PyObject_CallFunction(pFunc, (char*)"soo", const_cast(line.c_str()), + pOutput, pOptions); + + + if (pOutput == NULL) { + ERROR("Python map did not return anything :( "); + } + + // Save dictionary 
values as a PyObject pointer + while (PyDict_Next(pOutput, &pos, &key, &value)) { + string k = PyString_AsString(key); + + for (Py_ssize_t i = 0; i < PyList_Size(value); i++) { + PyObject* item = PyList_GetItem(value, i); + string v = PyString_AsString(item); + out.insert(k, v); + } + } + + Py_DECREF(pOutput); +} +// }}} +// reduce {{{ +// @brief it runs the reduce python function +// +void task_python::reduce(std::string& key, vec_str& values, TaskOutput& out) { + PyObject *pKey = NULL, *pValue = NULL, *pFunc = NULL; + Py_ssize_t pos = 0; + + PyObject* pOutput = PyDict_New(); + PyObject* pInput = PyList_New(values.size()); + + for (auto& value : values) { + PyObject* pValue = PyString_FromString(value.c_str()); + PyList_Append(pInput, pValue); + } + + pFunc = PyObject_GetAttrString(python_module, "reduce"); + PyObject_CallFunction(pFunc, (char*)"soo", const_cast(key.c_str()), + pInput, pOutput); + + + if (pOutput == NULL) { + ERROR("Python map did not return anything :( "); + } + + // Save dictionary values as a PyObject pointer + while (PyDict_Next(pOutput, &pos, &pKey, &pValue)) { + string k = PyString_AsString(pKey); + + for (Py_ssize_t i = 0; i < PyList_Size(pValue); i++) { + PyObject* item = PyList_GetItem(pValue, i); + string v = PyString_AsString(item); + out.insert(k, v); + } + } + + Py_DECREF(pOutput); + Py_DECREF(pInput); +} +// }}} diff --git a/src/mapreduce/task_python.hh b/src/mapreduce/task_python.hh new file mode 100644 index 0000000..98a0e29 --- /dev/null +++ b/src/mapreduce/task_python.hh @@ -0,0 +1,29 @@ +#pragma once +#include "task_handler.hh" +#include "../common/dl_loader.hh" +#include + +extern "C" typedef struct _object PyObject; + +namespace eclipse { + +class task_python : public task_handler { + public: + task_python(std::string function_body, std::string pre_map, std::string after_map); + ~task_python(); + + void setup(bool is_map) override; + void pre_map(TaskOptions&) override; + void after_map(TaskOptions&) override; + void 
map(std::string&, TaskOutput&, TaskOptions&) override; + void reduce(std::string&, vec_str&, TaskOutput&) override; + + private: + std::string function_body_f; + std::string pre_map_f; + std::string after_map_f; + + PyObject* python_module; +}; + +} diff --git a/src/targets/veloxmr_mapreduce.cc b/src/targets/veloxmr_mapreduce.cc index 524ff2b..11fd5f6 100644 --- a/src/targets/veloxmr_mapreduce.cc +++ b/src/targets/veloxmr_mapreduce.cc @@ -10,8 +10,8 @@ using namespace velox; using namespace std; int main(int argc, char** argv) { - if (argc < 4) { - cout << "ERROR: usage __velox_mapreduce inputfile mapfunc reducefunc outputfile" << endl; + if (argc < 6) { + cout << "ERROR: usage __velox_mapreduce inputfile mapfunc reducefunc outputfile [premap] [aftermap]" << endl; return EXIT_FAILURE; } @@ -28,7 +28,24 @@ int main(int argc, char** argv) { dataset A = mr.make_dataset({argv[1]}); + string pmap_func = argv[5]; + + if (pmap_func != "NULL") { + std::replace(pmap_func.begin(), pmap_func.end(), '|', ' '); + std::replace(pmap_func.begin(), pmap_func.end(), '~', '\n'); + // pre-map + } + A.pymap(map_func); + + string amap_func = argv[6]; + + if (amap_func != "NULL") { + std::replace(amap_func.begin(), amap_func.end(), '|', ' '); + std::replace(amap_func.begin(), amap_func.end(), '~', '\n'); + // after-remap + } + A.pyreduce(red_func, argv[4]); return EXIT_SUCCESS; From 7c6eb3b1c6d955bf1c8e42a2747146a3d2d151bc Mon Sep 17 00:00:00 2001 From: Vicente Adolfo Bolea Sanchez Date: Tue, 23 May 2017 03:40:16 +0900 Subject: [PATCH 5/8] Small memory leak remaining to debug, it grows memory very quick in leader node --- src/client/vmr.cc | 4 ++- src/client/vmr.hh | 2 +- src/fileleader/directory.cc | 8 +++-- src/mapreduce/executor.cc | 16 ++++++--- src/mapreduce/messages/boost_impl.cc | 2 ++ src/mapreduce/messages/job.hh | 2 +- src/mapreduce/task_cxx.cc | 1 + src/mapreduce/task_python.cc | 53 +++++++++++++++++++--------- src/targets/veloxmr_mapreduce.cc | 11 +++--- 9 files changed, 68 
insertions(+), 31 deletions(-) diff --git a/src/client/vmr.cc b/src/client/vmr.cc index 19714ff..8176b4c 100644 --- a/src/client/vmr.cc +++ b/src/client/vmr.cc @@ -93,7 +93,7 @@ void dataset::reduce(std::string func, std::string output) { } // }}} // pymap {{{ -void dataset::pymap(std::string func) { +void dataset::pymap(std::string func, std::string pmap = "", std::string amap = "") { tcp::socket socket (context.io); socket.connect(*find_local_master(job_id)); @@ -103,6 +103,8 @@ void dataset::pymap(std::string func) { job.files = files; job.job_id = job_id; job.func_body = func; + job.pre_map = pmap; + job.after_map = amap; send_message(&socket, &job); auto reply = read_reply (&socket); diff --git a/src/client/vmr.hh b/src/client/vmr.hh index 097db95..b10d419 100644 --- a/src/client/vmr.hh +++ b/src/client/vmr.hh @@ -12,7 +12,7 @@ class dataset { void map(std::string); void reduce(std::string, std::string); - void pymap(std::string); + void pymap(std::string, std::string, std::string); void pyreduce(std::string, std::string); protected: diff --git a/src/fileleader/directory.cc b/src/fileleader/directory.cc index 1b136d9..301565a 100644 --- a/src/fileleader/directory.cc +++ b/src/fileleader/directory.cc @@ -108,10 +108,14 @@ bool Directory::query_exec_simple(char* query, int (*fn)(void*,int,char**,char** char *zErrMsg = nullptr; sqlite3* db = open(path); - int rc = sqlite3_exec(db, query, fn, argv, &zErrMsg); - if (rc != SQLITE_OK) { + int rc; + while (SQLITE_OK != (rc = sqlite3_exec(db, query, fn, argv, &zErrMsg))) { ERROR("SQL error: %s", zErrMsg); sqlite3_free(zErrMsg); + if (rc == SQLITE_LOCKED) + sleep(1); + else + break; } sqlite3_close(db); diff --git a/src/mapreduce/executor.cc b/src/mapreduce/executor.cc index 9d68855..ccc4509 100644 --- a/src/mapreduce/executor.cc +++ b/src/mapreduce/executor.cc @@ -193,14 +193,18 @@ bool Executor::run_reduce (messages::Task* task) { uint32_t total_size = 0; uint32_t num_keys = 0; - vector threads; + queue threads; 
DirectoryMR directory; uint32_t reducer_slot = directory.select_number_of_reducers(task->job_id); mutex mut; DEBUG("LAunching reducer with %i threads", reducer_slot); for (int reducer_id = 0; reducer_id < reducer_slot; reducer_id++) { - threads.push_back(std::thread([&, this] (int id) { - DEBUG("%i %i", task->job_id, id); + + if (threads.size() >= 1) { + threads.front().join(); + threads.pop(); + } + threads.emplace(std::thread([&, this] (int id) { IReader ireader; ireader.set_job_id(task->job_id); @@ -281,8 +285,10 @@ bool Executor::run_reduce (messages::Task* task) { }, reducer_id)); } - for (auto& thread : threads) - thread.join(); + while (!threads.empty()) { + threads.front().join(); + threads.pop(); + } velox::DFS dfs; INFO("REDUCER APPENDING FILE_METADATA KP:%u", num_keys); diff --git a/src/mapreduce/messages/boost_impl.cc b/src/mapreduce/messages/boost_impl.cc index 0f9a585..5473658 100644 --- a/src/mapreduce/messages/boost_impl.cc +++ b/src/mapreduce/messages/boost_impl.cc @@ -148,6 +148,8 @@ template ar & BOOST_SERIALIZATION_NVP(c.file_output); ar & BOOST_SERIALIZATION_NVP(c.func_body); ar & BOOST_SERIALIZATION_NVP(c.lang); + ar & BOOST_SERIALIZATION_NVP(c.pre_map); + ar & BOOST_SERIALIZATION_NVP(c.after_map); } template diff --git a/src/mapreduce/messages/job.hh b/src/mapreduce/messages/job.hh index dca4e86..84a0eb3 100644 --- a/src/mapreduce/messages/job.hh +++ b/src/mapreduce/messages/job.hh @@ -17,7 +17,7 @@ struct Job: public Message { std::string reduce_name; std::string file_output; std::vector files; - std::string func_body; + std::string func_body, pre_map, after_map; std::string lang; }; diff --git a/src/mapreduce/task_cxx.cc b/src/mapreduce/task_cxx.cc index e523322..87e3231 100644 --- a/src/mapreduce/task_cxx.cc +++ b/src/mapreduce/task_cxx.cc @@ -27,6 +27,7 @@ task_cxx::task_cxx(std::string library, std::string func_name) : } task_cxx::~task_cxx() { + loader->close(); } // }}} diff --git a/src/mapreduce/task_python.cc 
b/src/mapreduce/task_python.cc index 512873d..068d0b4 100644 --- a/src/mapreduce/task_python.cc +++ b/src/mapreduce/task_python.cc @@ -63,14 +63,16 @@ void task_python::setup(bool is_map) { // }}} // pre_map {{{ void task_python::pre_map(TaskOptions& options) { - if (pre_map_f == "") + if (pre_map_f == "") { + INFO("Skipping pre-map function"); return; + } PyObject *key = NULL, *value = NULL, *pFunc = NULL; Py_ssize_t pos = 0; PyObject* pOptions = PyDict_New(); - pFunc = PyObject_GetAttrString(python_module, "pre_map" ) ; + pFunc = PyObject_GetAttrString(python_module, "pre_map") ; PyObject_CallFunctionObjArgs(pFunc, pOptions); if (pOptions == NULL) { @@ -123,7 +125,6 @@ void task_python::after_map(TaskOptions& options) { // we iterate the dictionary to return all the // key pair functions // -// @todo multiples key values void task_python::map(std::string& line, TaskOutput& out, TaskOptions& options) { PyObject *key = NULL, *value = NULL, *pFunc = NULL; Py_ssize_t pos = 0; @@ -137,9 +138,17 @@ void task_python::map(std::string& line, TaskOutput& out, TaskOptions& options) } pFunc = PyObject_GetAttrString(python_module, "map" ) ; - PyObject_CallFunction(pFunc, (char*)"soo", const_cast(line.c_str()), - pOutput, pOptions); + if (pFunc == NULL) { + ERROR("Could not load python map function"); + } + + char format[] = "sOO"; + if (NULL == PyObject_CallFunction(pFunc, format, const_cast(line.c_str()), + pOutput, pOptions)) { + ERROR("Cannot execute python map function"); + PyErr_Print(); + } if (pOutput == NULL) { ERROR("Python map did not return anything :( "); @@ -166,17 +175,31 @@ void task_python::reduce(std::string& key, vec_str& values, TaskOutput& out) { PyObject *pKey = NULL, *pValue = NULL, *pFunc = NULL; Py_ssize_t pos = 0; + //PyGILState_STATE gstate; + //gstate = PyGILState_Ensure(); PyObject* pOutput = PyDict_New(); - PyObject* pInput = PyList_New(values.size()); + PyObject* pInput = PyList_New(0); for (auto& value : values) { - PyObject* pValue = 
PyString_FromString(value.c_str()); - PyList_Append(pInput, pValue); + PyObject* pString = PyString_FromString(value.c_str()); + PyList_Append(pInput, pString); + Py_DECREF(pString); } pFunc = PyObject_GetAttrString(python_module, "reduce"); - PyObject_CallFunction(pFunc, (char*)"soo", const_cast(key.c_str()), - pInput, pOutput); + if (pFunc == NULL) { + ERROR("Could not load python reduce function"); + } + + char format[] = "sOO"; + if (NULL == PyObject_CallFunction(pFunc, format, const_cast(key.c_str()), + pInput, pOutput)) { + + ERROR("Cannot execute python reduce function"); + PyErr_Print(); + } + Py_XDECREF(pInput); + Py_XDECREF(pFunc); if (pOutput == NULL) { @@ -186,15 +209,11 @@ void task_python::reduce(std::string& key, vec_str& values, TaskOutput& out) { // Save dictionary values as a PyObject pointer while (PyDict_Next(pOutput, &pos, &pKey, &pValue)) { string k = PyString_AsString(pKey); - - for (Py_ssize_t i = 0; i < PyList_Size(pValue); i++) { - PyObject* item = PyList_GetItem(pValue, i); - string v = PyString_AsString(item); - out.insert(k, v); - } + string v = PyString_AsString(pValue); + out.insert(k, v); } Py_DECREF(pOutput); - Py_DECREF(pInput); + //PyGILState_Release(gstate); } // }}} diff --git a/src/targets/veloxmr_mapreduce.cc b/src/targets/veloxmr_mapreduce.cc index 11fd5f6..9e1c9a4 100644 --- a/src/targets/veloxmr_mapreduce.cc +++ b/src/targets/veloxmr_mapreduce.cc @@ -28,24 +28,27 @@ int main(int argc, char** argv) { dataset A = mr.make_dataset({argv[1]}); + string pmap_src = ""; + string amap_src = ""; string pmap_func = argv[5]; if (pmap_func != "NULL") { std::replace(pmap_func.begin(), pmap_func.end(), '|', ' '); std::replace(pmap_func.begin(), pmap_func.end(), '~', '\n'); - // pre-map + pmap_src = pmap_func; } - A.pymap(map_func); - string amap_func = argv[6]; if (amap_func != "NULL") { std::replace(amap_func.begin(), amap_func.end(), '|', ' '); std::replace(amap_func.begin(), amap_func.end(), '~', '\n'); - // after-remap + amap_src = 
amap_func; } + A.pymap(map_func, pmap_src, amap_src); + + A.pyreduce(red_func, argv[4]); return EXIT_SUCCESS; From 93e28c6597e7037ad129033d541cf5095399162c Mon Sep 17 00:00:00 2001 From: Vicente Adolfo Bolea Sanchez Date: Tue, 23 May 2017 17:22:57 +0900 Subject: [PATCH 6/8] Fixed memory leak in directory class Basically when two or more threads try to access the sqlite database, one of them gets SQLITE_BUSY or SQLITE_LOCKED. In such case I added a timed loop to keep trying. Before this commit those states where ignored. Thus, failed sqlite access lead to many unitialized values all around VeloxMR. --- src/fileleader/directory.cc | 28 ++++++++++++++++++---------- src/mapreduce/fs/directorymr.cc | 2 +- src/mapreduce/messages/igroupinfo.hh | 8 ++++---- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/fileleader/directory.cc b/src/fileleader/directory.cc index 301565a..8b371c3 100644 --- a/src/fileleader/directory.cc +++ b/src/fileleader/directory.cc @@ -106,17 +106,25 @@ Directory::Directory() { // query_exec_simple {{{ bool Directory::query_exec_simple(char* query, int (*fn)(void*,int,char**,char**) = NULL, void* argv = NULL) { char *zErrMsg = nullptr; + sqlite3* db = nullptr; + + int rc = SQLITE_OK; + do { + db = open(path); + rc = sqlite3_exec(db, query, fn, argv, &zErrMsg); + if (rc != SQLITE_OK) { + ERROR("SQL error: %s error_code=%d", zErrMsg, rc); + sqlite3_free(zErrMsg); + zErrMsg = nullptr; + if (rc == SQLITE_LOCKED or rc == SQLITE_BUSY) { + INFO("SQLITE locked, retrying..."); + sleep(1); // Try again + } + else + break; + } + } while (SQLITE_OK != rc); - sqlite3* db = open(path); - int rc; - while (SQLITE_OK != (rc = sqlite3_exec(db, query, fn, argv, &zErrMsg))) { - ERROR("SQL error: %s", zErrMsg); - sqlite3_free(zErrMsg); - if (rc == SQLITE_LOCKED) - sleep(1); - else - break; - } sqlite3_close(db); return rc; diff --git a/src/mapreduce/fs/directorymr.cc b/src/mapreduce/fs/directorymr.cc index 572bf18..68f0a74 100644 --- 
a/src/mapreduce/fs/directorymr.cc +++ b/src/mapreduce/fs/directorymr.cc @@ -155,7 +155,7 @@ void DirectoryMR::select_igroup_metadata(uint32_t job_id, uint32_t map_id, uint32_t reducer_id, IGroupInfo *igroup_info) { char sql[DEFAULT_QUERY_SIZE]; - sprintf(sql, "SELECT * from igroup_table where job_id=%" PRIu32 " and \ + snprintf(sql, DEFAULT_QUERY_SIZE, "SELECT * from igroup_table where job_id=%" PRIu32 " and \ map_id=%" PRIu32 " and reducer_id=%" PRIu32 ";", job_id, map_id, reducer_id); diff --git a/src/mapreduce/messages/igroupinfo.hh b/src/mapreduce/messages/igroupinfo.hh index e5fdd6a..6011580 100644 --- a/src/mapreduce/messages/igroupinfo.hh +++ b/src/mapreduce/messages/igroupinfo.hh @@ -10,10 +10,10 @@ struct IGroupInfo: public Message { std::string get_type() const override; - uint32_t job_id; - uint32_t map_id; - uint32_t reducer_id; - uint32_t num_block; + uint32_t job_id = 0; + uint32_t map_id = 0; + uint32_t reducer_id = 0; + uint32_t num_block = 0; }; } From 0be5e49a32372ebaf819d58e24d40eb4329d1f6a Mon Sep 17 00:00:00 2001 From: deukyeon Date: Sun, 28 May 2017 15:01:47 +0900 Subject: [PATCH 7/8] changed dfs to veloxdfs and pre_map to before_map --- Makefile.am | 6 +++--- src/client/veloxmr_lib.py | 13 ++++++------- src/client/vmr.cc | 2 +- src/mapreduce/executor.cc | 6 +++--- src/mapreduce/messages/boost_impl.cc | 4 ++-- src/mapreduce/messages/job.hh | 2 +- src/mapreduce/messages/task.hh | 2 +- src/mapreduce/task_cxx.cc | 4 ++-- src/mapreduce/task_cxx.hh | 2 +- src/mapreduce/task_executor.cc | 2 ++ src/mapreduce/task_handler.hh | 2 +- src/mapreduce/task_python.cc | 20 ++++++++++---------- src/mapreduce/task_python.hh | 6 +++--- src/targets/veloxmr_mapreduce.cc | 1 - 14 files changed, 36 insertions(+), 36 deletions(-) diff --git a/Makefile.am b/Makefile.am index 709b27c..22667f9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3,7 +3,7 @@ include tests/Makefile.am AM_CPPFLAGS = $(CPPFLAGS) -I@srcdir@/src/common -I@srcdir@/src -include ./config.h 
$(BOOST_CPPFLAGS) AM_CXXFLAGS = $(CXXFLAGS) -Wall -bin_PROGRAMS = eclipse_node dfs __velox_mapreduce +bin_PROGRAMS = eclipse_node veloxdfs __velox_mapreduce bin_SCRIPTS = src/client/veloxmr src/client/veloxmr_lib.py messages_files = src/messages/boundaries.cc \ @@ -108,8 +108,8 @@ eclipse_node_SOURCES = src/targets/node_main_mr.cc \ eclipse_node_LDADD = libvmr.la $(LDADD) -lpython2.7 -dfs_SOURCES = src/targets/client.cc \ - src/client/cli_driver.cc +veloxdfs_SOURCES = src/targets/client.cc \ + src/client/cli_driver.cc __velox_mapreduce_SOURCES = src/targets/veloxmr_mapreduce.cc __velox_mapreduce_LDADD = libvmr.la $(LDADD) libvdfs.la diff --git a/src/client/veloxmr_lib.py b/src/client/veloxmr_lib.py index 27a50f3..fe8f3fd 100644 --- a/src/client/veloxmr_lib.py +++ b/src/client/veloxmr_lib.py @@ -7,22 +7,22 @@ sys.ps1 ='VeloxMR>' def put(file_name): - call(["dfs", "put", file_name]) + call(["veloxdfs", "put", file_name]) def rm(file_name): - call(["dfs", "rm", file_name]) + call(["veloxdfs", "rm", file_name]) def cat(file_name): - call(["dfs", "cat", file_name]) + call(["veloxdfs", "cat", file_name]) def ls(): - call(["dfs", "ls"]) + call(["veloxdfs", "ls"]) def format(): - call(["dfs", "format"]) + call(["veloxdfs", "format"]) def show(file_name): - call(["dfs", "show", file_name]) + call(["veloxdfs", "show", file_name]) def mapreduce(file_name, mapf, redf, output, pmap = "", amap = ""): map_src = inspect.getsource(mapf) @@ -45,5 +45,4 @@ def mapreduce(file_name, mapf, redf, output, pmap = "", amap = ""): amap_src.replace(' ', '|') amap_src.replace('\n', '~') - call(["__velox_mapreduce", file_name, map_src, red_src, output, pmap_src, amap_src]) diff --git a/src/client/vmr.cc b/src/client/vmr.cc index 8176b4c..33a8d93 100644 --- a/src/client/vmr.cc +++ b/src/client/vmr.cc @@ -103,7 +103,7 @@ void dataset::pymap(std::string func, std::string pmap = "", std::string amap = job.files = files; job.job_id = job_id; job.func_body = func; - job.pre_map = pmap; + 
job.before_map = pmap; job.after_map = amap; send_message(&socket, &job); diff --git a/src/mapreduce/executor.cc b/src/mapreduce/executor.cc index ccc4509..d174738 100644 --- a/src/mapreduce/executor.cc +++ b/src/mapreduce/executor.cc @@ -45,13 +45,13 @@ bool Executor::run_map (messages::Task* m) { if (m->lang == "C++") { task_execution = new task_cxx(m->library, m->func_name); } else { - task_execution = new task_python(m->func_body, m->pre_map, m->after_map); + task_execution = new task_python(m->func_body, m->before_map, m->after_map); } task_execution->setup(true); INFO("Launching mapper with %i threads", m->blocks.size()); - task_execution->pre_map(options); + task_execution->before_map(options); try { for (size_t map_id = 0; map_id < m->blocks.size(); map_id++) { @@ -187,7 +187,7 @@ bool Executor::run_reduce (messages::Task* task) { if (task->lang == "C++") { task_execution = new task_cxx(task->library, task->func_name); } else { - task_execution = new task_python(task->func_body, task->pre_map, task->after_map); + task_execution = new task_python(task->func_body, task->before_map, task->after_map); } task_execution->setup(false); diff --git a/src/mapreduce/messages/boost_impl.cc b/src/mapreduce/messages/boost_impl.cc index 5473658..5e26e05 100644 --- a/src/mapreduce/messages/boost_impl.cc +++ b/src/mapreduce/messages/boost_impl.cc @@ -30,7 +30,7 @@ template ar & BOOST_SERIALIZATION_NVP(c.leader); ar & BOOST_SERIALIZATION_NVP(c.func_body); ar & BOOST_SERIALIZATION_NVP(c.lang); - ar & BOOST_SERIALIZATION_NVP(c.pre_map); + ar & BOOST_SERIALIZATION_NVP(c.before_map); ar & BOOST_SERIALIZATION_NVP(c.after_map); } @@ -148,7 +148,7 @@ template ar & BOOST_SERIALIZATION_NVP(c.file_output); ar & BOOST_SERIALIZATION_NVP(c.func_body); ar & BOOST_SERIALIZATION_NVP(c.lang); - ar & BOOST_SERIALIZATION_NVP(c.pre_map); + ar & BOOST_SERIALIZATION_NVP(c.before_map); ar & BOOST_SERIALIZATION_NVP(c.after_map); } diff --git a/src/mapreduce/messages/job.hh 
b/src/mapreduce/messages/job.hh index 84a0eb3..5dbe7e5 100644 --- a/src/mapreduce/messages/job.hh +++ b/src/mapreduce/messages/job.hh @@ -17,7 +17,7 @@ struct Job: public Message { std::string reduce_name; std::string file_output; std::vector files; - std::string func_body, pre_map, after_map; + std::string func_body, before_map, after_map; std::string lang; }; diff --git a/src/mapreduce/messages/task.hh b/src/mapreduce/messages/task.hh index 48e9de9..e8f440f 100644 --- a/src/mapreduce/messages/task.hh +++ b/src/mapreduce/messages/task.hh @@ -15,7 +15,7 @@ struct Task: public Message { std::string lang; std::string library, func_name, input_path; std::string file_output; - std::string func_body, pre_map, after_map; + std::string func_body, before_map, after_map; std::vector> blocks; uint32_t subjob_id = 0; uint32_t job_id = 0; diff --git a/src/mapreduce/task_cxx.cc b/src/mapreduce/task_cxx.cc index 87e3231..3c72b18 100644 --- a/src/mapreduce/task_cxx.cc +++ b/src/mapreduce/task_cxx.cc @@ -44,8 +44,8 @@ void task_cxx::setup(bool is_map) { } } // }}} -// pre_map {{{ -void task_cxx::pre_map(TaskOptions& options) { +// before_map {{{ +void task_cxx::before_map(TaskOptions& options) { if(before_map_f != nullptr) before_map_f(options); } diff --git a/src/mapreduce/task_cxx.hh b/src/mapreduce/task_cxx.hh index 0272429..eabb641 100644 --- a/src/mapreduce/task_cxx.hh +++ b/src/mapreduce/task_cxx.hh @@ -11,7 +11,7 @@ class task_cxx : public task_handler { ~task_cxx(); void setup(bool is_map) override; - void pre_map(TaskOptions&) override; + void before_map(TaskOptions&) override; void after_map(TaskOptions&) override; void map(std::string&, TaskOutput&, TaskOptions&) override; void reduce(std::string&, vec_str&, TaskOutput&) override; diff --git a/src/mapreduce/task_executor.cc b/src/mapreduce/task_executor.cc index 8d83205..e95ac24 100644 --- a/src/mapreduce/task_executor.cc +++ b/src/mapreduce/task_executor.cc @@ -81,6 +81,8 @@ void TaskExecutor::job_accept(messages::Job* 
m, std::function fn) { task.func_body = m->func_body; task.lang = m->lang; task.blocks = task_stub.second; + task.before_map = m->before_map; + task.after_map = m->after_map; network->send(task_stub.first, &task); } } else if (m->type == "REDUCE") { diff --git a/src/mapreduce/task_handler.hh b/src/mapreduce/task_handler.hh index ea3db51..825a63e 100644 --- a/src/mapreduce/task_handler.hh +++ b/src/mapreduce/task_handler.hh @@ -17,7 +17,7 @@ class task_handler { virtual ~task_handler() = default; virtual void setup(bool is_map) = 0; - virtual void pre_map(TaskOptions&) = 0; + virtual void before_map(TaskOptions&) = 0; virtual void after_map(TaskOptions&) = 0; virtual void map(std::string&, TaskOutput&, TaskOptions&) = 0; virtual void reduce(std::string&, vec_str&, TaskOutput&) = 0; diff --git a/src/mapreduce/task_python.cc b/src/mapreduce/task_python.cc index 068d0b4..bda71a8 100644 --- a/src/mapreduce/task_python.cc +++ b/src/mapreduce/task_python.cc @@ -11,10 +11,10 @@ using namespace std; // Constructor {{{ task_python::task_python(std::string function_body, - std::string pre_map = "", + std::string before_map = "", std::string after_map = "") : function_body_f(function_body), - pre_map_f(pre_map), + before_map_f(before_map), after_map_f(after_map) { } @@ -37,8 +37,8 @@ void task_python::setup(bool is_map) { pModule = PyImport_ExecCodeModule(module_name, pCompiledFn); - if (pre_map_f != "") { - pCompiledFn = Py_CompileString(pre_map_f.c_str(), "", Py_file_input); + if (before_map_f != "") { + pCompiledFn = Py_CompileString(before_map_f.c_str(), "", Py_file_input); if (pCompiledFn == NULL) ERROR("[PY interpreter] I am not able to parse your function"); @@ -61,9 +61,9 @@ void task_python::setup(bool is_map) { python_module = pModule; } // }}} -// pre_map {{{ -void task_python::pre_map(TaskOptions& options) { - if (pre_map_f == "") { +// before_map {{{ +void task_python::before_map(TaskOptions& options) { + if (before_map_f == "") { INFO("Skipping pre-map 
function"); return; } @@ -72,8 +72,8 @@ void task_python::pre_map(TaskOptions& options) { Py_ssize_t pos = 0; PyObject* pOptions = PyDict_New(); - pFunc = PyObject_GetAttrString(python_module, "pre_map") ; - PyObject_CallFunctionObjArgs(pFunc, pOptions); + pFunc = PyObject_GetAttrString(python_module, "before_map") ; + PyObject_CallFunctionObjArgs(pFunc, pOptions, NULL); if (pOptions == NULL) { ERROR("Python map did not return anything :( "); @@ -109,7 +109,7 @@ void task_python::after_map(TaskOptions& options) { } pFunc = PyObject_GetAttrString(python_module, "after_map" ) ; - PyObject_CallFunctionObjArgs(pFunc, pOptions); + PyObject_CallFunctionObjArgs(pFunc, pOptions, NULL); if (pOptions == NULL) { ERROR("Python map did not return anything :( "); diff --git a/src/mapreduce/task_python.hh b/src/mapreduce/task_python.hh index 98a0e29..e567da8 100644 --- a/src/mapreduce/task_python.hh +++ b/src/mapreduce/task_python.hh @@ -9,18 +9,18 @@ namespace eclipse { class task_python : public task_handler { public: - task_python(std::string function_body, std::string pre_map, std::string after_map); + task_python(std::string function_body, std::string before_map, std::string after_map); ~task_python(); void setup(bool is_map) override; - void pre_map(TaskOptions&) override; + void before_map(TaskOptions&) override; void after_map(TaskOptions&) override; void map(std::string&, TaskOutput&, TaskOptions&) override; void reduce(std::string&, vec_str&, TaskOutput&) override; private: std::string function_body_f; - std::string pre_map_f; + std::string before_map_f; std::string after_map_f; PyObject* python_module; diff --git a/src/targets/veloxmr_mapreduce.cc b/src/targets/veloxmr_mapreduce.cc index 9e1c9a4..a72065c 100644 --- a/src/targets/veloxmr_mapreduce.cc +++ b/src/targets/veloxmr_mapreduce.cc @@ -48,7 +48,6 @@ int main(int argc, char** argv) { A.pymap(map_func, pmap_src, amap_src); - A.pyreduce(red_func, argv[4]); return EXIT_SUCCESS; From 
bbb1e0b67b0b22a152f53cc3f88506a8c734562f Mon Sep 17 00:00:00 2001 From: Vicente Adolfo Bolea Sanchez Date: Mon, 9 Oct 2017 18:01:22 +0900 Subject: [PATCH 8/8] Fixed segfault in destructor of dl_loader class --- src/common/dl_loader.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/dl_loader.cc b/src/common/dl_loader.cc index 9e0f320..d2ad272 100644 --- a/src/common/dl_loader.cc +++ b/src/common/dl_loader.cc @@ -8,7 +8,7 @@ using namespace std; DL_loader::DL_loader(string lib): lib_name(lib){ } DL_loader::~DL_loader() { - if(!lib) + if(lib) close(); } // }}}