Skip to content

Commit

Permalink
#950: refactor options line parser v2 (#448)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomuben authored Sep 20, 2024
1 parent 60c5aef commit 3233961
Show file tree
Hide file tree
Showing 21 changed files with 492 additions and 212 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/check_bazel_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ jobs:
runs-on: ubuntu-latest

env:
USE_BAZEL_VERSION=7.2.1

USE_BAZEL_VERSION: 7.2.1
steps:
- uses: actions/checkout@v4

Expand Down
2 changes: 1 addition & 1 deletion exaudfclient/base/exaudflib/impl/swig/swig_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

extern "C" {

SWIGVMContainers::SWIGMetadata* create_SWIGMetaData() {
SWIGVMContainers::SWIGMetadataIf* create_SWIGMetaData() {
return new SWIGVMContainers::SWIGMetadata_Impl();
}

Expand Down
3 changes: 1 addition & 2 deletions exaudfclient/base/exaudflib/impl/swig/swig_meta_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@

namespace SWIGVMContainers {

class SWIGMetadata_Impl : public SWIGMetadata {
class SWIGMetadata_Impl : public SWIGMetadataIf {
public:
SWIGMetadata_Impl():
SWIGMetadata(false),
m_connection_id(exaudflib::global.SWIGVM_params_ref->connection_id),
m_socket(*(exaudflib::global.sock)),
m_exch(&exaudflib::global.exchandler),
Expand Down
53 changes: 50 additions & 3 deletions exaudfclient/base/exaudflib/swig/swig_meta_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,56 @@

namespace SWIGVMContainers {

class SWIGMetadata {
SWIGMetadata* impl=nullptr;
typedef SWIGVMContainers::SWIGMetadata* (*CREATE_METADATA_FUN)();
// Abstract interface for the metadata accessors: every member function declared here is
// pure virtual, so the type cannot be instantiated directly and a concrete subclass must
// implement all of them.
// NOTE(review): the declaration order determines the virtual table layout; objects of this
// type appear to be handed across the extern "C" create_SWIGMetaData() factory, so avoid
// reordering or inserting members in the middle - confirm with the plugin loading code.
// NOTE(review): the top-level `const` on by-value return types (e.g. `const unsigned int`)
// has no effect for callers, but overriding declarations have to repeat the same return
// type, so it is intentionally left as is.
struct SWIGMetadataIf {

// Virtual destructor so that deleting through a SWIGMetadataIf* destroys the derived object.
virtual ~SWIGMetadataIf() {};
// Database, script and user/schema identification. The returned C strings are owned by
// the implementation as far as this header shows - TODO confirm lifetime with implementers.
virtual const char* databaseName() = 0;
virtual const char* databaseVersion() = 0;
virtual const char* scriptName() = 0;
virtual const char* scriptSchema() = 0;
virtual const char* currentUser() = 0;
virtual const char* scopeUser() = 0;
virtual const char* currentSchema() = 0;
virtual const char* scriptCode() = 0;
// Session / statement / node / VM identification. The *_S variants return a C string
// (presumably the textual form of the numeric id - verify against the implementation).
virtual const unsigned long long sessionID() = 0;
virtual const char *sessionID_S() = 0;
virtual const unsigned long statementID() = 0;
virtual const unsigned int nodeCount() = 0;
virtual const unsigned int nodeID() = 0;
virtual const unsigned long long vmID() = 0;
virtual const unsigned long long memoryLimit() = 0;
virtual const VMTYPE vmType() = 0;
virtual const char *vmID_S() = 0;
// Lookups by name: connection information for `connection_name`, module content for `name`.
virtual const ExecutionGraph::ConnectionInformationWrapper* connectionInformation(const char* connection_name) = 0;
virtual const char* moduleContent(const char* name) = 0;
// Input column description; `col` selects the column (index base is not visible here).
virtual const unsigned int inputColumnCount() = 0;
virtual const char *inputColumnName(unsigned int col) = 0;
virtual const SWIGVM_datatype_e inputColumnType(unsigned int col) = 0;
virtual const char *inputColumnTypeName(unsigned int col) = 0;
virtual const unsigned int inputColumnSize(unsigned int col) = 0;
virtual const unsigned int inputColumnPrecision(unsigned int col) = 0;
virtual const unsigned int inputColumnScale(unsigned int col) = 0;
virtual const SWIGVM_itertype_e inputType() = 0;
// Output column description; mirrors the input column accessors above.
virtual const unsigned int outputColumnCount() = 0;
virtual const char *outputColumnName(unsigned int col) = 0;
virtual const SWIGVM_datatype_e outputColumnType(unsigned int col) = 0;
virtual const char *outputColumnTypeName(unsigned int col) = 0;
virtual const unsigned int outputColumnSize(unsigned int col) = 0;
virtual const unsigned int outputColumnPrecision(unsigned int col) = 0;
virtual const unsigned int outputColumnScale(unsigned int col) = 0;
virtual const SWIGVM_itertype_e outputType() = 0;
virtual const bool isEmittedColumn(unsigned int col) = 0;
// Error reporting: callers elsewhere in this change treat a non-null return value of
// checkException() as an error message after calling moduleContent().
virtual const char* checkException() = 0;
// Plugin related accessors.
virtual const char* pluginLanguageName() = 0;
virtual const char* pluginURI() = 0;
virtual const char* outputAddress() = 0;
};



class SWIGMetadata : public SWIGMetadataIf {
SWIGMetadataIf* impl=nullptr;
typedef SWIGVMContainers::SWIGMetadataIf* (*CREATE_METADATA_FUN)();
public:
SWIGMetadata()
{
Expand Down
2 changes: 1 addition & 1 deletion exaudfclient/base/exaudflib/udf_plugin_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class SWIGTableIterator;
}

extern "C" {
SWIGVMContainers::SWIGMetadata* create_SWIGMetaData();
SWIGVMContainers::SWIGMetadataIf* create_SWIGMetaData();
SWIGVMContainers::AbstractSWIGTableIterator* create_SWIGTableIterator();
SWIGVMContainers::SWIGRAbstractResultHandler* create_SWIGResultHandler(SWIGVMContainers::SWIGTableIterator* table_iterator);
}
Expand Down
2 changes: 1 addition & 1 deletion exaudfclient/base/javacontainer/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ cc_library(
name = "javacontainer",
srcs = [":javacontainer.cc", ":javacontainer.h", ":javacontainer_impl.cc", ":javacontainer_impl.h", ":dummy"],
hdrs = [":filter_swig_code_exascript_java_h", "exascript_java_jni_decl.h"],
deps = ["@ssl//:ssl","@java//:java", ":exascript_java", "//base/exaudflib:header", "//base:debug_message_h","//base/javacontainer/script_options:java_scriptoptionlines"],
deps = ["@ssl//:ssl","@java//:java", ":exascript_java", "//base/exaudflib:header", "//base:debug_message_h","//base/javacontainer/script_options:java_script_option_lines"],
# copts= ["-O0","-fno-lto"],
alwayslink=True,
)
Expand Down
18 changes: 9 additions & 9 deletions exaudfclient/base/javacontainer/javacontainer_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
#include "base/debug_message.h"
#include "base/javacontainer/javacontainer.h"
#include "base/javacontainer/javacontainer_impl.h"
#include "base/javacontainer/script_options/converter.h"
#include "base/javacontainer/script_options/extractor.h"
#include "base/javacontainer/script_options/parser_legacy.h"


using namespace SWIGVMContainers;
Expand All @@ -23,18 +24,17 @@ JavaVMImpl::JavaVMImpl(bool checkOnly, bool noJNI): m_checkOnly(checkOnly), m_ex
stringstream ss;
m_exaJavaPath = "/exaudf/base/javacontainer"; // TODO hardcoded path

JavaScriptOptions::ScriptOptionLinesParserLegacy scriptOptionsParser;
JavaScriptOptions::Extractor extractor(scriptOptionsParser,
[&](const std::string &msg){throwException(msg);});

DBG_FUNC_CALL(cerr,extractor.extract(m_scriptCode)); // To be called before scripts are imported. Otherwise, the script classname from an imported script could be used

JavaScriptOptions::ScriptOptionsConverter optionsConverter([&](const std::string &msg){throwException(msg);},
m_jvmOptions);

DBG_FUNC_CALL(cerr,optionsConverter.getScriptClassName(m_scriptCode)); // To be called before scripts are imported. Otherwise, the script classname from an imported script could be used
DBG_FUNC_CALL(cerr,optionsConverter.convertImportScripts(m_scriptCode));
DBG_FUNC_CALL(cerr,optionsConverter.getExternalJvmOptions(m_scriptCode));
DBG_FUNC_CALL(cerr,setClasspath());
DBG_FUNC_CALL(cerr,optionsConverter.getExternalJarPaths(m_scriptCode));

for (set<string>::iterator it = optionsConverter.getJarPaths().begin(); it != optionsConverter.getJarPaths().end();
m_jvmOptions = std::move(extractor.moveJvmOptions());

for (set<string>::iterator it = extractor.getJarPaths().begin(); it != extractor.getJarPaths().end();
++it) {
addJarToClasspath(*it);
}
Expand Down
8 changes: 5 additions & 3 deletions exaudfclient/base/javacontainer/script_options/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package(default_visibility = ["//visibility:public"])

cc_library(
name = "java_script_option_lines",
hdrs = [":converter.h"],
srcs = [":parser.h", ":converter.h", ":converter.cc", ":parser_legacy.h", ":parser_legacy.cc"],
deps = ["//base/script_options_parser:script_option_lines_parser", "//base/exaudflib:header", "//base/exaudflib:exaudflib-deps"],
hdrs = [":extractor.h", ":parser_legacy.h"],
srcs = [":parser.h", ":converter.h", ":converter.cc", ":parser_legacy.cc", ":extractor.cc",
":keywords.h", ":checksum.h", ":checksum.cc"],
deps = ["//base/script_options_parser:script_option_lines_parser", "//base:debug_message_h",
"//base/exaudflib:header", "//base/exaudflib:exaudflib-deps"],
)
26 changes: 26 additions & 0 deletions exaudfclient/base/javacontainer/script_options/checksum.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include "base/javacontainer/script_options/checksum.h"
#include <openssl/md5.h>
#include <string.h>

namespace SWIGVMContainers {

namespace JavaScriptOptions {

inline std::vector<unsigned char> scriptToMd5(const char *script) {
MD5_CTX ctx;
unsigned char md5[MD5_DIGEST_LENGTH];
MD5_Init(&ctx);
MD5_Update(&ctx, script, strlen(script));
MD5_Final(md5, &ctx);
return std::vector<unsigned char>(md5, md5 + sizeof(md5));
}


/**
 * Records the MD5 checksum of the given script.
 *
 * @param script NUL-terminated script source, hashed via scriptToMd5().
 * @return true if the checksum was newly inserted, false if an equal checksum
 *         had already been recorded.
 */
bool Checksum::addScript(const char *script) {
    // std::set::insert yields {iterator, inserted}; only the "inserted" flag is of interest.
    const auto insertResult = m_importedScriptChecksums.insert(scriptToMd5(script));
    return insertResult.second;
}


} //namespace JavaScriptOptions

} //namespace SWIGVMContainers
29 changes: 29 additions & 0 deletions exaudfclient/base/javacontainer/script_options/checksum.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef SCRIPTOPTIONLINEPARSERCHECKSUM_H
#define SCRIPTOPTIONLINEPARSERCHECKSUM_H 1

#include <string>
#include <vector>
#include <set>


namespace SWIGVMContainers {

namespace JavaScriptOptions {

/**
 * Keeps the set of checksums of all scripts that were added so far, which allows
 * a caller to detect that the same script content is added a second time.
 */
class Checksum {
    // Raw digest bytes of one script.
    using Digest = std::vector<unsigned char>;

public:
    Checksum() = default;

    /**
     * Adds the checksum of `script` (a NUL-terminated string) to the set.
     * @return true if this checksum was not present before, false otherwise.
     */
    bool addScript(const char *script);

private:
    // Checksums of every script passed to addScript().
    std::set<Digest> m_importedScriptChecksums;
};


} //namespace JavaScriptOptions

} //namespace SWIGVMContainers

#endif //SCRIPTOPTIONLINEPARSERCHECKSUM_H
133 changes: 32 additions & 101 deletions exaudfclient/base/javacontainer/script_options/converter.cc
Original file line number Diff line number Diff line change
@@ -1,123 +1,54 @@
#include "base/javacontainer/script_options/converter.h"
#include "base/javacontainer/script_options/parser_legacy.h"
#include "base/exaudflib/swig/swig_meta_data.h"
#include <openssl/md5.h>
#include <string.h>


#include <iostream>

namespace SWIGVMContainers {

namespace JavaScriptOptions {

inline std::vector<unsigned char> scriptToMd5(const char *script) {
MD5_CTX ctx;
unsigned char md5[MD5_DIGEST_LENGTH];
MD5_Init(&ctx);
MD5_Update(&ctx, script, strlen(script));
MD5_Final(md5, &ctx);
return std::vector<unsigned char>(md5, md5 + sizeof(md5));
}


ScriptOptionsConverter::ScriptOptionsConverter(std::function<void(const std::string&)> throwException,
std::vector<std::string>& jvmOptions):
m_scriptOptionsParser(std::make_unique<ScriptOptionLinesParserLegacy>()),
m_throwException(throwException),
m_jvmOptions(jvmOptions),
m_jarPaths()
{}

void ScriptOptionsConverter::getExternalJarPaths(std::string & src_scriptCode) {
std::vector<std::string> jarPaths;
m_scriptOptionsParser->findExternalJarPaths(src_scriptCode, jarPaths,
[&](const std::string& msg){m_throwException("F-UDF-CL-SL-JAVA-1600" + msg);});
for (const std::string& jarPath : jarPaths) {
for (size_t start = 0, delim = 0; ; start = delim + 1) {
delim = jarPath.find(":", start);
if (delim != std::string::npos) {
std::string jar = jarPath.substr(start, delim - start);
if (m_jarPaths.find(jar) == m_jarPaths.end())
m_jarPaths.insert(jar);
}
else {
std::string jar = jarPath.substr(start);
if (m_jarPaths.find(jar) == m_jarPaths.end())
m_jarPaths.insert(jar);
break;
}
Converter::Converter()
: m_jvmOptions()
, m_jarPaths()
, m_whitespace(" \t\f\v") {}

void Converter::convertExternalJar(const std::string & value) {
for (size_t start = 0, delim = 0; ; start = delim + 1) {
delim = value.find(":", start);
if (delim != std::string::npos) {
std::string jar = value.substr(start, delim - start);
if (m_jarPaths.find(jar) == m_jarPaths.end())
m_jarPaths.insert(jar);
}
else {
std::string jar = value.substr(start);
if (m_jarPaths.find(jar) == m_jarPaths.end())
m_jarPaths.insert(jar);
break;
}
}
}

void ScriptOptionsConverter::getScriptClassName(std::string & src_scriptCode) {
std::string scriptClass;

m_scriptOptionsParser->getScriptClassName(src_scriptCode, scriptClass,
[&](const std::string& msg){m_throwException("F-UDF-CL-SL-JAVA-1601: " + msg);});

if (scriptClass != "") {
m_jvmOptions.push_back("-Dexasol.scriptclass=" + scriptClass);
}
}

void ScriptOptionsConverter::getExternalJvmOptions(std::string & src_scriptCode) {
std::vector<std::string> jvmOptions;
const std::string whitespace = " \t\f\v";
m_scriptOptionsParser->getExternalJvmOptions(src_scriptCode, jvmOptions,
[&](const std::string& msg){m_throwException("F-UDF-CL-SL-JAVA-1602" + msg);});

for (const std::string& jvmOption: jvmOptions) {
for (size_t start = 0, delim = 0; ; start = delim + 1) {
start = jvmOption.find_first_not_of(whitespace, start);
if (start == std::string::npos)
break;
delim = jvmOption.find_first_of(whitespace, start);
if (delim != std::string::npos) {
m_jvmOptions.push_back(jvmOption.substr(start, delim - start));
}
else {
m_jvmOptions.push_back(jvmOption.substr(start));
break;
}
}
void Converter::convertScriptClassName(const std::string & value) {
if (value != "") {
m_jvmOptions.push_back("-Dexasol.scriptclass=" + value);
}
}

void ScriptOptionsConverter::convertImportScripts(std::string & src_scriptCode) {
SWIGMetadata *meta = NULL;
// Attention: We must hash the parent script before modifying it (adding the
// package definition). Otherwise we don't recognize if the script imports its self
m_importedScriptChecksums.insert(scriptToMd5(src_scriptCode.c_str()));
while (true) {
std::pair<std::string, size_t> nextImportStatement;
m_scriptOptionsParser->getNextImportScript(src_scriptCode, nextImportStatement,
[&](const std::string& msg){m_throwException("F-UDF-CL-SL-JAVA-1604" + msg);});
if (nextImportStatement.first == "")
void Converter::convertJvmOption(const std::string & value) {
for (size_t start = 0, delim = 0; ; start = delim + 1) {
start = value.find_first_not_of(m_whitespace, start);
if (start == std::string::npos)
break;
if (!meta) {
meta = new SWIGMetadata();
if (!meta)
m_throwException("F-UDF-CL-SL-JAVA-1603: Failure while importing scripts");
delim = value.find_first_of(m_whitespace, start);
if (delim != std::string::npos) {
m_jvmOptions.push_back(value.substr(start, delim - start));
}
const char *scriptCode = meta->moduleContent(nextImportStatement.first.c_str());
const char *exception = meta->checkException();
if (exception)
m_throwException("F-UDF-CL-SL-JAVA-1605: "+std::string(exception));
if (m_importedScriptChecksums.insert(scriptToMd5(scriptCode)).second) {
// Script has not been imported yet
// If this imported script contains %import statements
// they will be resolved in this while loop.
src_scriptCode.insert(nextImportStatement.second, scriptCode);
else {
m_jvmOptions.push_back(value.substr(start));
break;
}
}
if (meta)
delete meta;
}

const std::set<std::string> & ScriptOptionsConverter::getJarPaths() {
return m_jarPaths;
}

} //namespace JavaScriptOptions

Expand Down
Loading

0 comments on commit 3233961

Please sign in to comment.