Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#950: refactor options line parser v2 #448

Merged
merged 20 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/check_bazel_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ jobs:
runs-on: ubuntu-latest

env:
USE_BAZEL_VERSION=7.2.1

USE_BAZEL_VERSION: 7.2.1
steps:
- uses: actions/checkout@v4

Expand Down
2 changes: 1 addition & 1 deletion exaudfclient/base/exaudflib/impl/swig/swig_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

extern "C" {

SWIGVMContainers::SWIGMetadata* create_SWIGMetaData() {
SWIGVMContainers::SWIGMetadataIf* create_SWIGMetaData() {
return new SWIGVMContainers::SWIGMetadata_Impl();
}

Expand Down
3 changes: 1 addition & 2 deletions exaudfclient/base/exaudflib/impl/swig/swig_meta_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@

namespace SWIGVMContainers {

class SWIGMetadata_Impl : public SWIGMetadata {
class SWIGMetadata_Impl : public SWIGMetadataIf {
public:
SWIGMetadata_Impl():
SWIGMetadata(false),
m_connection_id(exaudflib::global.SWIGVM_params_ref->connection_id),
m_socket(*(exaudflib::global.sock)),
m_exch(&exaudflib::global.exchandler),
Expand Down
53 changes: 50 additions & 3 deletions exaudfclient/base/exaudflib/swig/swig_meta_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,56 @@

namespace SWIGVMContainers {

class SWIGMetadata {
SWIGMetadata* impl=nullptr;
typedef SWIGVMContainers::SWIGMetadata* (*CREATE_METADATA_FUN)();
// Abstract interface for script/session metadata accessors.
//
// Every member function is pure virtual, so this type cannot be instantiated
// directly. In this header SWIGMetadata derives from it and holds a
// SWIGMetadataIf* to an implementation object.
//
// NOTE(review): the implementation object is obtained through a
// CREATE_METADATA_FUN function pointer. If that call crosses a shared-library
// boundary, the declaration order of the virtual functions below is part of
// the binary contract (vtable layout) — confirm before reordering or inserting
// members.
struct SWIGMetadataIf {

// Virtual destructor so implementations can be destroyed through a
// SWIGMetadataIf pointer.
virtual ~SWIGMetadataIf() {};
// Database, script and user identification.
virtual const char* databaseName() = 0;
virtual const char* databaseVersion() = 0;
virtual const char* scriptName() = 0;
virtual const char* scriptSchema() = 0;
virtual const char* currentUser() = 0;
virtual const char* scopeUser() = 0;
virtual const char* currentSchema() = 0;
virtual const char* scriptCode() = 0;
// Session / statement / node / VM identification.
// The *_S variants return a C string; presumably the textual form of the
// corresponding numeric id — TODO confirm against the implementation.
virtual const unsigned long long sessionID() = 0;
virtual const char *sessionID_S() = 0;
virtual const unsigned long statementID() = 0;
virtual const unsigned int nodeCount() = 0;
virtual const unsigned int nodeID() = 0;
virtual const unsigned long long vmID() = 0;
virtual const unsigned long long memoryLimit() = 0;
virtual const VMTYPE vmType() = 0;
virtual const char *vmID_S() = 0;
// Lookups by name: connection information and module (script) content.
virtual const ExecutionGraph::ConnectionInformationWrapper* connectionInformation(const char* connection_name) = 0;
virtual const char* moduleContent(const char* name) = 0;
// Input column description; `col` selects the column.
virtual const unsigned int inputColumnCount() = 0;
virtual const char *inputColumnName(unsigned int col) = 0;
virtual const SWIGVM_datatype_e inputColumnType(unsigned int col) = 0;
virtual const char *inputColumnTypeName(unsigned int col) = 0;
virtual const unsigned int inputColumnSize(unsigned int col) = 0;
virtual const unsigned int inputColumnPrecision(unsigned int col) = 0;
virtual const unsigned int inputColumnScale(unsigned int col) = 0;
virtual const SWIGVM_itertype_e inputType() = 0;
// Output column description; `col` selects the column.
virtual const unsigned int outputColumnCount() = 0;
virtual const char *outputColumnName(unsigned int col) = 0;
virtual const SWIGVM_datatype_e outputColumnType(unsigned int col) = 0;
virtual const char *outputColumnTypeName(unsigned int col) = 0;
virtual const unsigned int outputColumnSize(unsigned int col) = 0;
virtual const unsigned int outputColumnPrecision(unsigned int col) = 0;
virtual const unsigned int outputColumnScale(unsigned int col) = 0;
virtual const SWIGVM_itertype_e outputType() = 0;
virtual const bool isEmittedColumn(unsigned int col) = 0;
// Error reporting: returns a message string; presumably null when no
// exception is pending — verify against the implementation.
virtual const char* checkException() = 0;
// Plugin-related accessors.
virtual const char* pluginLanguageName() = 0;
virtual const char* pluginURI() = 0;
virtual const char* outputAddress() = 0;
};



class SWIGMetadata : public SWIGMetadataIf {
SWIGMetadataIf* impl=nullptr;
typedef SWIGVMContainers::SWIGMetadataIf* (*CREATE_METADATA_FUN)();
public:
SWIGMetadata()
{
Expand Down
2 changes: 1 addition & 1 deletion exaudfclient/base/exaudflib/udf_plugin_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class SWIGTableIterator;
}

extern "C" {
SWIGVMContainers::SWIGMetadata* create_SWIGMetaData();
SWIGVMContainers::SWIGMetadataIf* create_SWIGMetaData();
SWIGVMContainers::AbstractSWIGTableIterator* create_SWIGTableIterator();
SWIGVMContainers::SWIGRAbstractResultHandler* create_SWIGResultHandler(SWIGVMContainers::SWIGTableIterator* table_iterator);
}
Expand Down
2 changes: 1 addition & 1 deletion exaudfclient/base/javacontainer/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ cc_library(
name = "javacontainer",
srcs = [":javacontainer.cc", ":javacontainer.h", ":javacontainer_impl.cc", ":javacontainer_impl.h", ":dummy"],
hdrs = [":filter_swig_code_exascript_java_h", "exascript_java_jni_decl.h"],
deps = ["@ssl//:ssl","@java//:java", ":exascript_java", "//base/exaudflib:header", "//base:debug_message_h","//base/javacontainer/script_options:java_scriptoptionlines"],
deps = ["@ssl//:ssl","@java//:java", ":exascript_java", "//base/exaudflib:header", "//base:debug_message_h","//base/javacontainer/script_options:java_script_option_lines"],
# copts= ["-O0","-fno-lto"],
alwayslink=True,
)
Expand Down
18 changes: 9 additions & 9 deletions exaudfclient/base/javacontainer/javacontainer_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
#include "base/debug_message.h"
#include "base/javacontainer/javacontainer.h"
#include "base/javacontainer/javacontainer_impl.h"
#include "base/javacontainer/script_options/converter.h"
#include "base/javacontainer/script_options/extractor.h"
#include "base/javacontainer/script_options/parser_legacy.h"


using namespace SWIGVMContainers;
Expand All @@ -23,18 +24,17 @@ JavaVMImpl::JavaVMImpl(bool checkOnly, bool noJNI): m_checkOnly(checkOnly), m_ex
stringstream ss;
m_exaJavaPath = "/exaudf/base/javacontainer"; // TODO hardcoded path

JavaScriptOptions::ScriptOptionLinesParserLegacy scriptOptionsParser;
JavaScriptOptions::Extractor extractor(scriptOptionsParser,
[&](const std::string &msg){throwException(msg);});

DBG_FUNC_CALL(cerr,extractor.extract(m_scriptCode)); // To be called before scripts are imported. Otherwise, the script classname from an imported script could be used

JavaScriptOptions::ScriptOptionsConverter optionsConverter([&](const std::string &msg){throwException(msg);},
m_jvmOptions);

DBG_FUNC_CALL(cerr,optionsConverter.getScriptClassName(m_scriptCode)); // To be called before scripts are imported. Otherwise, the script classname from an imported script could be used
DBG_FUNC_CALL(cerr,optionsConverter.convertImportScripts(m_scriptCode));
DBG_FUNC_CALL(cerr,optionsConverter.getExternalJvmOptions(m_scriptCode));
DBG_FUNC_CALL(cerr,setClasspath());
DBG_FUNC_CALL(cerr,optionsConverter.getExternalJarPaths(m_scriptCode));

for (set<string>::iterator it = optionsConverter.getJarPaths().begin(); it != optionsConverter.getJarPaths().end();
m_jvmOptions = std::move(extractor.moveJvmOptions());

for (set<string>::iterator it = extractor.getJarPaths().begin(); it != extractor.getJarPaths().end();
++it) {
addJarToClasspath(*it);
}
Expand Down
8 changes: 5 additions & 3 deletions exaudfclient/base/javacontainer/script_options/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package(default_visibility = ["//visibility:public"])

cc_library(
name = "java_script_option_lines",
hdrs = [":converter.h"],
srcs = [":parser.h", ":converter.h", ":converter.cc", ":parser_legacy.h", ":parser_legacy.cc"],
deps = ["//base/script_options_parser:script_option_lines_parser", "//base/exaudflib:header", "//base/exaudflib:exaudflib-deps"],
hdrs = [":extractor.h", ":parser_legacy.h"],
srcs = [":parser.h", ":converter.h", ":converter.cc", ":parser_legacy.cc", ":extractor.cc",
":keywords.h", ":checksum.h", ":checksum.cc"],
deps = ["//base/script_options_parser:script_option_lines_parser", "//base:debug_message_h",
"//base/exaudflib:header", "//base/exaudflib:exaudflib-deps"],
)
26 changes: 26 additions & 0 deletions exaudfclient/base/javacontainer/script_options/checksum.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include "base/javacontainer/script_options/checksum.h"
#include <openssl/md5.h>
#include <string.h>

namespace SWIGVMContainers {

namespace JavaScriptOptions {

inline std::vector<unsigned char> scriptToMd5(const char *script) {
MD5_CTX ctx;
unsigned char md5[MD5_DIGEST_LENGTH];
MD5_Init(&ctx);
MD5_Update(&ctx, script, strlen(script));
MD5_Final(md5, &ctx);
return std::vector<unsigned char>(md5, md5 + sizeof(md5));
}


/**
 * Records the checksum of the given script.
 *
 * @param script NUL-terminated script source.
 * @return true if this checksum was not recorded before, false if an identical
 *         checksum was already present.
 */
bool Checksum::addScript(const char *script) {
    const auto insertion = m_importedScriptChecksums.insert(scriptToMd5(script));
    // .second reports whether the set actually gained a new element.
    return insertion.second;
}


} //namespace JavaScriptOptions

} //namespace SWIGVMContainers
29 changes: 29 additions & 0 deletions exaudfclient/base/javacontainer/script_options/checksum.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef SCRIPTOPTIONLINEPARSERCHECKSUM_H
#define SCRIPTOPTIONLINEPARSERCHECKSUM_H 1

#include <string>
#include <vector>
#include <set>


namespace SWIGVMContainers {

namespace JavaScriptOptions {

/**
 * Remembers which scripts have already been added, identified by a checksum
 * of their text rather than by the text itself.
 */
class Checksum {
    // One entry (raw digest bytes) per distinct script passed to addScript().
    std::set<std::vector<unsigned char>> m_importedScriptChecksums;

public:
    Checksum() = default;

    /**
     * Registers a script.
     *
     * @param script NUL-terminated script source.
     * @return true if the script's checksum was newly recorded, false if it was
     *         already known.
     */
    bool addScript(const char *script);
};


} //namespace JavaScriptOptions

} //namespace SWIGVMContainers

#endif //SCRIPTOPTIONLINEPARSERCHECKSUM_H
133 changes: 32 additions & 101 deletions exaudfclient/base/javacontainer/script_options/converter.cc
Original file line number Diff line number Diff line change
@@ -1,123 +1,54 @@
#include "base/javacontainer/script_options/converter.h"
#include "base/javacontainer/script_options/parser_legacy.h"
#include "base/exaudflib/swig/swig_meta_data.h"
#include <openssl/md5.h>
#include <string.h>


#include <iostream>

namespace SWIGVMContainers {

namespace JavaScriptOptions {

inline std::vector<unsigned char> scriptToMd5(const char *script) {
MD5_CTX ctx;
unsigned char md5[MD5_DIGEST_LENGTH];
MD5_Init(&ctx);
MD5_Update(&ctx, script, strlen(script));
MD5_Final(md5, &ctx);
return std::vector<unsigned char>(md5, md5 + sizeof(md5));
}


ScriptOptionsConverter::ScriptOptionsConverter(std::function<void(const std::string&)> throwException,
std::vector<std::string>& jvmOptions):
m_scriptOptionsParser(std::make_unique<ScriptOptionLinesParserLegacy>()),
m_throwException(throwException),
m_jvmOptions(jvmOptions),
m_jarPaths()
{}

void ScriptOptionsConverter::getExternalJarPaths(std::string & src_scriptCode) {
std::vector<std::string> jarPaths;
m_scriptOptionsParser->findExternalJarPaths(src_scriptCode, jarPaths,
[&](const std::string& msg){m_throwException("F-UDF-CL-SL-JAVA-1600" + msg);});
for (const std::string& jarPath : jarPaths) {
for (size_t start = 0, delim = 0; ; start = delim + 1) {
delim = jarPath.find(":", start);
if (delim != std::string::npos) {
std::string jar = jarPath.substr(start, delim - start);
if (m_jarPaths.find(jar) == m_jarPaths.end())
m_jarPaths.insert(jar);
}
else {
std::string jar = jarPath.substr(start);
if (m_jarPaths.find(jar) == m_jarPaths.end())
m_jarPaths.insert(jar);
break;
}
// Starts with no JVM options and no jar paths; m_whitespace holds the
// characters (space, tab, form feed, vertical tab) used as separators when
// splitting option strings.
Converter::Converter()
    : m_jvmOptions(),
      m_jarPaths(),
      m_whitespace(" \t\f\v") {
}

/**
 * Splits a ':'-separated list of jar paths and records each entry in
 * m_jarPaths.
 *
 * Every segment between separators is recorded, including empty ones (e.g.
 * for "a::b" or a trailing ':'), exactly as before. Duplicates are dropped by
 * the set itself, so no separate find() is needed before insert().
 *
 * @param value Jar path list, entries separated by ':'.
 */
void Converter::convertExternalJar(const std::string & value) {
    size_t start = 0;
    while (true) {
        const size_t delim = value.find(':', start);
        if (delim == std::string::npos) {
            // Last (or only) segment: everything after the previous separator.
            m_jarPaths.insert(value.substr(start));
            return;
        }
        m_jarPaths.insert(value.substr(start, delim - start));
        start = delim + 1;
    }
}

void ScriptOptionsConverter::getScriptClassName(std::string & src_scriptCode) {
std::string scriptClass;

m_scriptOptionsParser->getScriptClassName(src_scriptCode, scriptClass,
[&](const std::string& msg){m_throwException("F-UDF-CL-SL-JAVA-1601: " + msg);});

if (scriptClass != "") {
m_jvmOptions.push_back("-Dexasol.scriptclass=" + scriptClass);
}
}

void ScriptOptionsConverter::getExternalJvmOptions(std::string & src_scriptCode) {
std::vector<std::string> jvmOptions;
const std::string whitespace = " \t\f\v";
m_scriptOptionsParser->getExternalJvmOptions(src_scriptCode, jvmOptions,
[&](const std::string& msg){m_throwException("F-UDF-CL-SL-JAVA-1602" + msg);});

for (const std::string& jvmOption: jvmOptions) {
for (size_t start = 0, delim = 0; ; start = delim + 1) {
start = jvmOption.find_first_not_of(whitespace, start);
if (start == std::string::npos)
break;
delim = jvmOption.find_first_of(whitespace, start);
if (delim != std::string::npos) {
m_jvmOptions.push_back(jvmOption.substr(start, delim - start));
}
else {
m_jvmOptions.push_back(jvmOption.substr(start));
break;
}
}
/**
 * Appends the JVM system property that carries the script class name.
 *
 * @param value Script class name; an empty value adds nothing.
 */
void Converter::convertScriptClassName(const std::string & value) {
    if (value.empty()) {
        return;
    }
    m_jvmOptions.push_back("-Dexasol.scriptclass=" + value);
}

void ScriptOptionsConverter::convertImportScripts(std::string & src_scriptCode) {
SWIGMetadata *meta = NULL;
// Attention: We must hash the parent script before modifying it (adding the
// package definition). Otherwise we don't recognize if the script imports its self
m_importedScriptChecksums.insert(scriptToMd5(src_scriptCode.c_str()));
while (true) {
std::pair<std::string, size_t> nextImportStatement;
m_scriptOptionsParser->getNextImportScript(src_scriptCode, nextImportStatement,
[&](const std::string& msg){m_throwException("F-UDF-CL-SL-JAVA-1604" + msg);});
if (nextImportStatement.first == "")
void Converter::convertJvmOption(const std::string & value) {
for (size_t start = 0, delim = 0; ; start = delim + 1) {
start = value.find_first_not_of(m_whitespace, start);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Later on, we need to decide how to handle whitespace inside a JVM option here.

if (start == std::string::npos)
break;
if (!meta) {
meta = new SWIGMetadata();
if (!meta)
m_throwException("F-UDF-CL-SL-JAVA-1603: Failure while importing scripts");
delim = value.find_first_of(m_whitespace, start);
if (delim != std::string::npos) {
m_jvmOptions.push_back(value.substr(start, delim - start));
}
const char *scriptCode = meta->moduleContent(nextImportStatement.first.c_str());
const char *exception = meta->checkException();
if (exception)
m_throwException("F-UDF-CL-SL-JAVA-1605: "+std::string(exception));
if (m_importedScriptChecksums.insert(scriptToMd5(scriptCode)).second) {
// Script has not been imported yet
// If this imported script contains %import statements
// they will be resolved in this while loop.
src_scriptCode.insert(nextImportStatement.second, scriptCode);
else {
m_jvmOptions.push_back(value.substr(start));
break;
}
}
if (meta)
delete meta;
}

const std::set<std::string> & ScriptOptionsConverter::getJarPaths() {
return m_jarPaths;
}

} //namespace JavaScriptOptions

Expand Down
Loading