Skip to content

Commit

Permalink
feat: add String::SanitizeUTF8 function (#130)
Browse files Browse the repository at this point in the history
feat: add String::SanitizeUTF8 function
  • Loading branch information
vishwa2710 authored Oct 2, 2023
1 parent 04b5799 commit 54492cd
Show file tree
Hide file tree
Showing 8 changed files with 261 additions and 113 deletions.
14 changes: 14 additions & 0 deletions include/OpenSpaceToolkit/Core/Types/String.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,20 @@ class String : public std::string

static String Replicate(const String& aString, Size aCount);

/// @brief Check whether the string is a valid UTF-8 byte sequence
///
/// @param [in] aString A string
/// @return True if the whole string is valid UTF-8, false if any invalid byte sequence is found

static bool IsValidUTF8(const String& aString);

/// @brief Sanitize the string by removing any invalid UTF-8 byte sequences
///
/// @param [in] aString A string
/// @return A new string built from the input with the sequences that fail UTF-8 validation removed

static String SanitizeUTF8(const String& aString);

/// @brief Create formatted string
///
/// @code
Expand Down
115 changes: 57 additions & 58 deletions share/util/Test.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2,79 +2,78 @@

#include <OpenSpaceToolkit/Core/Logger.hpp>


class TestClass
{
public:
TestClass();

public:

TestClass ( ) ;

friend std::ostream& operator << ( std::ostream& anOutputStream,
const TestClass& aTestClass ) ;

private:

int integer_ ;
double double_ ;
friend std::ostream& operator<<(std::ostream& anOutputStream, const TestClass& aTestClass);

} ;
private:
int integer_;
double double_;
};


TestClass::TestClass ( )
: integer_(123),
double_(456.789)
TestClass::TestClass()
: integer_(123),
double_(456.789)
{

}

std::ostream& operator << ( std::ostream& anOutputStream,
const TestClass& aTestClass )
std::ostream& operator<<(std::ostream& anOutputStream, const TestClass& aTestClass)
{
anOutputStream << "Class ▸ " << aTestClass.integer_ << " " << aTestClass.double_;

anOutputStream << "Class ▸ " << aTestClass.integer_ << " " << aTestClass.double_ ;

return anOutputStream ;

return anOutputStream;
}


int main ( )
int main()
{

LOG_SCOPE("Test") ;

using ostk::core::types::Integer ;
using ostk::core::Logger ;
using ostk::core::logger::Severity ;

Logger logger = Logger::Console(Severity::Info) ;

logger << 1 ;
logger << 123.456 ;
logger << 'a' ;
logger << "Hello World!" ;
logger << "Hello" << " " << "World!" ;
logger << TestClass() ;

LOG_TRACE(logger) << TestClass() << " !!!" ;
LOG_DEBUG(logger) << TestClass() << " !!!" ;
LOG_INFO(logger) << TestClass() << " !!!" ;
LOG_WARNING(logger) << TestClass() << " !!!" ;
LOG_ERROR(logger) << TestClass() << " !!!" ;

GLOBAL_LOG_TRACE << "Hello " << " " << "World!" ;
GLOBAL_LOG_DEBUG << "Hello " << " " << "World!" ;
GLOBAL_LOG_INFO << "Hello " << " " << "World!" ;
GLOBAL_LOG_WARNING << "Hello " << " " << "World!" ;
GLOBAL_LOG_ERROR << "Hello " << " " << "World!" ;
GLOBAL_LOG_FATAL << "Hello " << " " << "World!" ;

Integer a = 6 ;
LOG_SCOPE("Test");

using ostk::core::types::Integer;
using ostk::core::Logger;
using ostk::core::logger::Severity;

Logger logger = Logger::Console(Severity::Info);

logger << 1;
logger << 123.456;
logger << 'a';
logger << "Hello World!";
logger << "Hello"
<< " "
<< "World!";
logger << TestClass();

LOG_TRACE(logger) << TestClass() << " !!!";
LOG_DEBUG(logger) << TestClass() << " !!!";
LOG_INFO(logger) << TestClass() << " !!!";
LOG_WARNING(logger) << TestClass() << " !!!";
LOG_ERROR(logger) << TestClass() << " !!!";

GLOBAL_LOG_TRACE << "Hello "
<< " "
<< "World!";
GLOBAL_LOG_DEBUG << "Hello "
<< " "
<< "World!";
GLOBAL_LOG_INFO << "Hello "
<< " "
<< "World!";
GLOBAL_LOG_WARNING << "Hello "
<< " "
<< "World!";
GLOBAL_LOG_ERROR << "Hello "
<< " "
<< "World!";
GLOBAL_LOG_FATAL << "Hello "
<< " "
<< "World!";

Integer a = 6;

if (a < 5)
{

}

}
9 changes: 4 additions & 5 deletions src/OpenSpaceToolkit/Core/Error/RuntimeError.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
/// Apache License 2.0

#include <OpenSpaceToolkit/Core/Error/RuntimeError.hpp>

namespace ostk
Expand All @@ -12,19 +11,19 @@ namespace error
RuntimeError::RuntimeError(const String& aMessage)
: Exception(String::Empty()),
message_(aMessage),
stackTrace_(boost::stacktrace::to_string(boost::stacktrace::stacktrace())),
what_(stackTrace_ + message_)
stackTrace_(String::SanitizeUTF8(boost::stacktrace::to_string(boost::stacktrace::stacktrace()))),
what_(aMessage + "\n" + stackTrace_)
{
}

String RuntimeError::getMessage() const
{
return message_.data();
return message_;
}

String RuntimeError::getStackTrace() const
{
return stackTrace_.data();
return stackTrace_;
}

RuntimeError::~RuntimeError() {}
Expand Down
78 changes: 78 additions & 0 deletions src/OpenSpaceToolkit/Core/Types/String.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,84 @@ String String::Replicate(const String& aString, Size aCount)
return stringStream.str();
}

bool String::IsValidUTF8(const String& string)
{
const Size stringLength = string.getLength();

for (Size i = 0; i < stringLength; i++)
{
Integer c = (unsigned char)string[i];

Size n;

if (0x00 <= c && c <= 0x7f)
{
n = 0; // 0bbbbbbb
}
else if ((c & 0xE0) == 0xC0)
{
n = 1; // 110bbbbb
}
else if (c == 0xed && i < (stringLength - 1) && ((unsigned char)string[i + 1] & 0xa0) == 0xa0)
{
return false; // U+d800 to U+dfff
}
else if ((c & 0xF0) == 0xE0)
{
n = 2; // 1110bbbb
}
else if ((c & 0xF8) == 0xF0)
{
n = 3; // 11110bbb
}
else
{
return false;
}
for (Size j = 0; j < n && i < stringLength; j++)
{
if ((++i == stringLength) || (((unsigned char)string[i] & 0xC0) != 0x80))
{
return false;
}
}
}

return true;
}

/// @brief Build a copy of a string from which invalid UTF-8 bytes have been removed
///
/// The string is scanned left to right. At each position the lead byte gives the presumed
/// length of the sequence; if that many bytes are available and pass IsValidUTF8, they are
/// copied to the output. Otherwise only the offending byte is dropped and scanning resumes
/// at the next byte, so valid characters that follow an invalid or truncated sequence are
/// preserved.
///
/// @param [in] string A string
/// @return A string containing only the sequences accepted by IsValidUTF8, in their original order

String String::SanitizeUTF8(const String& string)
{
    const Size stringLength = string.getLength();

    String result;
    result.reserve(string.size());  // The output can never be longer than the input

    Size index = 0;

    while (index < stringLength)
    {
        const unsigned char lead = static_cast<unsigned char>(string[index]);

        // Presumed sequence length from the lead byte; anything below 0xC0 (ASCII or a stray
        // continuation byte) is examined on its own.
        Size sequenceLength = 1;

        if (lead >= 0xF0)
        {
            sequenceLength = 4;
        }
        else if (lead >= 0xE0)
        {
            sequenceLength = 3;
        }
        else if (lead >= 0xC0)
        {
            sequenceLength = 2;
        }

        // `index < stringLength` holds here, so the subtraction cannot underflow.
        if (sequenceLength <= (stringLength - index))
        {
            const String candidate = string.getSubstring(index, sequenceLength);

            if (IsValidUTF8(candidate))
            {
                result += candidate;
                index += sequenceLength;
                continue;
            }
        }

        // Invalid or truncated sequence: discard just this byte and resynchronise on the next
        // one, rather than skipping bytes that may start a valid character.
        ++index;
    }

    return result;
}

} // namespace types
} // namespace core
} // namespace ostk
Loading

0 comments on commit 54492cd

Please sign in to comment.