Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed build on systems with libc6 #113

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ language: c++
sudo: false
compiler:
- gcc
before_install:
- sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 762E3157
install:
- export CXX="g++-4.8" CC="gcc-4.8"
addons:
Expand Down
2 changes: 2 additions & 0 deletions src/FactExtract/Parser/afglrparserlib/regexp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ TRegexMatcher::TRegexMatcher(const TWtringBuf& pattern) {
}

NPire::TLexer lexer(final.begin(), final.end());
lexer.AddFeature(NPire::NFeatures::EnableUnicodeSequences());
lexer.SetEncoding(NPire::NEncodings::Utf8());
NPire::TFsm fsm = lexer.Parse();
if (surround)
Expand All @@ -58,6 +59,7 @@ TRegexMatcher::~TRegexMatcher() {
bool TRegexMatcher::IsCompatible(const TWtringBuf& pattern) {
try {
NPire::TLexer lexer(pattern.begin(), pattern.end());
lexer.AddFeature(NPire::NFeatures::EnableUnicodeSequences());
lexer.Parse();
return true;
} catch (...) {
Expand Down
1 change: 1 addition & 0 deletions src/contrib/libs/pire/pire/extra.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@
#include <contrib/libs/pire/pire/extra/capture.h>
#include <contrib/libs/pire/pire/extra/count.h>
#include <contrib/libs/pire/pire/extra/glyphs.h>
#include <contrib/libs/pire/pire/extra/unicode_support.h>

#endif
125 changes: 125 additions & 0 deletions src/contrib/libs/pire/pire/extra/unicode_support.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* unicode_support.cpp -- implementation for the EnableUnicodeSequences feature.
*
* Copyright (c) 2018 YANDEX LLC
* Author: Andrey Logvin <[email protected]>
*
* This file is part of Pire, the Perl Incompatible
* Regular Expressions library.
*
* Pire is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Pire is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser Public License for more details.
* You should have received a copy of the GNU Lesser Public License
* along with Pire. If not, see <http://www.gnu.org/licenses>.
*/


#include "unicode_support.h"

#include <contrib/libs/pire/pire/re_lexer.h>
//#include <util/stream/output.h>

namespace Pire {

namespace {
// Returns true iff `ch` is an ASCII hexadecimal digit: 0-9, A-F or a-f.
// The ranges are checked explicitly rather than through std::isxdigit so that
// the result never depends on the C library's character classification and
// the file does not need <cctype>; every accepted value is a valid 7-bit
// index into hextable.
bool IsHexDigit(wchar32 ch) {
    return (ch >= '0' && ch <= '9')
        || (ch >= 'A' && ch <= 'F')
        || (ch >= 'a' && ch <= 'f');
}

// Lookup table indexed by character code that yields the numeric value of a
// hexadecimal digit: '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15,
// and every other entry is -1. HexToDec() indexes it with characters that
// ReadHexDigit() has already validated through IsHexDigit().
static const long hextable[] = {
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1, 0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1,-1,10,11,12,13,14,15,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};

// Lexer feature that decodes hexadecimal escapes into a single character term:
//   \xHH      -- exactly two hex digits
//   \x{H...}  -- one or more hex digits, closed by '}'
// The decoded value must not exceed 0x10FFFF; malformed or too large
// sequences are reported through Error().
class EnableUnicodeSequencesImpl : public Feature {
public:
    // This feature handles the control-flagged 'x' character.
    bool Accepts(wchar32 c) const {
        return c == (Control | 'x');
    }

    // Consumes one escape sequence from the lexer input and returns the
    // character term for the code point it denotes.
    Term Lex() {
        ystring hexStr;
        GetChar(); // presumably the character Accepts() matched -- it carries no payload
        wchar32 ch = PeekChar();
        if (ch == '{') {
            GetChar();
            // Braced form: read digits until '}' (at least one digit is
            // required, so "\x{}" is rejected as a non-hex character) or End.
            hexStr = ReadHexDigit([](wchar32 ch, size_t numAdded) -> bool {return ch == End || (numAdded != 0 && ch == '}');});
            ch = GetChar();
            if (ch != '}') {
                Error("Pire::EnableUnicodeSequencesImpl::Lex(): \"\\x{...\" sequence should be closed by \"}\"");
            }
        } else {
            // Short form: stop as soon as two digits have been collected.
            hexStr = ReadHexDigit([](wchar32, size_t numAdded) -> bool {return numAdded == 2;});
            if (hexStr.size() != 2) {
                Error("Pire::EnableUnicodeSequencesImpl::Lex(): \"\\x...\" sequence should contain two symbols");
            }
        }
        return Term::Character(HexToDec(hexStr));
    }

private:
    // Largest value accepted for an escape sequence.
    const wchar32 MAX_UNICODE = 0x10FFFF;

    // Reads characters until shouldStop(nextChar, digitsReadSoFar) returns
    // true, reporting an error for any character that is not a hex digit.
    // The character that stopped the scan is pushed back to the input.
    // Templated on the predicate so lambdas can be passed directly, without
    // going through std::function.
    template<typename Lambda>
    ystring ReadHexDigit(Lambda shouldStop) {
        ystring result;
        wchar32 ch = GetChar();
        while (!shouldStop(ch, result.size())) {
            if (!IsHexDigit(ch)) {
                Error("Pire::EnableUnicodeSequencesImpl::Lex(): \"\\x...\" sequence contains non-valid hex number");
            }
            result.push_back(static_cast<char>(ch));
            ch = GetChar();
        }
        UngetChar(ch);
        return result;
    }

    // Converts a string of hex digits to its numeric value (a hand-rolled
    // replacement for std::stoul(hexStr, 0, 16)). Reports an error when the
    // value exceeds MAX_UNICODE.
    wchar32 HexToDec(const ystring& hexStr) {
        wchar32 converted = 0;
        for (auto it = hexStr.begin(); it != hexStr.end(); ++it) {
            // Index through unsigned char so a byte >= 0x80 can never become
            // a negative table index.
            const long digit = hextable[static_cast<unsigned char>(*it)];
            converted = (converted << 4) | static_cast<wchar32>(digit);
            // Check after every digit: the accumulator is fixed-width, so a
            // long "\x{...}" sequence would otherwise shift its high bits out
            // and come back as a small, seemingly valid value.
            if (converted > MAX_UNICODE) {
                break;
            }
        }
        if (converted > MAX_UNICODE) {
            Error("Pire::EnableUnicodeSequencesImpl::Lex(): hex number in \"\\x...\" sequence is too large");
        }
        return converted;
    }
};
}

namespace Features {
    // Factory for the hexadecimal escape feature. The object is allocated
    // with `new`; presumably the lexer it is added to takes ownership --
    // TODO confirm against the lexer's AddFeature contract.
    Feature* EnableUnicodeSequences() {
        Feature* feature = new EnableUnicodeSequencesImpl;
        return feature;
    }
}
}
42 changes: 42 additions & 0 deletions src/contrib/libs/pire/pire/extra/unicode_support.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* unicode_support.h -- declaration of the EnableUnicodeSequences feature.
*
* Copyright (c) 2018 YANDEX LLC
* Author: Andrey Logvin <[email protected]>
*
* This file is part of Pire, the Perl Incompatible
* Regular Expressions library.
*
* Pire is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Pire is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser Public License for more details.
* You should have received a copy of the GNU Lesser Public License
* along with Pire. If not, see <http://www.gnu.org/licenses>.
*/


#ifndef PIRE_EXTRA_SUPPORT_UNICODE_H
#define PIRE_EXTRA_SUPPORT_UNICODE_H

#include <memory>

namespace Pire {
class Feature;
namespace Features {

/**
* A feature which tells Pire to convert \xHH (exactly two hex digits) and
* \x{H...} (one or more hex digits, value at most 0x10FFFF) escape sequences
* into the corresponding UTF-32 symbols,
* e.g. \x00 == '\0', \x41 == A
*
* Returns a newly allocated feature object; presumably ownership passes to
* the lexer the feature is added to -- TODO confirm.
*/
Feature* EnableUnicodeSequences();
}
}

#endif
6 changes: 3 additions & 3 deletions src/contrib/tools/bison/gnulib/src/fflush.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#undef fflush


#if defined _IO_ftrylockfile || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5 */
#if defined _IO_ftrylockfile || defined _IO_EOF_SEEN || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5, Linux libc6 */

/* Clear the stream's ungetc buffer, preserving the value of ftello (fp). */
static void
Expand Down Expand Up @@ -71,7 +71,7 @@ clear_ungetc_buffer (FILE *fp)

#endif

#if ! (defined _IO_ftrylockfile || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5 */)
#if ! (defined _IO_ftrylockfile || defined _IO_EOF_SEEN || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5, Linux libc6 */)

# if (defined __sferror || defined __DragonFly__) && defined __SNPT /* FreeBSD, NetBSD, OpenBSD, DragonFly, Mac OS X, Cygwin */

Expand Down Expand Up @@ -145,7 +145,7 @@ rpl_fflush (FILE *stream)
if (stream == NULL || ! freading (stream))
return fflush (stream);

#if defined _IO_ftrylockfile || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5 */
#if defined _IO_ftrylockfile || defined _IO_EOF_SEEN || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5, Linux libc6 */

clear_ungetc_buffer_preserving_position (stream);

Expand Down
2 changes: 1 addition & 1 deletion src/contrib/tools/bison/gnulib/src/fpurge.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ fpurge (FILE *fp)
/* Most systems provide FILE as a struct and the necessary bitmask in
<stdio.h>, because they need it for implementing getc() and putc() as
fast macros. */
# if defined _IO_ftrylockfile || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5 */
# if defined _IO_ftrylockfile || defined _IO_EOF_SEEN || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5, Linux libc6 */
fp->_IO_read_end = fp->_IO_read_ptr;
fp->_IO_write_ptr = fp->_IO_write_base;
/* Avoid memory leak when there is an active ungetc buffer. */
Expand Down
2 changes: 1 addition & 1 deletion src/contrib/tools/bison/gnulib/src/freadahead.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
size_t
freadahead (FILE *fp)
{
#if defined _IO_ftrylockfile || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5 */
#if defined _IO_ftrylockfile || defined _IO_EOF_SEEN || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5, Linux libc6 */
if (fp->_IO_write_ptr > fp->_IO_write_base)
return 0;
return (fp->_IO_read_end - fp->_IO_read_ptr)
Expand Down
2 changes: 1 addition & 1 deletion src/contrib/tools/bison/gnulib/src/freading.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ freading (FILE *fp)
/* Most systems provide FILE as a struct and the necessary bitmask in
<stdio.h>, because they need it for implementing getc() and putc() as
fast macros. */
# if defined _IO_ftrylockfile || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5 */
# if defined _IO_ftrylockfile || defined _IO_EOF_SEEN || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5, Linux libc6 */
return ((fp->_flags & _IO_NO_WRITES) != 0
|| ((fp->_flags & (_IO_NO_READS | _IO_CURRENTLY_PUTTING)) == 0
&& fp->_IO_read_base != NULL));
Expand Down
4 changes: 2 additions & 2 deletions src/contrib/tools/bison/gnulib/src/fseeko.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fseeko (FILE *fp, off_t offset, int whence)
#endif

/* These tests are based on fpurge.c. */
#if defined _IO_ftrylockfile || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5 */
#if defined _IO_ftrylockfile || defined _IO_EOF_SEEN || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5, Linux libc6 */
if (fp->_IO_read_end == fp->_IO_read_ptr
&& fp->_IO_write_ptr == fp->_IO_write_base
&& fp->_IO_save_base == NULL)
Expand Down Expand Up @@ -121,7 +121,7 @@ fseeko (FILE *fp, off_t offset, int whence)
return -1;
}

#if defined _IO_ftrylockfile || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5 */
#if defined _IO_ftrylockfile || defined _IO_EOF_SEEN || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5, Linux libc6 */
fp->_flags &= ~_IO_EOF_SEEN;
fp->_offset = pos;
#elif defined __sferror || defined __DragonFly__ /* FreeBSD, NetBSD, OpenBSD, DragonFly, Mac OS X, Cygwin */
Expand Down
2 changes: 1 addition & 1 deletion src/contrib/tools/bison/gnulib/src/fseterr.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ fseterr (FILE *fp)
/* Most systems provide FILE as a struct and the necessary bitmask in
<stdio.h>, because they need it for implementing getc() and putc() as
fast macros. */
#if defined _IO_ftrylockfile || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5 */
#if defined _IO_ftrylockfile || defined _IO_EOF_SEEN || __GNU_LIBRARY__ == 1 /* GNU libc, BeOS, Haiku, Linux libc5, Linux libc6 */
fp->_flags |= _IO_ERR_SEEN;
#elif defined __sferror || defined __DragonFly__ /* FreeBSD, NetBSD, OpenBSD, DragonFly, Mac OS X, Cygwin */
fp_->_flags |= __SERR;
Expand Down
7 changes: 7 additions & 0 deletions src/contrib/tools/bison/gnulib/src/stdio-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@
have different naming conventions, or their access requires some casts. */


/* Glibc 2.28 made _IO_IN_BACKUP private. For now, work around this
problem by defining it ourselves. FIXME: Do not rely on glibc
internals. */
#if !defined _IO_IN_BACKUP && defined _IO_EOF_SEEN
# define _IO_IN_BACKUP 0x100
#endif

/* BSD stdio derived implementations. */

#if defined __NetBSD__ /* NetBSD */
Expand Down
1 change: 1 addition & 0 deletions src/library/pire/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ SRCS(
extra/capture.cpp
extra/count.cpp
extra/glyphs.cpp
extra/unicode_support.cpp
re_lexer.cpp
re_parser.y
extraencodings.cpp
Expand Down
1 change: 1 addition & 0 deletions src/library/pire/pire.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ namespace NPire {
using Pire::Features::GlueSimilarGlyphs;
using Pire::Features::AndNotSupport;
using Pire::Features::Capture;
using Pire::Features::EnableUnicodeSequences;
}

namespace NEncodings {
Expand Down