Skip to content

Commit

Permalink
Add --group-separator and --no-group-separator to pcre2grep.
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilipHazel committed Nov 20, 2023
1 parent 7fe586b commit e179a4b
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 7 deletions.
6 changes: 4 additions & 2 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,17 @@ consistency with OP_VREVERSE.
40. In some legacy environments with a pre C99 snprintf, pcre2_regerror could
return an incorrect value when the provided buffer was too small.

41. Applied pull request #342 which adds sanity checks for ctype functions and
41. Applied pull request #342 which adds sanity checks for ctype functions and
locks out any accidental sign-extension.

42. In the 32-bit library, in non-UTF mode, a quantifier that followed a
literal character with a value greater than or equal to 0x80000000u caused
literal character with a value greater than or equal to 0x80000000u caused
undefined behaviour.

43. \z was misbehaving when matching fragments inside invalid UTF strings.

44. Implemented --group-separator and --no-group-separator for pcre2grep.


Version 10.42 11-December-2022
------------------------------
Expand Down
8 changes: 8 additions & 0 deletions RunGrepTest
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,14 @@ fi
echo "---------------------------- Test 151 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always -e this -e The -e 'The wo' testdata/grepinputv) >>testtrygrep

echo "---------------------------- Test 152 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --group-separator='++' 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 153 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --no-group-separator 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep


# Now compare the results.

Expand Down
13 changes: 11 additions & 2 deletions doc/pcre2grep.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH PCRE2GREP 1 "08 February 2023" "PCRE2 10.43"
.TH PCRE2GREP 1 "20 November 2023" "PCRE2 10.43"
.SH NAME
pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS
Expand Down Expand Up @@ -367,6 +367,10 @@ mode, \fB--colour\fP has no effect, and no context is shown. That is, the
match in a line, each of them is shown separately. This option is mutually
exclusive with \fB--output\fP, \fB--line-offsets\fP, and \fB--only-matching\fP.
.TP
\fB--group-separator\fP=\fItext\fP
Output this text string instead of two hyphens between groups of lines when
\fB-A\fP, \fB-B\fP, or \fB-C\fP is in use. See also \fB--no-group-separator\fP.
.TP
\fB-H\fP, \fB--with-filename\fP
Force the inclusion of the file name at the start of output lines when
searching a single file. The file name is not normally shown in this case.
Expand Down Expand Up @@ -590,6 +594,11 @@ being output, it precedes the line number. When the \fB-M\fP option causes a
pattern to match more than one line, only the first is preceded by its line
number. This option is forced if \fB--line-offsets\fP is used.
.TP
\fB--no-group-separator\fP
Do not output a separator between groups of lines when \fB-A\fP, \fB-B\fP, or
\fB-C\fP is in use. The default is to output a line containing two hyphens. See
also \fB--group-separator\fP.
.TP
\fB--no-jit\fP
If the PCRE2 library is built with support for just-in-time compiling (which
speeds up matching), \fBpcre2grep\fP automatically makes use of this, unless it
Expand Down Expand Up @@ -997,6 +1006,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 08 February 2023
Last updated: 20 November 2023
Copyright (c) 1997-2023 University of Cambridge.
.fi
14 changes: 11 additions & 3 deletions src/pcre2grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ static const char *dee_option = NULL;
static const char *DEE_option = NULL;
static const char *locale = NULL;
static const char *newline_arg = NULL;
static const char *group_separator = "--";
static const char *om_separator = NULL;
static const char *stdin_name = "(standard input)";
static const char *output_text = NULL;
Expand Down Expand Up @@ -425,6 +426,8 @@ used to identify them. */
#define N_OM_CAPTURE (-24)
#define N_ALLABSK (-25)
#define N_POSIX_DIGIT (-26)
#define N_GROUP_SEPARATOR (-27)
#define N_NO_GROUP_SEPARATOR (-28)

static option_item optionlist[] = {
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
Expand All @@ -448,6 +451,7 @@ static option_item optionlist[] = {
{ OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
{ OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
{ OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
{ OP_STRING, N_GROUP_SEPARATOR, &group_separator, "group-separator=text", "set separator between groups of lines" },
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
{ OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
Expand All @@ -471,6 +475,7 @@ static option_item optionlist[] = {
#else
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
#endif
{ OP_NODATA, N_NO_GROUP_SEPARATOR, NULL, "no-group-separator", "suppress separators between groups of lines" },
{ OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
{ OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
{ OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
Expand Down Expand Up @@ -2954,7 +2959,8 @@ while (ptr < endptr)

if (hyphenpending)
{
fprintf(stdout, "--" STDOUT_NL);
if (group_separator != NULL)
fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
hyphenpending = FALSE;
hyphenprinted = TRUE;
}
Expand All @@ -2975,8 +2981,9 @@ while (ptr < endptr)
p = previous_line(p, main_buffer);
}

if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
fprintf(stdout, "--" STDOUT_NL);
if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted &&
group_separator != NULL)
fprintf(stdout, "%s%s", group_separator, STDOUT_NL);

while (p < ptr)
{
Expand Down Expand Up @@ -3590,6 +3597,7 @@ switch(letter)
case N_LOFFSETS: line_offsets = number = TRUE; break;
case N_NOJIT: use_jit = FALSE; break;
case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
case N_NO_GROUP_SEPARATOR: group_separator = NULL; break;
case 'a': binary_files = BIN_TEXT; break;
case 'c': count_only = TRUE; break;
case N_POSIX_DIGIT: posix_digit = TRUE; break;
Expand Down
21 changes: 21 additions & 0 deletions testdata/grepoutput
Original file line number Diff line number Diff line change
Expand Up @@ -1232,3 +1232,24 @@ RC=2
The word is cat in this line
The caterpillar sat on the mat
The snowcat is not an animal
---------------------------- Test 152 -----------------------------
24:four
25-five
26-six
27-seven
++
34:fourteen
35-fifteen
36-sixteen
37-seventeen
RC=0
---------------------------- Test 153 -----------------------------
24:four
25-five
26-six
27-seven
34:fourteen
35-fifteen
36-sixteen
37-seventeen
RC=0
Expand Down

0 comments on commit e179a4b

Please sign in to comment.