971 lines
39 KiB
Diff
971 lines
39 KiB
Diff
From e2c673cbcdc325a3a2e9dd02169bb4a42c61bc48 Mon Sep 17 00:00:00 2001
|
|
From: James Youngman <jay@gnu.org>
|
|
Date: Mon, 13 Nov 2017 22:37:55 +0000
|
|
Subject: [PATCH 144/224] regexprops: fix dangling reference to the `ed'
|
|
regular expression dialect.
|
|
|
|
* lib/regextype.c (regex_map): Permute the entries to list POSIX
|
|
dialects before other ones, so that we don't end up with a
|
|
dangling reference to `ed' regular expressions when
|
|
context=findutils. Remove trailing white space from the output.
|
|
* doc/regexprops.texi: Regenerate this file, so that we no longer
|
|
have a dangling reference to the `ed' dialect.
|
|
* doc/find.texi (Regular Expressions): Point out the difference
|
|
between Emacs regular expressions and findutils regular
|
|
expressions: in findutils "." will match newline.
|
|
* find/find.1: Likewise.
|
|
* locate/locate.1: Likewise. Also document the --regextype option.
|
|
---
|
|
doc/find.texi | 7 +-
|
|
doc/regexprops.texi | 376 ++++++++++++++++++++++++++++----------------
|
|
find/find.1 | 4 +-
|
|
lib/regexprops.c | 74 ++++-----
|
|
lib/regextype.c | 14 +-
|
|
locate/locate.1 | 14 +-
|
|
6 files changed, 306 insertions(+), 183 deletions(-)
|
|
|
|
diff --git a/doc/find.texi b/doc/find.texi
|
|
index 2731f0af..5573d29b 100644
|
|
--- a/doc/find.texi
|
|
+++ b/doc/find.texi
|
|
@@ -3917,8 +3917,11 @@ your locale setup affects the interpretation of regular expressions.
|
|
|
|
There are also several different types of regular expression, and
|
|
these are interpreted differently. Normally, the type of regular
|
|
-expression used by @code{find} and @code{locate} is the same as is
|
|
-used in GNU Emacs. Both programs provide an option which allows you
|
|
+expression used by @code{find} and @code{locate} is almost identical to
|
|
+that used in GNU Emacs. The single difference is that in @code{find}
|
|
+and @code{locate}, a @samp{.} will match a newline character.
|
|
+
|
|
+Both @code{find} and @code{locate} provide an option which allows you
|
|
to select an alternative regular expression syntax; for @code{find}
|
|
this is the @samp{-regextype} option, and for @code{locate} this is
|
|
the @samp{--regextype} option.
|
|
diff --git a/doc/regexprops.texi b/doc/regexprops.texi
|
|
index 8fee88ae..0229460e 100644
|
|
--- a/doc/regexprops.texi
|
|
+++ b/doc/regexprops.texi
|
|
@@ -11,15 +11,15 @@
|
|
|
|
@menu
|
|
* findutils-default regular expression syntax::
|
|
+* posix-awk regular expression syntax::
|
|
+* posix-basic regular expression syntax::
|
|
+* posix-egrep regular expression syntax::
|
|
+* posix-extended regular expression syntax::
|
|
* awk regular expression syntax::
|
|
* egrep regular expression syntax::
|
|
* emacs regular expression syntax::
|
|
* gnu-awk regular expression syntax::
|
|
* grep regular expression syntax::
|
|
-* posix-awk regular expression syntax::
|
|
-* posix-basic regular expression syntax::
|
|
-* posix-egrep regular expression syntax::
|
|
-* posix-extended regular expression syntax::
|
|
@end menu
|
|
|
|
@node findutils-default regular expression syntax
|
|
@@ -44,6 +44,7 @@ matches a @samp{?}.
|
|
|
|
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are ignored. Within square brackets, @samp{\} is taken literally. Character classes are not supported, so for example you would need to use @samp{[0-9]} instead of @samp{[[:digit:]]}.
|
|
|
|
+
|
|
GNU extensions are supported:
|
|
@enumerate
|
|
|
|
@@ -73,11 +74,10 @@ The alternation operator is @samp{\|}.
|
|
The character @samp{^} only represents the beginning of a string when it appears:
|
|
@enumerate
|
|
|
|
-@item
|
|
-At the beginning of a regular expression
|
|
+@item At the beginning of a regular expression
|
|
+
|
|
+@item After an open-group, signified by @samp{\(}
|
|
|
|
-@item After an open-group, signified by
|
|
-@samp{\(}
|
|
|
|
@item After the alternation operator @samp{\|}
|
|
|
|
@@ -89,8 +89,8 @@ The character @samp{$} only represents the end of a string when it appears:
|
|
|
|
@item At the end of a regular expression
|
|
|
|
-@item Before a close-group, signified by
|
|
-@samp{\)}
|
|
+@item Before a close-group, signified by @samp{\)}
|
|
+
|
|
@item Before the alternation operator @samp{\|}
|
|
|
|
@end enumerate
|
|
@@ -101,8 +101,8 @@ The character @samp{$} only represents the end of a string when it appears:
|
|
|
|
@item At the beginning of a regular expression
|
|
|
|
-@item After an open-group, signified by
|
|
-@samp{\(}
|
|
+@item After an open-group, signified by @samp{\(}
|
|
+
|
|
@item After the alternation operator @samp{\|}
|
|
|
|
@end enumerate
|
|
@@ -113,8 +113,8 @@ The character @samp{$} only represents the end of a string when it appears:
|
|
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
|
|
|
|
|
-@node awk regular expression syntax
|
|
-@subsection @samp{awk} regular expression syntax
|
|
+@node posix-awk regular expression syntax
|
|
+@subsection @samp{posix-awk} regular expression syntax
|
|
|
|
|
|
The character @samp{.} matches any single character except the null character.
|
|
@@ -135,53 +135,57 @@ matches a @samp{?}.
|
|
|
|
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} can be used to quote the following character. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
|
|
|
+
|
|
GNU extensions are not supported and so @samp{\w}, @samp{\W}, @samp{\<}, @samp{\>}, @samp{\b}, @samp{\B}, @samp{\`}, and @samp{\'} match @samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.
|
|
|
|
-Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit matches that digit.
|
|
+
|
|
+Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
|
|
|
The alternation operator is @samp{|}.
|
|
|
|
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
|
|
|
-@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
|
|
+
|
|
+@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
|
|
@enumerate
|
|
|
|
@item At the beginning of a regular expression
|
|
|
|
-@item After an open-group, signified by
|
|
-@samp{(}
|
|
+@item After an open-group, signified by @samp{(}
|
|
+
|
|
@item After the alternation operator @samp{|}
|
|
|
|
@end enumerate
|
|
|
|
|
|
-
|
|
+Intervals are specified by @samp{@{} and @samp{@}}.
|
|
+Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}.
|
|
|
|
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
|
|
|
|
|
-@node egrep regular expression syntax
|
|
-@subsection @samp{egrep} regular expression syntax
|
|
+@node posix-basic regular expression syntax
|
|
+@subsection @samp{posix-basic} regular expression syntax
|
|
|
|
|
|
-The character @samp{.} matches any single character.
|
|
+The character @samp{.} matches any single character except the null character.
|
|
|
|
|
|
@table @samp
|
|
|
|
-@item +
|
|
-indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
|
-@item ?
|
|
-indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
|
@item \+
|
|
-matches a @samp{+}
|
|
+indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
|
@item \?
|
|
-matches a @samp{?}.
|
|
+indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
|
+@item + and ?
|
|
+match themselves.
|
|
+
|
|
@end table
|
|
|
|
|
|
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
|
|
|
+
|
|
GNU extensions are supported:
|
|
@enumerate
|
|
|
|
@@ -204,24 +208,59 @@ GNU extensions are supported:
|
|
@end enumerate
|
|
|
|
|
|
-Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
|
+Grouping is performed with backslashes followed by parentheses @samp{\(}, @samp{\)}. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{\(}.
|
|
|
|
-The alternation operator is @samp{|}.
|
|
+The alternation operator is @samp{\|}.
|
|
|
|
-The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
|
+The character @samp{^} only represents the beginning of a string when it appears:
|
|
+@enumerate
|
|
|
|
-The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression.
|
|
+@item At the beginning of a regular expression
|
|
+
|
|
+@item After an open-group, signified by @samp{\(}
|
|
+
|
|
+
|
|
+@item After the alternation operator @samp{\|}
|
|
+
|
|
+@end enumerate
|
|
+
|
|
+
|
|
+The character @samp{$} only represents the end of a string when it appears:
|
|
+@enumerate
|
|
+
|
|
+@item At the end of a regular expression
|
|
+
|
|
+@item Before a close-group, signified by @samp{\)}
|
|
+
|
|
+@item Before the alternation operator @samp{\|}
|
|
+
|
|
+@end enumerate
|
|
+
|
|
+
|
|
+@samp{\*}, @samp{\+} and @samp{\?} are special at any point in a regular expression except:
|
|
+@enumerate
|
|
+
|
|
+@item At the beginning of a regular expression
|
|
+
|
|
+@item After an open-group, signified by @samp{\(}
|
|
+
|
|
+@item After the alternation operator @samp{\|}
|
|
+
|
|
+@end enumerate
|
|
+
|
|
+
|
|
+Intervals are specified by @samp{\@{} and @samp{\@}}.
|
|
+Invalid intervals such as @samp{a\@{1z} are not accepted.
|
|
|
|
-Intervals are specified by @samp{@{} and @samp{@}}. Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}
|
|
|
|
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
|
|
|
|
|
-@node emacs regular expression syntax
|
|
-@subsection @samp{emacs} regular expression syntax
|
|
+@node posix-egrep regular expression syntax
|
|
+@subsection @samp{posix-egrep} regular expression syntax
|
|
|
|
|
|
-The character @samp{.} matches any single character except newline.
|
|
+The character @samp{.} matches any single character.
|
|
|
|
|
|
@table @samp
|
|
@@ -237,7 +276,8 @@ matches a @samp{?}.
|
|
@end table
|
|
|
|
|
|
-Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are ignored. Within square brackets, @samp{\} is taken literally. Character classes are not supported, so for example you would need to use @samp{[0-9]} instead of @samp{[[:digit:]]}.
|
|
+Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
|
+
|
|
|
|
GNU extensions are supported:
|
|
@enumerate
|
|
@@ -261,58 +301,27 @@ GNU extensions are supported:
|
|
@end enumerate
|
|
|
|
|
|
-Grouping is performed with backslashes followed by parentheses @samp{\(}, @samp{\)}. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{\(}.
|
|
-
|
|
-The alternation operator is @samp{\|}.
|
|
-
|
|
-The character @samp{^} only represents the beginning of a string when it appears:
|
|
-@enumerate
|
|
-
|
|
-@item
|
|
-At the beginning of a regular expression
|
|
-
|
|
-@item After an open-group, signified by
|
|
-@samp{\(}
|
|
-
|
|
-@item After the alternation operator @samp{\|}
|
|
-
|
|
-@end enumerate
|
|
-
|
|
-
|
|
-The character @samp{$} only represents the end of a string when it appears:
|
|
-@enumerate
|
|
-
|
|
-@item At the end of a regular expression
|
|
-
|
|
-@item Before a close-group, signified by
|
|
-@samp{\)}
|
|
-@item Before the alternation operator @samp{\|}
|
|
-
|
|
-@end enumerate
|
|
-
|
|
-
|
|
-@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
|
|
-@enumerate
|
|
+Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
|
|
|
-@item At the beginning of a regular expression
|
|
+The alternation operator is @samp{|}.
|
|
|
|
-@item After an open-group, signified by
|
|
-@samp{\(}
|
|
-@item After the alternation operator @samp{\|}
|
|
+The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
|
|
|
-@end enumerate
|
|
|
|
+The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression.
|
|
|
|
|
|
+Intervals are specified by @samp{@{} and @samp{@}}.
|
|
+Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}.
|
|
|
|
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
|
|
|
|
|
-@node gnu-awk regular expression syntax
|
|
-@subsection @samp{gnu-awk} regular expression syntax
|
|
+@node posix-extended regular expression syntax
|
|
+@subsection @samp{posix-extended} regular expression syntax
|
|
|
|
|
|
-The character @samp{.} matches any single character.
|
|
+The character @samp{.} matches any single character except the null character.
|
|
|
|
|
|
@table @samp
|
|
@@ -328,7 +337,8 @@ matches a @samp{?}.
|
|
@end table
|
|
|
|
|
|
-Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} can be used to quote the following character. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
|
+Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
|
+
|
|
|
|
GNU extensions are supported:
|
|
@enumerate
|
|
@@ -358,42 +368,101 @@ The alternation operator is @samp{|}.
|
|
|
|
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
|
|
|
-@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
|
|
+
|
|
+@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
|
|
@enumerate
|
|
|
|
@item At the beginning of a regular expression
|
|
|
|
-@item After an open-group, signified by
|
|
-@samp{(}
|
|
+@item After an open-group, signified by @samp{(}
|
|
+
|
|
@item After the alternation operator @samp{|}
|
|
|
|
@end enumerate
|
|
|
|
|
|
-Intervals are specified by @samp{@{} and @samp{@}}. Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}
|
|
+Intervals are specified by @samp{@{} and @samp{@}}.
|
|
+Invalid intervals such as @samp{a@{1z} are not accepted.
|
|
+
|
|
|
|
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
|
|
|
|
|
-@node grep regular expression syntax
|
|
-@subsection @samp{grep} regular expression syntax
|
|
+@node awk regular expression syntax
|
|
+@subsection @samp{awk} regular expression syntax
|
|
|
|
|
|
-The character @samp{.} matches any single character.
|
|
+The character @samp{.} matches any single character except the null character.
|
|
|
|
|
|
@table @samp
|
|
|
|
-@item \+
|
|
+@item +
|
|
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
|
+@item ?
|
|
+indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
|
+@item \+
|
|
+matches a @samp{+}.
|
|
@item \?
|
|
+matches a @samp{?}.
|
|
+@end table
|
|
+
|
|
+
|
|
+Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} can be used to quote the following character. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
|
+
|
|
+
|
|
+GNU extensions are not supported and so @samp{\w}, @samp{\W}, @samp{\<}, @samp{\>}, @samp{\b}, @samp{\B}, @samp{\`}, and @samp{\'} match @samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.
|
|
+
|
|
+
|
|
+Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit matches that digit.
|
|
+
|
|
+The alternation operator is @samp{|}.
|
|
+
|
|
+The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
|
+
|
|
+
|
|
+@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
|
|
+@enumerate
|
|
+
|
|
+@item At the beginning of a regular expression
|
|
+
|
|
+@item After an open-group, signified by @samp{(}
|
|
+
|
|
+@item After the alternation operator @samp{|}
|
|
+
|
|
+@end enumerate
|
|
+
|
|
+
|
|
+
|
|
+
|
|
+The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
|
+
|
|
+
|
|
+@node egrep regular expression syntax
|
|
+@subsection @samp{egrep} regular expression syntax
|
|
+This is a synonym for posix-egrep.
|
|
+@node emacs regular expression syntax
|
|
+@subsection @samp{emacs} regular expression syntax
|
|
+
|
|
+
|
|
+The character @samp{.} matches any single character except newline.
|
|
+
|
|
+
|
|
+@table @samp
|
|
+
|
|
+@item +
|
|
+indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
|
+@item ?
|
|
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
|
-@item + and ?
|
|
-match themselves.
|
|
+@item \+
|
|
+matches a @samp{+}.
|
|
+@item \?
|
|
+matches a @samp{?}.
|
|
@end table
|
|
|
|
|
|
-Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
|
+Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are ignored. Within square brackets, @samp{\} is taken literally. Character classes are not supported, so for example you would need to use @samp{[0-9]} instead of @samp{[[:digit:]]}.
|
|
+
|
|
|
|
GNU extensions are supported:
|
|
@enumerate
|
|
@@ -424,13 +493,10 @@ The alternation operator is @samp{\|}.
|
|
The character @samp{^} only represents the beginning of a string when it appears:
|
|
@enumerate
|
|
|
|
-@item
|
|
-At the beginning of a regular expression
|
|
+@item At the beginning of a regular expression
|
|
|
|
-@item After an open-group, signified by
|
|
-@samp{\(}
|
|
+@item After an open-group, signified by @samp{\(}
|
|
|
|
-@item After a newline
|
|
|
|
@item After the alternation operator @samp{\|}
|
|
|
|
@@ -442,39 +508,35 @@ The character @samp{$} only represents the end of a string when it appears:
|
|
|
|
@item At the end of a regular expression
|
|
|
|
-@item Before a close-group, signified by
|
|
-@samp{\)}
|
|
-@item Before a newline
|
|
+@item Before a close-group, signified by @samp{\)}
|
|
|
|
@item Before the alternation operator @samp{\|}
|
|
|
|
@end enumerate
|
|
|
|
|
|
-@samp{\*}, @samp{\+} and @samp{\?} are special at any point in a regular expression except:
|
|
+@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
|
|
@enumerate
|
|
|
|
@item At the beginning of a regular expression
|
|
|
|
-@item After an open-group, signified by
|
|
-@samp{\(}
|
|
-@item After a newline
|
|
+@item After an open-group, signified by @samp{\(}
|
|
|
|
@item After the alternation operator @samp{\|}
|
|
|
|
@end enumerate
|
|
|
|
|
|
-Intervals are specified by @samp{\@{} and @samp{\@}}. Invalid intervals such as @samp{a\@{1z} are not accepted.
|
|
+
|
|
|
|
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
|
|
|
|
|
-@node posix-awk regular expression syntax
|
|
-@subsection @samp{posix-awk} regular expression syntax
|
|
+@node gnu-awk regular expression syntax
|
|
+@subsection @samp{gnu-awk} regular expression syntax
|
|
|
|
|
|
-The character @samp{.} matches any single character except the null character.
|
|
+The character @samp{.} matches any single character.
|
|
|
|
|
|
@table @samp
|
|
@@ -492,7 +554,28 @@ matches a @samp{?}.
|
|
|
|
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} can be used to quote the following character. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
|
|
|
-GNU extensions are not supported and so @samp{\w}, @samp{\W}, @samp{\<}, @samp{\>}, @samp{\b}, @samp{\B}, @samp{\`}, and @samp{\'} match @samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.
|
|
+
|
|
+GNU extensions are supported:
|
|
+@enumerate
|
|
+
|
|
+@item @samp{\w} matches a character within a word
|
|
+
|
|
+@item @samp{\W} matches a character which is not within a word
|
|
+
|
|
+@item @samp{\<} matches the beginning of a word
|
|
+
|
|
+@item @samp{\>} matches the end of a word
|
|
+
|
|
+@item @samp{\b} matches a word boundary
|
|
+
|
|
+@item @samp{\B} matches characters which are not a word boundary
|
|
+
|
|
+@item @samp{\`} matches the beginning of the whole input
|
|
+
|
|
+@item @samp{\'} matches the end of the whole input
|
|
+
|
|
+@end enumerate
|
|
+
|
|
|
|
Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
|
|
|
@@ -500,51 +583,47 @@ The alternation operator is @samp{|}.
|
|
|
|
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
|
|
|
-@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
|
|
+
|
|
+@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
|
|
@enumerate
|
|
|
|
@item At the beginning of a regular expression
|
|
|
|
-@item After an open-group, signified by
|
|
-@samp{(}
|
|
+@item After an open-group, signified by @samp{(}
|
|
+
|
|
@item After the alternation operator @samp{|}
|
|
|
|
@end enumerate
|
|
|
|
|
|
-Intervals are specified by @samp{@{} and @samp{@}}. Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}
|
|
+Intervals are specified by @samp{@{} and @samp{@}}.
|
|
+Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}.
|
|
|
|
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
|
|
|
|
|
-@node posix-basic regular expression syntax
|
|
-@subsection @samp{posix-basic} regular expression syntax
|
|
-This is a synonym for ed.
|
|
-@node posix-egrep regular expression syntax
|
|
-@subsection @samp{posix-egrep} regular expression syntax
|
|
-This is a synonym for egrep.
|
|
-@node posix-extended regular expression syntax
|
|
-@subsection @samp{posix-extended} regular expression syntax
|
|
+@node grep regular expression syntax
|
|
+@subsection @samp{grep} regular expression syntax
|
|
|
|
|
|
-The character @samp{.} matches any single character except the null character.
|
|
+The character @samp{.} matches any single character.
|
|
|
|
|
|
@table @samp
|
|
|
|
-@item +
|
|
-indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
|
-@item ?
|
|
-indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
|
@item \+
|
|
-matches a @samp{+}
|
|
+indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
|
@item \?
|
|
-matches a @samp{?}.
|
|
+indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
|
+@item + and ?
|
|
+match themselves.
|
|
+
|
|
@end table
|
|
|
|
|
|
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
|
|
|
+
|
|
GNU extensions are supported:
|
|
@enumerate
|
|
|
|
@@ -567,25 +646,56 @@ GNU extensions are supported:
|
|
@end enumerate
|
|
|
|
|
|
-Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
|
+Grouping is performed with backslashes followed by parentheses @samp{\(}, @samp{\)}. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{\(}.
|
|
|
|
-The alternation operator is @samp{|}.
|
|
+The alternation operator is @samp{\|}.
|
|
|
|
-The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
|
+The character @samp{^} only represents the beginning of a string when it appears:
|
|
+@enumerate
|
|
|
|
-@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
|
|
+@item At the beginning of a regular expression
|
|
+
|
|
+@item After an open-group, signified by @samp{\(}
|
|
+
|
|
+
|
|
+@item After a newline
|
|
+
|
|
+@item After the alternation operator @samp{\|}
|
|
+
|
|
+@end enumerate
|
|
+
|
|
+
|
|
+The character @samp{$} only represents the end of a string when it appears:
|
|
+@enumerate
|
|
+
|
|
+@item At the end of a regular expression
|
|
+
|
|
+@item Before a close-group, signified by @samp{\)}
|
|
+
|
|
+@item Before a newline
|
|
+
|
|
+@item Before the alternation operator @samp{\|}
|
|
+
|
|
+@end enumerate
|
|
+
|
|
+
|
|
+@samp{\*}, @samp{\+} and @samp{\?} are special at any point in a regular expression except:
|
|
@enumerate
|
|
|
|
@item At the beginning of a regular expression
|
|
|
|
-@item After an open-group, signified by
|
|
-@samp{(}
|
|
-@item After the alternation operator @samp{|}
|
|
+@item After an open-group, signified by @samp{\(}
|
|
+
|
|
+@item After a newline
|
|
+
|
|
+@item After the alternation operator @samp{\|}
|
|
|
|
@end enumerate
|
|
|
|
|
|
-Intervals are specified by @samp{@{} and @samp{@}}. Invalid intervals such as @samp{a@{1z} are not accepted.
|
|
+Intervals are specified by @samp{\@{} and @samp{\@}}.
|
|
+Invalid intervals such as @samp{a\@{1z} are not accepted.
|
|
+
|
|
|
|
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
|
|
|
diff --git a/find/find.1 b/find/find.1
|
|
index 06ddfa5b..8b1320c1 100644
|
|
--- a/find/find.1
|
|
+++ b/find/find.1
|
|
@@ -879,8 +879,8 @@ on the whole path, not a search. For example, to match a file named
|
|
`./fubar3', you can use the regular expression `.*bar.' or `.*b.*3',
|
|
but not `f.*r3'. The regular expressions understood by
|
|
.B find
|
|
-are by default Emacs Regular Expressions, but this can be
|
|
-changed with the
|
|
+are by default Emacs Regular Expressions (except that `.' matches
|
|
+newline), but this can be changed with the
|
|
.B \-regextype
|
|
option.
|
|
|
|
diff --git a/lib/regexprops.c b/lib/regexprops.c
|
|
index fcbdd5db..b20b4a38 100644
|
|
--- a/lib/regexprops.c
|
|
+++ b/lib/regexprops.c
|
|
@@ -78,8 +78,12 @@ directive (const char *s)
|
|
static void
|
|
comment (const char *s)
|
|
{
|
|
- directive ("@c ");
|
|
- literal (s);
|
|
+ directive ("@c");
|
|
+ if (s[0])
|
|
+ {
|
|
+ literal (" ");
|
|
+ literal (s);
|
|
+ }
|
|
newline ();
|
|
}
|
|
|
|
@@ -175,7 +179,7 @@ describe_regex_syntax (int options)
|
|
|
|
content (" the null character");
|
|
}
|
|
- content (". ");
|
|
+ content (".");
|
|
newpara ();
|
|
|
|
if (!(options & RE_LIMITED_OPS))
|
|
@@ -185,25 +189,25 @@ describe_regex_syntax (int options)
|
|
{
|
|
enum_item ("\\+");
|
|
content ("indicates that the regular expression should match one"
|
|
- " or more occurrences of the previous atom or regexp. ");
|
|
+ " or more occurrences of the previous atom or regexp.");
|
|
enum_item ("\\?");
|
|
content ("indicates that the regular expression should match zero"
|
|
- " or one occurrence of the previous atom or regexp. ");
|
|
- enum_item ("+ and ? ");
|
|
- content ("match themselves. ");
|
|
+ " or one occurrence of the previous atom or regexp.");
|
|
+ enum_item ("+ and ?");
|
|
+ content ("match themselves.\n");
|
|
}
|
|
else
|
|
{
|
|
enum_item ("+");
|
|
content ("indicates that the regular expression should match one"
|
|
- " or more occurrences of the previous atom or regexp. ");
|
|
+ " or more occurrences of the previous atom or regexp.");
|
|
enum_item ("?");
|
|
content ("indicates that the regular expression should match zero"
|
|
- " or one occurrence of the previous atom or regexp. ");
|
|
+ " or one occurrence of the previous atom or regexp.");
|
|
enum_item ("\\+");
|
|
literal ("matches a @samp{+}");
|
|
enum_item ("\\?");
|
|
- literal ("matches a @samp{?}. ");
|
|
+ literal ("matches a @samp{?}.");
|
|
}
|
|
endtable ();
|
|
}
|
|
@@ -226,15 +230,15 @@ describe_regex_syntax (int options)
|
|
|
|
if (options & RE_CHAR_CLASSES)
|
|
content ("Character classes are supported; for example "
|
|
- "@samp{[[:digit:]]} will match a single decimal digit. ");
|
|
+ "@samp{[[:digit:]]} will match a single decimal digit.\n");
|
|
else
|
|
literal ("Character classes are not supported, so for example "
|
|
"you would need to use @samp{[0-9]} "
|
|
- "instead of @samp{[[:digit:]]}. ");
|
|
+ "instead of @samp{[[:digit:]]}.\n");
|
|
|
|
if (options & RE_HAT_LISTS_NOT_NEWLINE)
|
|
{
|
|
- literal ("Non-matching lists @samp{[^@dots{}]} do not ever match newline. ");
|
|
+ literal ("Non-matching lists @samp{[^@dots{}]} do not ever match newline.\n");
|
|
}
|
|
newpara ();
|
|
if (options & RE_NO_GNU_OPS)
|
|
@@ -242,7 +246,7 @@ describe_regex_syntax (int options)
|
|
content ("GNU extensions are not supported and so "
|
|
"@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
|
|
"match "
|
|
- "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively. ");
|
|
+ "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.\n");
|
|
}
|
|
else
|
|
{
|
|
@@ -276,7 +280,7 @@ describe_regex_syntax (int options)
|
|
|
|
if (options & RE_NO_BK_REFS)
|
|
{
|
|
- content ("A backslash followed by a digit matches that digit. ");
|
|
+ content ("A backslash followed by a digit matches that digit.");
|
|
}
|
|
else
|
|
{
|
|
@@ -285,7 +289,7 @@ describe_regex_syntax (int options)
|
|
literal ("@samp{(}");
|
|
else
|
|
literal ("@samp{\\(}");
|
|
- content (". ");
|
|
+ content (".");
|
|
}
|
|
|
|
|
|
@@ -293,29 +297,28 @@ describe_regex_syntax (int options)
|
|
if (!(options & RE_LIMITED_OPS))
|
|
{
|
|
if (options & RE_NO_BK_VBAR)
|
|
- literal ("The alternation operator is @samp{|}. ");
|
|
+ literal ("The alternation operator is @samp{|}.");
|
|
else
|
|
- literal ("The alternation operator is @samp{\\|}. ");
|
|
+ literal ("The alternation operator is @samp{\\|}.");
|
|
}
|
|
newpara ();
|
|
|
|
if (options & RE_CONTEXT_INDEP_ANCHORS)
|
|
{
|
|
- literal ("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified. ");
|
|
+ literal ("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.\n");
|
|
}
|
|
else
|
|
{
|
|
literal ("The character @samp{^} only represents the beginning of a string when it appears:");
|
|
beginenum ();
|
|
- enum_item ("\nAt the beginning of a regular expression");
|
|
- enum_item ("After an open-group, signified by ");
|
|
+ enum_item ("At the beginning of a regular expression");
|
|
if (options & RE_NO_BK_PARENS)
|
|
{
|
|
- literal ("@samp{(}");
|
|
+ enum_item ("After an open-group, signified by @samp{(}");
|
|
}
|
|
else
|
|
{
|
|
- literal ("@samp{\\(}");
|
|
+ enum_item ("After an open-group, signified by @samp{\\(}");
|
|
}
|
|
newline ();
|
|
if (!(options & RE_LIMITED_OPS))
|
|
@@ -334,14 +337,13 @@ describe_regex_syntax (int options)
|
|
literal ("The character @samp{$} only represents the end of a string when it appears:");
|
|
beginenum ();
|
|
enum_item ("At the end of a regular expression");
|
|
- enum_item ("Before a close-group, signified by ");
|
|
if (options & RE_NO_BK_PARENS)
|
|
{
|
|
- literal ("@samp{)}");
|
|
+ enum_item ("Before a close-group, signified by @samp{)}");
|
|
}
|
|
else
|
|
{
|
|
- literal ("@samp{\\)}");
|
|
+ enum_item ("Before a close-group, signified by @samp{\\)}");
|
|
}
|
|
if (!(options & RE_LIMITED_OPS))
|
|
{
|
|
@@ -361,7 +363,7 @@ describe_regex_syntax (int options)
|
|
if ((options & RE_CONTEXT_INDEP_OPS)
|
|
&& !(options & RE_CONTEXT_INVALID_OPS))
|
|
{
|
|
- literal ("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression. ");
|
|
+ literal ("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression.\n");
|
|
}
|
|
else
|
|
{
|
|
@@ -381,14 +383,13 @@ describe_regex_syntax (int options)
|
|
|
|
beginenum ();
|
|
enum_item ("At the beginning of a regular expression");
|
|
- enum_item ("After an open-group, signified by ");
|
|
if (options & RE_NO_BK_PARENS)
|
|
{
|
|
- literal ("@samp{(}");
|
|
+ enum_item ("After an open-group, signified by @samp{(}");
|
|
}
|
|
else
|
|
{
|
|
- literal ("@samp{\\(}");
|
|
+ enum_item ("After an open-group, signified by @samp{\\(}");
|
|
}
|
|
if (!(options & RE_LIMITED_OPS))
|
|
{
|
|
@@ -410,39 +411,38 @@ describe_regex_syntax (int options)
|
|
{
|
|
if (options & RE_NO_BK_BRACES)
|
|
{
|
|
- literal ("Intervals are specified by @samp{@{} and @samp{@}}. ");
|
|
+ literal ("Intervals are specified by @samp{@{} and @samp{@}}.\n");
|
|
if (options & RE_INVALID_INTERVAL_ORD)
|
|
{
|
|
literal ("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}");
|
|
}
|
|
else
|
|
{
|
|
- literal ("Invalid intervals such as @samp{a@{1z} are not accepted. ");
|
|
+ literal ("Invalid intervals such as @samp{a@{1z} are not accepted.\n");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
- literal ("Intervals are specified by @samp{\\@{} and @samp{\\@}}. ");
|
|
+ literal ("Intervals are specified by @samp{\\@{} and @samp{\\@}}.\n");
|
|
if (options & RE_INVALID_INTERVAL_ORD)
|
|
{
|
|
literal ("Invalid intervals are treated as literals, for example @samp{a\\@{1} is treated as @samp{a@{1}");
|
|
}
|
|
else
|
|
{
|
|
- literal ("Invalid intervals such as @samp{a\\@{1z} are not accepted. ");
|
|
+ literal ("Invalid intervals such as @samp{a\\@{1z} are not accepted.\n");
|
|
}
|
|
}
|
|
-
|
|
}
|
|
|
|
newpara ();
|
|
if (options & RE_NO_POSIX_BACKTRACKING)
|
|
{
|
|
- content ("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match. ");
|
|
+ content ("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match.");
|
|
}
|
|
else
|
|
{
|
|
- content ("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups. ");
|
|
+ content ("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.");
|
|
}
|
|
newpara ();
|
|
}
|
|
diff --git a/lib/regextype.c b/lib/regextype.c
|
|
index 8a7347dc..89416ebd 100644
|
|
--- a/lib/regextype.c
|
|
+++ b/lib/regextype.c
|
|
@@ -56,17 +56,19 @@ struct tagRegexTypeMap
|
|
struct tagRegexTypeMap regex_map[] =
|
|
{
|
|
{ "findutils-default", CONTEXT_FINDUTILS, RE_SYNTAX_EMACS|RE_DOT_NEWLINE },
|
|
+
|
|
+ { "posix-awk", CONTEXT_ALL, RE_SYNTAX_POSIX_AWK },
|
|
+ { "posix-basic", CONTEXT_ALL, RE_SYNTAX_POSIX_BASIC },
|
|
+ { "posix-egrep", CONTEXT_ALL, RE_SYNTAX_POSIX_EGREP },
|
|
+ { "posix-extended", CONTEXT_ALL, RE_SYNTAX_POSIX_EXTENDED },
|
|
+ { "posix-minimal-basic", CONTEXT_GENERIC, RE_SYNTAX_POSIX_MINIMAL_BASIC },
|
|
+
|
|
{ "awk", CONTEXT_ALL, RE_SYNTAX_AWK },
|
|
- { "egrep", CONTEXT_ALL, RE_SYNTAX_EGREP },
|
|
{ "ed", CONTEXT_GENERIC, RE_SYNTAX_ED },
|
|
+ { "egrep", CONTEXT_ALL, RE_SYNTAX_EGREP },
|
|
{ "emacs", CONTEXT_ALL, RE_SYNTAX_EMACS },
|
|
{ "gnu-awk", CONTEXT_ALL, RE_SYNTAX_GNU_AWK },
|
|
{ "grep", CONTEXT_ALL, RE_SYNTAX_GREP },
|
|
- { "posix-awk", CONTEXT_ALL, RE_SYNTAX_POSIX_AWK },
|
|
- { "posix-basic", CONTEXT_ALL, RE_SYNTAX_POSIX_BASIC },
|
|
- { "posix-egrep", CONTEXT_ALL, RE_SYNTAX_POSIX_EGREP },
|
|
- { "posix-extended", CONTEXT_ALL, RE_SYNTAX_POSIX_EXTENDED },
|
|
- { "posix-minimal-basic", CONTEXT_GENERIC, RE_SYNTAX_POSIX_MINIMAL_BASIC },
|
|
{ "sed", CONTEXT_GENERIC, RE_SYNTAX_SED },
|
|
/* ,{ "posix-common", CONTEXT_GENERIC, _RE_SYNTAX_POSIX_COMMON } */
|
|
};
|
|
--
|
|
2.19.1
|
|
|