From e2c673cbcdc325a3a2e9dd02169bb4a42c61bc48 Mon Sep 17 00:00:00 2001
From: James Youngman <jay@gnu.org>
Date: Mon, 13 Nov 2017 22:37:55 +0000
Subject: [PATCH 144/224] regexprops: fix dangling reference to the `ed'
 regular expression dialect.

* lib/regextype.c (regex_map): Permute the entries to list POSIX
dialects before other ones, so that we don't end up with a
dangling reference to `ed' regular expressions when
context=findutils.
* lib/regexprops.c (comment, describe_regex_syntax): Remove trailing
white space from the output.
* doc/regexprops.texi: Regenerate this file, so that we no longer
have a dangling reference to the `ed' dialect.
* doc/find.texi (Regular Expressions): Point out the difference
between Emacs regular expressions and findutils regular
expressions: in findutils "." will match newline.
* find/find.1: Likewise.
* locate/locate.1: Likewise.  Also document the --regextype option.
---
 doc/find.texi       |   7 +-
 doc/regexprops.texi | 376 ++++++++++++++++++++++++++++----------------
 find/find.1         |   4 +-
 lib/regexprops.c    |  74 ++++-----
 lib/regextype.c     |  14 +-
 locate/locate.1     |  14 +-
 6 files changed, 306 insertions(+), 183 deletions(-)

diff --git a/doc/find.texi b/doc/find.texi
index 2731f0af..5573d29b 100644
--- a/doc/find.texi
+++ b/doc/find.texi
@@ -3917,8 +3917,11 @@ your locale setup affects the interpretation of regular expressions.
 
 There are also several different types of regular expression, and
 these are interpreted differently.  Normally, the type of regular
-expression used by @code{find} and @code{locate} is the same as is
-used in GNU Emacs.  Both programs provide an option which allows you
+expression used by @code{find} and @code{locate} is almost identical to
+that used in GNU Emacs.  The single difference is that in @code{find}
+and @code{locate}, a @samp{.} will match a newline character.
+
+Both @code{find} and @code{locate} provide an option which allows you
 to select an alternative regular expression syntax; for @code{find}
 this is the @samp{-regextype} option, and for @code{locate} this is
 the @samp{--regextype} option.
diff --git a/doc/regexprops.texi b/doc/regexprops.texi
index 8fee88ae..0229460e 100644
--- a/doc/regexprops.texi
+++ b/doc/regexprops.texi
@@ -11,15 +11,15 @@
 
 @menu
 * findutils-default regular expression syntax::
+* posix-awk regular expression syntax::
+* posix-basic regular expression syntax::
+* posix-egrep regular expression syntax::
+* posix-extended regular expression syntax::
 * awk regular expression syntax::
 * egrep regular expression syntax::
 * emacs regular expression syntax::
 * gnu-awk regular expression syntax::
 * grep regular expression syntax::
-* posix-awk regular expression syntax::
-* posix-basic regular expression syntax::
-* posix-egrep regular expression syntax::
-* posix-extended regular expression syntax::
 @end menu
 
 @node findutils-default regular expression syntax
@@ -44,6 +44,7 @@ matches a @samp{?}.
 
 Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are ignored.  Within square brackets, @samp{\} is taken literally.  Character classes are not supported, so for example you would need to use @samp{[0-9]} instead of @samp{[[:digit:]]}.
 
+
 GNU extensions are supported:
 @enumerate
 
@@ -73,11 +74,10 @@ The alternation operator is @samp{\|}.
 The character @samp{^} only represents the beginning of a string when it appears:
 @enumerate
 
-@item
-At the beginning of a regular expression
+@item At the beginning of a regular expression
+
+@item After an open-group, signified by @samp{\(}
 
-@item After an open-group, signified by
-@samp{\(}
 
 @item After the alternation operator @samp{\|}
 
@@ -89,8 +89,8 @@ The character @samp{$} only represents the end of a string when it appears:
 
 @item At the end of a regular expression
 
-@item Before a close-group, signified by
-@samp{\)}
+@item Before a close-group, signified by @samp{\)}
+
 @item Before the alternation operator @samp{\|}
 
 @end enumerate
@@ -101,8 +101,8 @@ The character @samp{$} only represents the end of a string when it appears:
 
 @item At the beginning of a regular expression
 
-@item After an open-group, signified by
-@samp{\(}
+@item After an open-group, signified by @samp{\(}
+
 @item After the alternation operator @samp{\|}
 
 @end enumerate
@@ -113,8 +113,8 @@ The character @samp{$} only represents the end of a string when it appears:
 The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
 
 
-@node awk regular expression syntax
-@subsection @samp{awk} regular expression syntax
+@node posix-awk regular expression syntax
+@subsection @samp{posix-awk} regular expression syntax
 
 
 The character @samp{.} matches any single character except the null character.
@@ -135,53 +135,57 @@ matches a @samp{?}.
 
 Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid.  Within square brackets, @samp{\} can be used to quote the following character.  Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
 
+
 GNU extensions are not supported and so @samp{\w}, @samp{\W}, @samp{\<}, @samp{\>}, @samp{\b}, @samp{\B}, @samp{\`}, and @samp{\'} match @samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.
 
-Grouping is performed with parentheses @samp{()}.  An unmatched @samp{)} matches just itself.  A backslash followed by a digit matches that digit.
+
+Grouping is performed with parentheses @samp{()}.  An unmatched @samp{)} matches just itself.  A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number.  For example @samp{\2} matches the second group expression.  The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
 
 The alternation operator is @samp{|}.
 
 The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets.  Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
 
-@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
+
+@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
 @enumerate
 
 @item At the beginning of a regular expression
 
-@item After an open-group, signified by
-@samp{(}
+@item After an open-group, signified by @samp{(}
+
 @item After the alternation operator @samp{|}
 
 @end enumerate
 
 
-
+Intervals are specified by @samp{@{} and @samp{@}}.
+Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}
 
 The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
 
 
-@node egrep regular expression syntax
-@subsection @samp{egrep} regular expression syntax
+@node posix-basic regular expression syntax
+@subsection @samp{posix-basic} regular expression syntax
 
 
-The character @samp{.} matches any single character.
+The character @samp{.} matches any single character except the null character.
 
 
 @table @samp
 
-@item +
-indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
-@item ?
-indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
 @item \+
-matches a @samp{+}
+indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
 @item \?
-matches a @samp{?}.
+indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
+@item + and ?
+match themselves.
+
 @end table
 
 
 Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid.  Within square brackets, @samp{\} is taken literally.  Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
 
+
 GNU extensions are supported:
 @enumerate
 
@@ -204,24 +208,59 @@ GNU extensions are supported:
 @end enumerate
 
 
-Grouping is performed with parentheses @samp{()}.  An unmatched @samp{)} matches just itself.  A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number.  For example @samp{\2} matches the second group expression.  The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
+Grouping is performed with backslashes followed by parentheses @samp{\(}, @samp{\)}.  A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number.  For example @samp{\2} matches the second group expression.  The order of group expressions is determined by the position of their opening parenthesis @samp{\(}.
 
-The alternation operator is @samp{|}.
+The alternation operator is @samp{\|}.
 
-The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets.  Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
+The character @samp{^} only represents the beginning of a string when it appears:
+@enumerate
 
-The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression.
+@item At the beginning of a regular expression
+
+@item After an open-group, signified by @samp{\(}
+
+
+@item After the alternation operator @samp{\|}
+
+@end enumerate
+
+
+The character @samp{$} only represents the end of a string when it appears:
+@enumerate
+
+@item At the end of a regular expression
+
+@item Before a close-group, signified by @samp{\)}
+
+@item Before the alternation operator @samp{\|}
+
+@end enumerate
+
+
+@samp{\*}, @samp{\+} and @samp{\?} are special at any point in a regular expression except:
+@enumerate
+
+@item At the beginning of a regular expression
+
+@item After an open-group, signified by @samp{\(}
+
+@item After the alternation operator @samp{\|}
+
+@end enumerate
+
+
+Intervals are specified by @samp{\@{} and @samp{\@}}.
+Invalid intervals such as @samp{a\@{1z} are not accepted.
 
-Intervals are specified by @samp{@{} and @samp{@}}.  Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}
 
 The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
 
 
-@node emacs regular expression syntax
-@subsection @samp{emacs} regular expression syntax
+@node posix-egrep regular expression syntax
+@subsection @samp{posix-egrep} regular expression syntax
 
 
-The character @samp{.} matches any single character except newline.
+The character @samp{.} matches any single character.
 
 
 @table @samp
@@ -237,7 +276,8 @@ matches a @samp{?}.
 @end table
 
 
-Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are ignored.  Within square brackets, @samp{\} is taken literally.  Character classes are not supported, so for example you would need to use @samp{[0-9]} instead of @samp{[[:digit:]]}.
+Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid.  Within square brackets, @samp{\} is taken literally.  Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
+
 
 GNU extensions are supported:
 @enumerate
@@ -261,58 +301,27 @@ GNU extensions are supported:
 @end enumerate
 
 
-Grouping is performed with backslashes followed by parentheses @samp{\(}, @samp{\)}.  A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number.  For example @samp{\2} matches the second group expression.  The order of group expressions is determined by the position of their opening parenthesis @samp{\(}.
-
-The alternation operator is @samp{\|}.
-
-The character @samp{^} only represents the beginning of a string when it appears:
-@enumerate
-
-@item
-At the beginning of a regular expression
-
-@item After an open-group, signified by
-@samp{\(}
-
-@item After the alternation operator @samp{\|}
-
-@end enumerate
-
-
-The character @samp{$} only represents the end of a string when it appears:
-@enumerate
-
-@item At the end of a regular expression
-
-@item Before a close-group, signified by
-@samp{\)}
-@item Before the alternation operator @samp{\|}
-
-@end enumerate
-
-
-@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
-@enumerate
+Grouping is performed with parentheses @samp{()}.  An unmatched @samp{)} matches just itself.  A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number.  For example @samp{\2} matches the second group expression.  The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
 
-@item At the beginning of a regular expression
+The alternation operator is @samp{|}.
 
-@item After an open-group, signified by
-@samp{\(}
-@item After the alternation operator @samp{\|}
+The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets.  Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
 
-@end enumerate
 
+The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression.
 
 
+Intervals are specified by @samp{@{} and @samp{@}}.
+Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}
 
 The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
 
 
-@node gnu-awk regular expression syntax
-@subsection @samp{gnu-awk} regular expression syntax
+@node posix-extended regular expression syntax
+@subsection @samp{posix-extended} regular expression syntax
 
 
-The character @samp{.} matches any single character.
+The character @samp{.} matches any single character except the null character.
 
 
 @table @samp
@@ -328,7 +337,8 @@ matches a @samp{?}.
 @end table
 
 
-Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid.  Within square brackets, @samp{\} can be used to quote the following character.  Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
+Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid.  Within square brackets, @samp{\} is taken literally.  Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
+
 
 GNU extensions are supported:
 @enumerate
@@ -358,42 +368,101 @@ The alternation operator is @samp{|}.
 
 The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets.  Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
 
-@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
+
+@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
 @enumerate
 
 @item At the beginning of a regular expression
 
-@item After an open-group, signified by
-@samp{(}
+@item After an open-group, signified by @samp{(}
+
 @item After the alternation operator @samp{|}
 
 @end enumerate
 
 
-Intervals are specified by @samp{@{} and @samp{@}}.  Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}
+Intervals are specified by @samp{@{} and @samp{@}}.
+Invalid intervals such as @samp{a@{1z} are not accepted.
+
 
 The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
 
 
-@node grep regular expression syntax
-@subsection @samp{grep} regular expression syntax
+@node awk regular expression syntax
+@subsection @samp{awk} regular expression syntax
 
 
-The character @samp{.} matches any single character.
+The character @samp{.} matches any single character except the null character.
 
 
 @table @samp
 
-@item \+
+@item +
 indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
+@item ?
+indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
+@item \+
+matches a @samp{+}
 @item \?
+matches a @samp{?}.
+@end table
+
+
+Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid.  Within square brackets, @samp{\} can be used to quote the following character.  Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
+
+
+GNU extensions are not supported and so @samp{\w}, @samp{\W}, @samp{\<}, @samp{\>}, @samp{\b}, @samp{\B}, @samp{\`}, and @samp{\'} match @samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.
+
+
+Grouping is performed with parentheses @samp{()}.  An unmatched @samp{)} matches just itself.  A backslash followed by a digit matches that digit.
+
+The alternation operator is @samp{|}.
+
+The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets.  Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
+
+
+@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
+@enumerate
+
+@item At the beginning of a regular expression
+
+@item After an open-group, signified by @samp{(}
+
+@item After the alternation operator @samp{|}
+
+@end enumerate
+
+
+
+
+The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
+
+
+@node egrep regular expression syntax
+@subsection @samp{egrep} regular expression syntax
+This is a synonym for posix-egrep.
+@node emacs regular expression syntax
+@subsection @samp{emacs} regular expression syntax
+
+
+The character @samp{.} matches any single character except newline.
+
+
+@table @samp
+
+@item +
+indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
+@item ?
 indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
-@item + and ?
-match themselves.
+@item \+
+matches a @samp{+}
+@item \?
+matches a @samp{?}.
 @end table
 
 
-Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid.  Within square brackets, @samp{\} is taken literally.  Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
+Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are ignored.  Within square brackets, @samp{\} is taken literally.  Character classes are not supported, so for example you would need to use @samp{[0-9]} instead of @samp{[[:digit:]]}.
+
 
 GNU extensions are supported:
 @enumerate
@@ -424,13 +493,10 @@ The alternation operator is @samp{\|}.
 The character @samp{^} only represents the beginning of a string when it appears:
 @enumerate
 
-@item
-At the beginning of a regular expression
+@item At the beginning of a regular expression
 
-@item After an open-group, signified by
-@samp{\(}
+@item After an open-group, signified by @samp{\(}
 
-@item After a newline
 
 @item After the alternation operator @samp{\|}
 
@@ -442,39 +508,35 @@ The character @samp{$} only represents the end of a string when it appears:
 
 @item At the end of a regular expression
 
-@item Before a close-group, signified by
-@samp{\)}
-@item Before a newline
+@item Before a close-group, signified by @samp{\)}
 
 @item Before the alternation operator @samp{\|}
 
 @end enumerate
 
 
-@samp{\*}, @samp{\+} and @samp{\?} are special at any point in a regular expression except:
+@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
 @enumerate
 
 @item At the beginning of a regular expression
 
-@item After an open-group, signified by
-@samp{\(}
-@item After a newline
+@item After an open-group, signified by @samp{\(}
 
 @item After the alternation operator @samp{\|}
 
 @end enumerate
 
 
-Intervals are specified by @samp{\@{} and @samp{\@}}.  Invalid intervals such as @samp{a\@{1z} are not accepted.
+
 
 The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
 
 
-@node posix-awk regular expression syntax
-@subsection @samp{posix-awk} regular expression syntax
+@node gnu-awk regular expression syntax
+@subsection @samp{gnu-awk} regular expression syntax
 
 
-The character @samp{.} matches any single character except the null character.
+The character @samp{.} matches any single character.
 
 
 @table @samp
@@ -492,7 +554,28 @@ matches a @samp{?}.
 
 Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid.  Within square brackets, @samp{\} can be used to quote the following character.  Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
 
-GNU extensions are not supported and so @samp{\w}, @samp{\W}, @samp{\<}, @samp{\>}, @samp{\b}, @samp{\B}, @samp{\`}, and @samp{\'} match @samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.
+
+GNU extensions are supported:
+@enumerate
+
+@item @samp{\w} matches a character within a word
+
+@item @samp{\W} matches a character which is not within a word
+
+@item @samp{\<} matches the beginning of a word
+
+@item @samp{\>} matches the end of a word
+
+@item @samp{\b} matches a word boundary
+
+@item @samp{\B} matches characters which are not a word boundary
+
+@item @samp{\`} matches the beginning of the whole input
+
+@item @samp{\'} matches the end of the whole input
+
+@end enumerate
+
 
 Grouping is performed with parentheses @samp{()}.  An unmatched @samp{)} matches just itself.  A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number.  For example @samp{\2} matches the second group expression.  The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
 
@@ -500,51 +583,47 @@ The alternation operator is @samp{|}.
 
 The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets.  Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
 
-@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
+
+@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
 @enumerate
 
 @item At the beginning of a regular expression
 
-@item After an open-group, signified by
-@samp{(}
+@item After an open-group, signified by @samp{(}
+
 @item After the alternation operator @samp{|}
 
 @end enumerate
 
 
-Intervals are specified by @samp{@{} and @samp{@}}.  Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}
+Intervals are specified by @samp{@{} and @samp{@}}.
+Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}
 
 The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
 
 
-@node posix-basic regular expression syntax
-@subsection @samp{posix-basic} regular expression syntax
-This is a synonym for ed.
-@node posix-egrep regular expression syntax
-@subsection @samp{posix-egrep} regular expression syntax
-This is a synonym for egrep.
-@node posix-extended regular expression syntax
-@subsection @samp{posix-extended} regular expression syntax
+@node grep regular expression syntax
+@subsection @samp{grep} regular expression syntax
 
 
-The character @samp{.} matches any single character except the null character.
+The character @samp{.} matches any single character.
 
 
 @table @samp
 
-@item +
-indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
-@item ?
-indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
 @item \+
-matches a @samp{+}
+indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
 @item \?
-matches a @samp{?}.
+indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
+@item + and ?
+match themselves.
+
 @end table
 
 
 Bracket expressions are used to match ranges of characters.  Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid.  Within square brackets, @samp{\} is taken literally.  Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
 
+
 GNU extensions are supported:
 @enumerate
 
@@ -567,25 +646,56 @@ GNU extensions are supported:
 @end enumerate
 
 
-Grouping is performed with parentheses @samp{()}.  An unmatched @samp{)} matches just itself.  A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number.  For example @samp{\2} matches the second group expression.  The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
+Grouping is performed with backslashes followed by parentheses @samp{\(}, @samp{\)}.  A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number.  For example @samp{\2} matches the second group expression.  The order of group expressions is determined by the position of their opening parenthesis @samp{\(}.
 
-The alternation operator is @samp{|}.
+The alternation operator is @samp{\|}.
 
-The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets.  Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
+The character @samp{^} only represents the beginning of a string when it appears:
+@enumerate
 
-@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
+@item At the beginning of a regular expression
+
+@item After an open-group, signified by @samp{\(}
+
+
+@item After a newline
+
+@item After the alternation operator @samp{\|}
+
+@end enumerate
+
+
+The character @samp{$} only represents the end of a string when it appears:
+@enumerate
+
+@item At the end of a regular expression
+
+@item Before a close-group, signified by @samp{\)}
+
+@item Before a newline
+
+@item Before the alternation operator @samp{\|}
+
+@end enumerate
+
+
+@samp{\*}, @samp{\+} and @samp{\?} are special at any point in a regular expression except:
 @enumerate
 
 @item At the beginning of a regular expression
 
-@item After an open-group, signified by
-@samp{(}
-@item After the alternation operator @samp{|}
+@item After an open-group, signified by @samp{\(}
+
+@item After a newline
+
+@item After the alternation operator @samp{\|}
 
 @end enumerate
 
 
-Intervals are specified by @samp{@{} and @samp{@}}.  Invalid intervals such as @samp{a@{1z} are not accepted.
+Intervals are specified by @samp{\@{} and @samp{\@}}.
+Invalid intervals such as @samp{a\@{1z} are not accepted.
+
 
 The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
 
diff --git a/find/find.1 b/find/find.1
index 06ddfa5b..8b1320c1 100644
--- a/find/find.1
+++ b/find/find.1
@@ -879,8 +879,8 @@ on the whole path, not a search.  For example, to match a file named
 `./fubar3', you can use the regular expression `.*bar.' or `.*b.*3',
 but not `f.*r3'.  The regular expressions understood by
 .B find
-are by default Emacs Regular Expressions, but this can be
-changed with the
+are by default Emacs Regular Expressions (except that `.' matches
+newline), but this can be changed with the
 .B \-regextype
 option.
 
diff --git a/lib/regexprops.c b/lib/regexprops.c
index fcbdd5db..b20b4a38 100644
--- a/lib/regexprops.c
+++ b/lib/regexprops.c
@@ -78,8 +78,12 @@ directive (const char *s)
 static void
 comment (const char *s)
 {
-  directive ("@c ");
-  literal (s);
+  directive ("@c");
+  if (s[0])
+    {
+      literal (" ");
+      literal (s);
+    }
   newline ();
 }
 
@@ -175,7 +179,7 @@ describe_regex_syntax (int options)
 
       content (" the null character");
     }
-  content (".  ");
+  content (".");
   newpara ();
 
   if (!(options & RE_LIMITED_OPS))
@@ -185,25 +189,25 @@ describe_regex_syntax (int options)
 	{
 	  enum_item ("\\+");
 	  content ("indicates that the regular expression should match one"
-		   " or more occurrences of the previous atom or regexp.  ");
+		   " or more occurrences of the previous atom or regexp.");
 	  enum_item ("\\?");
 	  content ("indicates that the regular expression should match zero"
-		   " or one occurrence of the previous atom or regexp.  ");
-	  enum_item ("+ and ? ");
-	  content ("match themselves.  ");
+		   " or one occurrence of the previous atom or regexp.");
+	  enum_item ("+ and ?");
+	  content ("match themselves.\n");
 	}
       else
 	{
 	  enum_item ("+");
 	  content ("indicates that the regular expression should match one"
-		   " or more occurrences of the previous atom or regexp.  ");
+		   " or more occurrences of the previous atom or regexp.");
 	  enum_item ("?");
 	  content ("indicates that the regular expression should match zero"
-		   " or one occurrence of the previous atom or regexp.  ");
+		   " or one occurrence of the previous atom or regexp.");
 	  enum_item ("\\+");
 	  literal ("matches a @samp{+}");
 	  enum_item ("\\?");
-	  literal ("matches a @samp{?}.  ");
+	  literal ("matches a @samp{?}.");
 	}
       endtable ();
     }
@@ -226,15 +230,15 @@ describe_regex_syntax (int options)
 
   if (options & RE_CHAR_CLASSES)
     content ("Character classes are supported; for example "
-	     "@samp{[[:digit:]]} will match a single decimal digit.  ");
+	     "@samp{[[:digit:]]} will match a single decimal digit.\n");
   else
     literal ("Character classes are not supported, so for example "
 	     "you would need to use @samp{[0-9]} "
-	     "instead of @samp{[[:digit:]]}.  ");
+	     "instead of @samp{[[:digit:]]}.\n");
 
   if (options & RE_HAT_LISTS_NOT_NEWLINE)
     {
-      literal ("Non-matching lists @samp{[^@dots{}]} do not ever match newline.  ");
+      literal ("Non-matching lists @samp{[^@dots{}]} do not ever match newline.\n");
     }
   newpara ();
   if (options & RE_NO_GNU_OPS)
@@ -242,7 +246,7 @@ describe_regex_syntax (int options)
       content ("GNU extensions are not supported and so "
 	       "@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
 	       "match "
-	       "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.  ");
+	       "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.\n");
     }
   else
     {
@@ -276,7 +280,7 @@ describe_regex_syntax (int options)
 
   if (options & RE_NO_BK_REFS)
     {
-      content ("A backslash followed by a digit matches that digit.  ");
+      content ("A backslash followed by a digit matches that digit.");
     }
   else
     {
@@ -285,7 +289,7 @@ describe_regex_syntax (int options)
 	literal ("@samp{(}");
       else
 	literal ("@samp{\\(}");
-      content (".  ");
+      content (".");
     }
 
 
@@ -293,29 +297,28 @@ describe_regex_syntax (int options)
   if (!(options & RE_LIMITED_OPS))
     {
       if (options & RE_NO_BK_VBAR)
-	literal ("The alternation operator is @samp{|}.  ");
+	literal ("The alternation operator is @samp{|}.");
       else
-	literal ("The alternation operator is @samp{\\|}. ");
+	literal ("The alternation operator is @samp{\\|}.");
     }
   newpara ();
 
   if (options & RE_CONTEXT_INDEP_ANCHORS)
     {
-      literal ("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets.  Within brackets, @samp{^} can be used to invert the membership of the character class being specified.  ");
+      literal ("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets.  Within brackets, @samp{^} can be used to invert the membership of the character class being specified.\n");
     }
   else
     {
       literal ("The character @samp{^} only represents the beginning of a string when it appears:");
       beginenum ();
-      enum_item ("\nAt the beginning of a regular expression");
-      enum_item ("After an open-group, signified by ");
+      enum_item ("At the beginning of a regular expression");
       if (options & RE_NO_BK_PARENS)
 	{
-	  literal ("@samp{(}");
+	  enum_item ("After an open-group, signified by @samp{(}");
 	}
       else
 	{
-	  literal ("@samp{\\(}");
+	  enum_item ("After an open-group, signified by @samp{\\(}");
 	}
       newline ();
       if (!(options & RE_LIMITED_OPS))
@@ -334,14 +337,13 @@ describe_regex_syntax (int options)
       literal ("The character @samp{$} only represents the end of a string when it appears:");
       beginenum ();
       enum_item ("At the end of a regular expression");
-      enum_item ("Before a close-group, signified by ");
       if (options & RE_NO_BK_PARENS)
 	{
-	  literal ("@samp{)}");
+	  enum_item ("Before a close-group, signified by @samp{)}");
 	}
       else
 	{
-	  literal ("@samp{\\)}");
+	  enum_item ("Before a close-group, signified by @samp{\\)}");
 	}
       if (!(options & RE_LIMITED_OPS))
 	{
@@ -361,7 +363,7 @@ describe_regex_syntax (int options)
       if ((options & RE_CONTEXT_INDEP_OPS)
 	  && !(options & RE_CONTEXT_INVALID_OPS))
 	{
-	  literal ("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression.  ");
+	  literal ("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression.\n");
 	}
       else
 	{
@@ -381,14 +383,13 @@ describe_regex_syntax (int options)
 
 	  beginenum ();
 	  enum_item ("At the beginning of a regular expression");
-	  enum_item ("After an open-group, signified by ");
 	  if (options & RE_NO_BK_PARENS)
 	    {
-	      literal ("@samp{(}");
+	      enum_item ("After an open-group, signified by @samp{(}");
 	    }
 	  else
 	    {
-	      literal ("@samp{\\(}");
+	      enum_item ("After an open-group, signified by @samp{\\(}");
 	    }
 	  if (!(options & RE_LIMITED_OPS))
 	    {
@@ -410,39 +411,38 @@ describe_regex_syntax (int options)
     {
       if (options & RE_NO_BK_BRACES)
 	{
-	  literal ("Intervals are specified by @samp{@{} and @samp{@}}.  ");
+	  literal ("Intervals are specified by @samp{@{} and @samp{@}}.\n");
 	  if (options & RE_INVALID_INTERVAL_ORD)
 	    {
 	      literal ("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}");
 	    }
 	  else
 	    {
-	      literal ("Invalid intervals such as @samp{a@{1z} are not accepted.  ");
+	      literal ("Invalid intervals such as @samp{a@{1z} are not accepted.\n");
 	    }
 	}
       else
 	{
-	  literal ("Intervals are specified by @samp{\\@{} and @samp{\\@}}.  ");
+	  literal ("Intervals are specified by @samp{\\@{} and @samp{\\@}}.\n");
 	  if (options & RE_INVALID_INTERVAL_ORD)
 	    {
 	      literal ("Invalid intervals are treated as literals, for example @samp{a\\@{1} is treated as @samp{a@{1}");
 	    }
 	  else
 	    {
-	      literal ("Invalid intervals such as @samp{a\\@{1z} are not accepted.  ");
+	      literal ("Invalid intervals such as @samp{a\\@{1z} are not accepted.\n");
 	    }
 	}
-
     }
 
   newpara ();
   if (options & RE_NO_POSIX_BACKTRACKING)
     {
-      content ("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match.  ");
+      content ("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match.");
     }
   else
     {
-      content ("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.  ");
+      content ("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.");
     }
   newpara ();
 }
diff --git a/lib/regextype.c b/lib/regextype.c
index 8a7347dc..89416ebd 100644
--- a/lib/regextype.c
+++ b/lib/regextype.c
@@ -56,17 +56,19 @@ struct tagRegexTypeMap
 struct tagRegexTypeMap regex_map[] =
   {
    { "findutils-default",     CONTEXT_FINDUTILS, RE_SYNTAX_EMACS|RE_DOT_NEWLINE  },
+
+   { "posix-awk",             CONTEXT_ALL,       RE_SYNTAX_POSIX_AWK             },
+   { "posix-basic",           CONTEXT_ALL,       RE_SYNTAX_POSIX_BASIC           },
+   { "posix-egrep",           CONTEXT_ALL,       RE_SYNTAX_POSIX_EGREP           },
+   { "posix-extended",        CONTEXT_ALL,       RE_SYNTAX_POSIX_EXTENDED        },
+   { "posix-minimal-basic",   CONTEXT_GENERIC,   RE_SYNTAX_POSIX_MINIMAL_BASIC   },
+
    { "awk",                   CONTEXT_ALL,       RE_SYNTAX_AWK                   },
-   { "egrep",                 CONTEXT_ALL,       RE_SYNTAX_EGREP                 },
    { "ed",                    CONTEXT_GENERIC,   RE_SYNTAX_ED                    },
+   { "egrep",                 CONTEXT_ALL,       RE_SYNTAX_EGREP                 },
    { "emacs",                 CONTEXT_ALL,       RE_SYNTAX_EMACS                 },
    { "gnu-awk",               CONTEXT_ALL,       RE_SYNTAX_GNU_AWK               },
    { "grep",                  CONTEXT_ALL,       RE_SYNTAX_GREP                  },
-   { "posix-awk",             CONTEXT_ALL,       RE_SYNTAX_POSIX_AWK             },
-   { "posix-basic",           CONTEXT_ALL,       RE_SYNTAX_POSIX_BASIC           },
-   { "posix-egrep",           CONTEXT_ALL,       RE_SYNTAX_POSIX_EGREP           },
-   { "posix-extended",        CONTEXT_ALL,       RE_SYNTAX_POSIX_EXTENDED        },
-   { "posix-minimal-basic",   CONTEXT_GENERIC,   RE_SYNTAX_POSIX_MINIMAL_BASIC    },
    { "sed",                   CONTEXT_GENERIC,   RE_SYNTAX_SED                   },
    /*    ,{ "posix-common",   CONTEXT_GENERIC,  _RE_SYNTAX_POSIX_COMMON   } */
   };
-- 
2.19.1