Merge branch 'maint'

* maint:
  tag.c: whitespace breakages fix
  Fix whitespace issue in object.c
  t5505: add missing &&
diff --git a/.gitignore b/.gitignore
index fcdd822..20560b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -166,12 +166,17 @@
 /test-dump-cache-tree
 /test-genrandom
 /test-index-version
+/test-line-buffer
 /test-match-trees
+/test-obj-pool
 /test-parse-options
 /test-path-utils
 /test-run-command
 /test-sha1
 /test-sigchain
+/test-string-pool
+/test-svn-fe
+/test-treap
 /common-cmds.h
 *.tar.gz
 *.dsc
@@ -181,6 +186,12 @@
 *.[aos]
 *.py[co]
 .depend/
+*.gcda
+*.gcno
+*.gcov
+/coverage-untested-functions
+/cover_db/
+/cover_db_html/
 *+
 /config.mak
 /autom4te.cache
diff --git a/Documentation/Makefile b/Documentation/Makefile
index a4c4063..e117bc4 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -279,7 +279,7 @@
 XSLT = docbook.xsl
 XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
 
-user-manual.html: user-manual.xml
+user-manual.html: user-manual.xml $(XSLT)
 	$(QUIET_XSLTPROC)$(RM) $@+ $@ && \
 	xsltproc $(XSLTOPTS) -o $@+ $(XSLT) $< && \
 	mv $@+ $@
diff --git a/Documentation/RelNotes-1.7.3.txt b/Documentation/RelNotes-1.7.3.txt
new file mode 100644
index 0000000..3512bbb
--- /dev/null
+++ b/Documentation/RelNotes-1.7.3.txt
@@ -0,0 +1,73 @@
+Git v1.7.3 Release Notes (draft)
+================================
+
+Updates since v1.7.2
+--------------------
+
+ * git-gui got various updates and a new maintainer, Pat Thoyts.
+
+ * Gitweb allows its configuration to change per each request; it used to
+   read the configuration once upon startup.
+
+ * When git finds a corrupt object, it now reports the file that contains
+   it.
+
+ * "git checkout -B <it>" is a shorter way to say "git branch -f <it>"
+   followed by "git checkout <it>".
+
+ * When "git checkout" or "git merge" refuse to proceed in order to
+   protect local modification to your working tree, they used to stop
+   after showing just one path that might be lost.  They now show all,
+   in a format that is easier to read.
+
+ * "git clean" learned "-e" ("--exclude") option.
+
+ * Hunk headers produced for C# files by "git diff" and friends show more
+   relevant context than before.
+
+ * diff.ignoresubmodules configuration variable can be used to squelch the
+   differences in submodules reported when running commands (e.g. "diff",
+   "status", etc.) at the superproject level.
+
+ * http.useragent configuration can be used to lie who you are to your
+   restrictive firewall.
+
+ * "git rebase --strategy <s>" learned "-X" option to pass extra options
+   that are understood by the chosen merge strategy.
+
+ * "git rebase -i" learned "exec" that you can insert into the insn sheet
+   to run a command between its steps.
+
+ * "git rebase" between branches that have many binary changes that do
+   not conflict should be faster.
+
+ * "git rebase -i" peeks into rebase.autosquash configuration and acts as
+   if you gave --autosquash from the command line.
+
+
+Also contains various documentation updates.
+
+
+Fixes since v1.7.2
+------------------
+
+All of the fixes in v1.7.2.X maintenance series are included in this
+release, unless otherwise noted.
+
+ * "git merge -s recursive" (which is the default) did not handle cases
+   where a directory becomes a file (or vice versa) very well.
+
+ * "git fetch" and friends were accidentally broken for url with "+" in
+   its path, e.g. "git://git.gnome.org/gtk+".
+
+---
+exec >/var/tmp/1
+echo O=$(git describe master)
+O=v1.7.2.2-268-g7e42332
+O=v1.7.2
+git shortlog --no-merges $O..master ^maint
+exit 0
+
+What did we want to do with...
+
+1e3d411 (Enable custom schemes for column colors in the graph API, 2010-07-13)
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index eb53e06..ece3c77 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -7,17 +7,16 @@
 	  before committing
 	- do not check in commented out code or unneeded files
 	- the first line of the commit message should be a short
-	  description and should skip the full stop
+	  description (50 characters is the soft limit, see DISCUSSION
+	  in git-commit(1)), and should skip the full stop
 	- the body should provide a meaningful commit message, which:
 		- uses the imperative, present tense: "change",
 		  not "changed" or "changes".
 		- includes motivation for the change, and contrasts
 		  its implementation with previous behaviour
-	- if you want your work included in git.git, add a
-	  "Signed-off-by: Your Name <you@example.com>" line to the
-	  commit message (or just use the option "-s" when
-	  committing) to confirm that you agree to the Developer's
-	  Certificate of Origin
+	- add a "Signed-off-by: Your Name <you@example.com>" line to the
+	  commit message (or just use the option "-s" when committing)
+	  to confirm that you agree to the Developer's Certificate of Origin
 	- make sure that you have tests for the bug you are fixing
 	- make sure that the test suite passes after your commit
 
diff --git a/Documentation/asciidoc.conf b/Documentation/asciidoc.conf
index 87a90f2..aea8627 100644
--- a/Documentation/asciidoc.conf
+++ b/Documentation/asciidoc.conf
@@ -16,8 +16,11 @@
 caret=&#94;
 startsb=&#91;
 endsb=&#93;
+backslash=&#92;
 tilde=&#126;
+apostrophe=&#39;
 backtick=&#96;
+litdd=&#45;&#45;
 
 ifdef::backend-docbook[]
 [linkgit-inlinemacro]
diff --git a/Documentation/config.txt b/Documentation/config.txt
index dc4e83b..cda6721 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -826,6 +826,12 @@
 	will enable basic rename detection.  If set to "copies" or
 	"copy", it will detect copies, as well.
 
+diff.ignoreSubmodules::
+	Sets the default value of --ignore-submodules. Note that this
+	affects only 'git diff' Porcelain, and not lower level 'diff'
+	commands such as 'git diff-files'. 'git checkout' also honors
+	this setting when reporting uncommitted changes.
+
 diff.suppressBlankEmpty::
 	A boolean to inhibit the standard behavior of printing a space
 	before each empty output line. Defaults to false.
@@ -1243,6 +1249,15 @@
 	support EPSV mode. Can be overridden by the 'GIT_CURL_FTP_NO_EPSV'
 	environment variable. Default is false (curl will use EPSV).
 
+http.useragent::
+	The HTTP USER_AGENT string presented to an HTTP server.  The default
+	value represents the version of the client git such as git/1.7.1.
+	This option allows you to override this value to a more common value
+	such as Mozilla/4.0.  This may be necessary, for instance, if
+	connecting through a firewall that restricts HTTP connections to a set
+	of common USER_AGENT strings (but not including those like git/1.7.1).
+	Can be overridden by the 'GIT_HTTP_USER_AGENT' environment variable.
+
 i18n.commitEncoding::
 	Character encoding the commit messages are stored in; git itself
 	does not care per se, but this information is necessary e.g. when
@@ -1291,10 +1306,11 @@
 	ignored if portable keystroke input is not available.
 
 log.date::
-	Set default date-time mode for the log command. Setting log.date
-	value is similar to using 'git log'\'s --date option. The value is one of the
-	following alternatives: {relative,local,default,iso,rfc,short}.
-	See linkgit:git-log[1].
+	Set the default date-time mode for the 'log' command.
+	Setting a value for log.date is similar to using 'git log''s
+	`\--date` option.  Possible values are `relative`, `local`,
+	`default`, `iso`, `rfc`, and `short`; see linkgit:git-log[1]
+	for details.
 
 log.decorate::
 	Print out the ref names of any commits that are shown by the log
@@ -1535,6 +1551,9 @@
 	Whether to show a diffstat of what changed upstream since the last
 	rebase. False by default.
 
+rebase.autosquash::
+	If set to true enable '--autosquash' option by default.
+
 receive.autogc::
 	By default, git-receive-pack will run "git-gc --auto" after
 	receiving data from git-push and updating refs.  You can stop
@@ -1751,6 +1770,19 @@
 	URL and other values found in the `.gitmodules` file.  See
 	linkgit:git-submodule[1] and linkgit:gitmodules[5] for details.
 
+submodule.<name>.ignore::
+	Defines under what circumstances "git status" and the diff family show
+	a submodule as modified. When set to "all", it will never be considered
+	modified, "dirty" will ignore all changes to the submodules work tree and
+	takes only differences between the HEAD of the submodule and the commit
+	recorded in the superproject into account. "untracked" will additionally
+	let submodules with modified tracked files in their work tree show up.
+	Using "none" (the default when this option is not set) also shows
+	submodules that have untracked files in their work tree as changed.
+	This setting overrides any setting made in .gitmodules for this submodule,
+	both settings can be overridden on the command line by using the
+	"--ignore-submodules" option.
+
 tar.umask::
 	This variable can be used to restrict the permission bits of
 	tar archive entries.  The default is 0002, which turns off the
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index eecedaa..4656a97 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -355,7 +355,11 @@
 
 --ignore-submodules[=<when>]::
 	Ignore changes to submodules in the diff generation. <when> can be
-	either "untracked", "dirty" or "all", which is the default. When
+	either "none", "untracked", "dirty" or "all", which is the default
+	Using "none" will consider the submodule modified when it either contains
+	untracked or modified files or its HEAD differs from the commit recorded
+	in the superproject and can be used to override any settings of the
+	'ignore' option in linkgit:git-config[1] or linkgit:gitmodules[5]. When
 	"untracked" is used submodules are not considered dirty when they only
 	contain untracked content (but they are still scanned for modified
 	content). Using "dirty" ignores all changes to the work tree of submodules,
diff --git a/Documentation/docbook.xsl b/Documentation/docbook.xsl
index 9a6912c..da8b05b 100644
--- a/Documentation/docbook.xsl
+++ b/Documentation/docbook.xsl
@@ -1,5 +1,8 @@
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 		version='1.0'>
  <xsl:import href="http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl"/>
- <xsl:output method="html" encoding="UTF-8" indent="no" />
+ <xsl:output method="html"
+     encoding="UTF-8" indent="no"
+     doctype-public="-//W3C//DTD HTML 4.01//EN"
+     doctype-system="http://www.w3.org/TR/html4/strict.dtd" />
 </xsl:stylesheet>
diff --git a/Documentation/git-add.txt b/Documentation/git-add.txt
index e22a62f..73378b2 100644
--- a/Documentation/git-add.txt
+++ b/Documentation/git-add.txt
@@ -157,14 +157,14 @@
 EXAMPLES
 --------
 
-* Adds content from all `\*.txt` files under `Documentation` directory
+* Adds content from all `*.txt` files under `Documentation` directory
 and its subdirectories:
 +
 ------------
 $ git add Documentation/\*.txt
 ------------
 +
-Note that the asterisk `\*` is quoted from the shell in this
+Note that the asterisk `*` is quoted from the shell in this
 example; this lets the command include the files from
 subdirectories of `Documentation/` directory.
 
@@ -220,7 +220,7 @@
 difference between indexed copy and the working tree
 version (if the working tree version were also different,
 'binary' would have been shown in place of 'nothing').  The
-other file, git-add--interactive.perl, has 403 lines added
+other file, git-add{litdd}interactive.perl, has 403 lines added
 and 35 lines deleted if you commit what is in the index, but
 working tree file has further modifications (one addition and
 one deletion).
diff --git a/Documentation/git-archimport.txt b/Documentation/git-archimport.txt
index 4d4325f..4f358c8 100644
--- a/Documentation/git-archimport.txt
+++ b/Documentation/git-archimport.txt
@@ -44,7 +44,7 @@
 manually.  To do so, write a git branch name after each <archive/branch>
 parameter, separated by a colon.  This way, you can shorten the Arch
 branch names and convert Arch jargon to git jargon, for example mapping a
-"PROJECT--devo--VERSION" branch to "master".
+"PROJECT{litdd}devo{litdd}VERSION" branch to "master".
 
 Associating multiple Arch branches to one git branch is possible; the
 result will make the most sense only if no commits are made to the first
@@ -85,8 +85,8 @@
 -o::
 	Use this for compatibility with old-style branch names used by
 	earlier versions of 'git archimport'.  Old-style branch names
-	were category--branch, whereas new-style branch names are
-	archive,category--branch--version.  In both cases, names given
+	were category{litdd}branch, whereas new-style branch names are
+	archive,category{litdd}branch{litdd}version.  In both cases, names given
 	on the command-line will override the automatically-generated
 	ones.
 
diff --git a/Documentation/git-bisect-lk2009.txt b/Documentation/git-bisect-lk2009.txt
index efbe379..8a2ba37 100644
--- a/Documentation/git-bisect-lk2009.txt
+++ b/Documentation/git-bisect-lk2009.txt
@@ -873,7 +873,7 @@
 where c is the number of rounds of test (so a small constant) and b is
 the ratio of bug per commit (hopefully a small constant too).
 
-So of course it's much better as it's O(N \* T) vs O(N \* T \* M) if
+So of course it's much better as it's O(N * T) vs O(N * T * M) if
 you would test everything after each commit.
 
 This means that test suites are good to prevent some bugs from being
diff --git a/Documentation/git-bundle.txt b/Documentation/git-bundle.txt
index a5ed8fb..38e59af 100644
--- a/Documentation/git-bundle.txt
+++ b/Documentation/git-bundle.txt
@@ -9,7 +9,7 @@
 SYNOPSIS
 --------
 [verse]
-'git bundle' create <file> <git-rev-list args>
+'git bundle' create <file> <git-rev-list-args>
 'git bundle' verify <file>
 'git bundle' list-heads <file> [refname...]
 'git bundle' unbundle <file> [refname...]
@@ -34,57 +34,58 @@
 -------
 
 create <file>::
-       Used to create a bundle named 'file'.  This requires the
-       'git rev-list' arguments to define the bundle contents.
+	Used to create a bundle named 'file'.  This requires the
+	'git-rev-list-args' arguments to define the bundle contents.
 
 verify <file>::
-       Used to check that a bundle file is valid and will apply
-       cleanly to the current repository.  This includes checks on the
-       bundle format itself as well as checking that the prerequisite
-       commits exist and are fully linked in the current repository.
-       'git bundle' prints a list of missing commits, if any, and exits
-       with a non-zero status.
+	Used to check that a bundle file is valid and will apply
+	cleanly to the current repository.  This includes checks on the
+	bundle format itself as well as checking that the prerequisite
+	commits exist and are fully linked in the current repository.
+	'git bundle' prints a list of missing commits, if any, and exits
+	with a non-zero status.
 
 list-heads <file>::
-       Lists the references defined in the bundle.  If followed by a
-       list of references, only references matching those given are
-       printed out.
+	Lists the references defined in the bundle.  If followed by a
+	list of references, only references matching those given are
+	printed out.
 
 unbundle <file>::
-       Passes the objects in the bundle to 'git index-pack'
-       for storage in the repository, then prints the names of all
-       defined references. If a list of references is given, only
-       references matching those in the list are printed. This command is
-       really plumbing, intended to be called only by 'git fetch'.
+	Passes the objects in the bundle to 'git index-pack'
+	for storage in the repository, then prints the names of all
+	defined references. If a list of references is given, only
+	references matching those in the list are printed. This command is
+	really plumbing, intended to be called only by 'git fetch'.
 
-[git-rev-list-args...]::
-       A list of arguments, acceptable to 'git rev-parse' and
-       'git rev-list', that specifies the specific objects and references
-       to transport.  For example, `master\~10..master` causes the
-       current master reference to be packaged along with all objects
-       added since its 10th ancestor commit.  There is no explicit
-       limit to the number of references and objects that may be
-       packaged.
+<git-rev-list-args>::
+	A list of arguments, acceptable to 'git rev-parse' and
+	'git rev-list' (and containg a named ref, see SPECIFYING REFERENCES
+	below), that specifies the specific objects and references
+	to transport.  For example, `master{tilde}10..master` causes the
+	current master reference to be packaged along with all objects
+	added since its 10th ancestor commit.  There is no explicit
+	limit to the number of references and objects that may be
+	packaged.
 
 
 [refname...]::
-       A list of references used to limit the references reported as
-       available. This is principally of use to 'git fetch', which
-       expects to receive only those references asked for and not
-       necessarily everything in the pack (in this case, 'git bundle' acts
-       like 'git fetch-pack').
+	A list of references used to limit the references reported as
+	available. This is principally of use to 'git fetch', which
+	expects to receive only those references asked for and not
+	necessarily everything in the pack (in this case, 'git bundle' acts
+	like 'git fetch-pack').
 
 SPECIFYING REFERENCES
 ---------------------
 
 'git bundle' will only package references that are shown by
 'git show-ref': this includes heads, tags, and remote heads.  References
-such as `master\~1` cannot be packaged, but are perfectly suitable for
+such as `master{tilde}1` cannot be packaged, but are perfectly suitable for
 defining the basis.  More than one reference may be packaged, and more
 than one basis can be specified.  The objects packaged are those not
 contained in the union of the given bases.  Each basis can be
-specified explicitly (e.g. `^master\~10`), or implicitly (e.g.
-`master\~10..master`, `--since=10.days.ago master`).
+specified explicitly (e.g. `^master{tilde}10`), or implicitly (e.g.
+`master{tilde}10..master`, `--since=10.days.ago master`).
 
 It is very important that the basis used be held by the destination.
 It is okay to err on the side of caution, causing the bundle file
@@ -154,7 +155,7 @@
 If you know up to what commit the intended recipient repository should
 have the necessary objects, you can use that knowledge to specify the
 basis, giving a cut-off point to limit the revisions and objects that go
-in the resulting bundle. The previous example used lastR2bundle tag
+in the resulting bundle. The previous example used the lastR2bundle tag
 for this purpose, but you can use any other options that you would give to
 the linkgit:git-log[1] command. Here are more examples:
 
@@ -194,7 +195,7 @@
 $ git fetch mybundle master:localRef
 ----------------
 
-You can also see what references it offers.
+You can also see what references it offers:
 
 ----------------
 $ git ls-remote mybundle
diff --git a/Documentation/git-checkout-index.txt b/Documentation/git-checkout-index.txt
index d6aa6e1..62f9ab2 100644
--- a/Documentation/git-checkout-index.txt
+++ b/Documentation/git-checkout-index.txt
@@ -13,7 +13,7 @@
 		   [--stage=<number>|all]
 		   [--temp]
 		   [-z] [--stdin]
-		   [--] [<file>]\*
+		   [--] [<file>]*
 
 DESCRIPTION
 -----------
diff --git a/Documentation/git-checkout.txt b/Documentation/git-checkout.txt
index 1bacd2e..f88e997 100644
--- a/Documentation/git-checkout.txt
+++ b/Documentation/git-checkout.txt
@@ -9,7 +9,7 @@
 --------
 [verse]
 'git checkout' [-q] [-f] [-m] [<branch>]
-'git checkout' [-q] [-f] [-m] [[-b|--orphan] <new_branch>] [<start_point>]
+'git checkout' [-q] [-f] [-m] [[-b|-B|--orphan] <new_branch>] [<start_point>]
 'git checkout' [-f|--ours|--theirs|-m|--conflict=<style>] [<tree-ish>] [--] <paths>...
 'git checkout' --patch [<tree-ish>] [--] [<paths>...]
 
@@ -21,7 +21,7 @@
 branch.
 
 'git checkout' [<branch>]::
-'git checkout' -b <new branch> [<start point>]::
+'git checkout' -b|-B <new_branch> [<start point>]::
 
 	This form switches branches by updating the index, working
 	tree, and HEAD to reflect the specified branch.
@@ -31,6 +31,17 @@
 use the `--track` or `--no-track` options, which will be passed to
 'git branch'.  As a convenience, `--track` without `-b` implies branch
 creation; see the description of `--track` below.
++
+If `-B` is given, <new_branch> is created if it doesn't exist; otherwise, it
+is reset. This is the transactional equivalent of
++
+------------
+$ git branch -f <branch> [<start point>]
+$ git checkout <branch>
+------------
++
+that is to say, the branch is not reset/created unless "git checkout" is
+successful.
 
 'git checkout' [--patch] [<tree-ish>] [--] <pathspec>...::
 
@@ -75,6 +86,12 @@
 	Create a new branch named <new_branch> and start it at
 	<start_point>; see linkgit:git-branch[1] for details.
 
+-B::
+	Creates the branch <new_branch> and start it at <start_point>;
+	if it already exists, then reset it to <start_point>. This is
+	equivalent to running "git branch" with "-f"; see
+	linkgit:git-branch[1] for details.
+
 -t::
 --track::
 	When creating a new branch, set up "upstream" configuration. See
@@ -170,7 +187,7 @@
 checks out the branch (instead of detaching).  You may also specify
 `-` which is synonymous with `"@\{-1\}"`.
 +
-As a further special case, you may use `"A...B"` as a shortcut for the
+As a further special case, you may use `"A\...B"` as a shortcut for the
 merge base of `A` and `B` if there is exactly one merge base. You can
 leave out at most one of `A` and `B`, in which case it defaults to `HEAD`.
 
diff --git a/Documentation/git-clean.txt b/Documentation/git-clean.txt
index a81cb6c..60e38e6 100644
--- a/Documentation/git-clean.txt
+++ b/Documentation/git-clean.txt
@@ -8,7 +8,7 @@
 SYNOPSIS
 --------
 [verse]
-'git clean' [-d] [-f] [-n] [-q] [-x | -X] [--] <path>...
+'git clean' [-d] [-f] [-n] [-q] [-e <pattern>] [-x | -X] [--] <path>...
 
 DESCRIPTION
 -----------
@@ -45,6 +45,12 @@
 	Be quiet, only report errors, but not the files that are
 	successfully removed.
 
+-e <pattern>::
+--exclude=<pattern>::
+	Specify special exceptions to not be cleaned.  Each <pattern> is
+	the same form as in $GIT_DIR/info/excludes and this option can be
+	given multiple times.
+
 -x::
 	Don't use the ignore rules.  This allows removing all untracked
 	files, including build products.  This can be used (possibly in
diff --git a/Documentation/git-commit-tree.txt b/Documentation/git-commit-tree.txt
index 6188854..349366e 100644
--- a/Documentation/git-commit-tree.txt
+++ b/Documentation/git-commit-tree.txt
@@ -8,7 +8,7 @@
 
 SYNOPSIS
 --------
-'git commit-tree' <tree> [-p <parent commit>]\* < changelog
+'git commit-tree' <tree> [-p <parent commit>]* < changelog
 
 DESCRIPTION
 -----------
diff --git a/Documentation/git-fast-export.txt b/Documentation/git-fast-export.txt
index 98ec6b5..fcad113 100644
--- a/Documentation/git-fast-export.txt
+++ b/Documentation/git-fast-export.txt
@@ -90,10 +90,16 @@
 	resulting stream can only be used by a repository which
 	already contains the necessary objects.
 
+--full-tree::
+	This option will cause fast-export to issue a "deleteall"
+	directive for each commit followed by a full list of all files
+	in the commit (as opposed to just listing the files which are
+	different from the commit's first parent).
+
 [git-rev-list-args...]::
        A list of arguments, acceptable to 'git rev-parse' and
        'git rev-list', that specifies the specific objects and references
-       to export.  For example, `master\~10..master` causes the
+       to export.  For example, `master{tilde}10..master` causes the
        current master reference to be exported along with all objects
        added since its 10th ancestor commit.
 
diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index 77a0a24..966ba4f 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -482,9 +482,11 @@
 	'M' SP <mode> SP <dataref> SP <path> LF
 ....
 +
-Here `<dataref>` can be either a mark reference (`:<idnum>`)
+Here usually `<dataref>` must be either a mark reference (`:<idnum>`)
 set by a prior `blob` command, or a full 40-byte SHA-1 of an
-existing Git blob object.
+existing Git blob object.  If `<mode>` is `040000`` then
+`<dataref>` must be the full 40-byte SHA-1 of an existing
+Git tree object or a mark reference set with `--import-marks`.
 
 Inline data format::
 	The data content for the file has not been supplied yet.
@@ -509,6 +511,8 @@
 * `160000`: A gitlink, SHA-1 of the object refers to a commit in
   another repository. Git links can only be specified by SHA or through
   a commit mark. They are used to implement submodules.
+* `040000`: A subdirectory.  Subdirectories can only be specified by
+  SHA or through a tree mark set with `--import-marks`.
 
 In both formats `<path>` is the complete path of the file to be added
 (if not already existing) or modified (if already existing).
diff --git a/Documentation/git-fmt-merge-msg.txt b/Documentation/git-fmt-merge-msg.txt
index a585dbe..302f56b 100644
--- a/Documentation/git-fmt-merge-msg.txt
+++ b/Documentation/git-fmt-merge-msg.txt
@@ -9,8 +9,8 @@
 SYNOPSIS
 --------
 [verse]
-'git fmt-merge-msg' [--log | --no-log] <$GIT_DIR/FETCH_HEAD
-'git fmt-merge-msg' [--log | --no-log] -F <file>
+'git fmt-merge-msg' [-m <message>] [--log | --no-log] <$GIT_DIR/FETCH_HEAD
+'git fmt-merge-msg' [-m <message>] [--log | --no-log] -F <file>
 
 DESCRIPTION
 -----------
@@ -38,6 +38,11 @@
 	Synonyms to --log and --no-log; these are deprecated and will be
 	removed in the future.
 
+-m <message>::
+--message <message>::
+	Use <message> instead of the branch names for the first line
+	of the log message.  For use with `--log`.
+
 -F <file>::
 --file <file>::
 	Take the list of merged objects from <file> instead of
diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 390d85c..d66fd9d 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -9,7 +9,7 @@
 --------
 [verse]
 'git for-each-ref' [--count=<count>] [--shell|--perl|--python|--tcl]
-		   [--sort=<key>]\* [--format=<format>] [<pattern>...]
+		   [--sort=<key>]* [--format=<format>] [<pattern>...]
 
 DESCRIPTION
 -----------
diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 5474dd7..dab0a78 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -191,11 +191,11 @@
 Examples
 --------
 
-git grep 'time_t' \-- '*.[ch]'::
+git grep {apostrophe}time_t{apostrophe} \-- {apostrophe}*.[ch]{apostrophe}::
 	Looks for `time_t` in all tracked .c and .h files in the working
 	directory and its subdirectories.
 
-git grep -e \'#define\' --and \( -e MAX_PATH -e PATH_MAX \)::
+git grep -e {apostrophe}#define{apostrophe} --and \( -e MAX_PATH -e PATH_MAX \)::
 	Looks for a line that has `#define` and either `MAX_PATH` or
 	`PATH_MAX`.
 
diff --git a/Documentation/git-help.txt b/Documentation/git-help.txt
index f8df109..eccd0ff 100644
--- a/Documentation/git-help.txt
+++ b/Documentation/git-help.txt
@@ -55,9 +55,9 @@
 +
 The web browser can be specified using the configuration variable
 'help.browser', or 'web.browser' if the former is not set. If none of
-these config variables is set, the 'git web--browse' helper script
+these config variables is set, the 'git web{litdd}browse' helper script
 (called by 'git help') will pick a suitable default. See
-linkgit:git-web--browse[1] for more information about this.
+linkgit:git-web{litdd}browse[1] for more information about this.
 
 CONFIGURATION VARIABLES
 -----------------------
@@ -80,7 +80,7 @@
 The 'help.browser', 'web.browser' and 'browser.<tool>.path' will also
 be checked if the 'web' format is chosen (either by command line
 option or configuration variable). See '-w|--web' in the OPTIONS
-section above and linkgit:git-web--browse[1].
+section above and linkgit:git-web{litdd}browse[1].
 
 man.viewer
 ~~~~~~~~~~
diff --git a/Documentation/git-instaweb.txt b/Documentation/git-instaweb.txt
index 2c3c4d2..7477ce8 100644
--- a/Documentation/git-instaweb.txt
+++ b/Documentation/git-instaweb.txt
@@ -44,20 +44,23 @@
 -b::
 --browser::
 	The web browser that should be used to view the gitweb
-	page. This will be passed to the 'git web--browse' helper
+	page. This will be passed to the 'git web{litdd}browse' helper
 	script along with the URL of the gitweb instance. See
-	linkgit:git-web--browse[1] for more information about this. If
+	linkgit:git-web{litdd}browse[1] for more information about this. If
 	the script fails, the URL will be printed to stdout.
 
+start::
 --start::
 	Start the httpd instance and exit.  This does not generate
 	any of the configuration files for spawning a new instance.
 
+stop::
 --stop::
 	Stop the httpd instance and exit.  This does not generate
 	any of the configuration files for spawning a new instance,
 	nor does it close the browser.
 
+restart::
 --restart::
 	Restart the httpd instance and exit.  This does not generate
 	any of the configuration files for spawning a new instance.
@@ -79,7 +82,7 @@
 
 If the configuration variable 'instaweb.browser' is not set,
 'web.browser' will be used instead if it is defined. See
-linkgit:git-web--browse[1] for more information about this.
+linkgit:git-web{litdd}browse[1] for more information about this.
 
 Author
 ------
diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index bd919f2..15aee2f 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -10,14 +10,14 @@
 --------
 [verse]
 'git ls-files' [-z] [-t] [-v]
-		(--[cached|deleted|others|ignored|stage|unmerged|killed|modified])\*
-		(-[c|d|o|i|s|u|k|m])\*
+		(--[cached|deleted|others|ignored|stage|unmerged|killed|modified])*
+		(-[c|d|o|i|s|u|k|m])*
 		[-x <pattern>|--exclude=<pattern>]
 		[-X <file>|--exclude-from=<file>]
 		[--exclude-per-directory=<file>]
 		[--exclude-standard]
 		[--error-unmatch] [--with-tree=<tree-ish>]
-		[--full-name] [--abbrev] [--] [<file>]\*
+		[--full-name] [--abbrev] [--] [<file>]*
 
 DESCRIPTION
 -----------
@@ -140,6 +140,12 @@
 	lines, show only a partial prefix.
 	Non default number of digits can be specified with --abbrev=<n>.
 
+--debug::
+	After each line that describes a file, add more data about its
+	cache entry.  This is intended to show as much information as
+	possible for manual inspection; the exact format may change at
+	any time.
+
 \--::
 	Do not interpret any more arguments as options.
 
diff --git a/Documentation/git-merge-base.txt b/Documentation/git-merge-base.txt
index ce5b369..eedef1b 100644
--- a/Documentation/git-merge-base.txt
+++ b/Documentation/git-merge-base.txt
@@ -8,7 +8,9 @@
 
 SYNOPSIS
 --------
-'git merge-base' [-a|--all] <commit> <commit>...
+[verse]
+'git merge-base' [-a|--all] [--octopus] <commit> <commit>...
+'git merge-base' --independent <commit>...
 
 DESCRIPTION
 -----------
@@ -20,12 +22,12 @@
 ancestor', i.e. a 'merge base'.  Note that there can be more than one
 merge base for a pair of commits.
 
-Among the two commits to compute the merge base from, one is specified by
-the first commit argument on the command line; the other commit is a
-(possibly hypothetical) commit that is a merge across all the remaining
-commits on the command line.  As the most common special case, specifying only
-two commits on the command line means computing the merge base between
-the given two commits.
+Unless `--octopus` is given, among the two commits to compute the merge
+base from, one is specified by the first commit argument on the command
+line; the other commit is a (possibly hypothetical) commit that is a merge
+across all the remaining commits on the command line.  As the most common
+special case, specifying only two commits on the command line means
+computing the merge base between the given two commits.
 
 As a consequence, the 'merge base' is not necessarily contained in each of the
 commit arguments if more than two commits are specified. This is different
@@ -37,6 +39,18 @@
 --all::
 	Output all merge bases for the commits, instead of just one.
 
+--octopus::
+	Compute the best common ancestors of all supplied commits,
+	in preparation for an n-way merge.  This mimics the behavior
+	of 'git show-branch --merge-base'.
+
+--independent::
+	Instead of printing merge bases, print a minimal subset of
+	the supplied commits with the same ancestors.  In other words,
+	among the commits given, list those which cannot be reached
+	from any other.  This mimics the behavior of 'git show-branch
+	--independent'.
+
 DISCUSSION
 ----------
 
@@ -96,6 +110,12 @@
 --------------
 Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
 
+See also
+--------
+linkgit:git-rev-list[1],
+linkgit:git-show-branch[1],
+linkgit:git-merge[1]
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/Documentation/git-merge-index.txt b/Documentation/git-merge-index.txt
index 4d266de..921b38f 100644
--- a/Documentation/git-merge-index.txt
+++ b/Documentation/git-merge-index.txt
@@ -8,7 +8,7 @@
 
 SYNOPSIS
 --------
-'git merge-index' [-o] [-q] <merge-program> (-a | [--] <file>\*)
+'git merge-index' [-o] [-q] <merge-program> (-a | [--] <file>*)
 
 DESCRIPTION
 -----------
diff --git a/Documentation/git-mergetool--lib.txt b/Documentation/git-mergetool--lib.txt
index 78eb03f..d8df553 100644
--- a/Documentation/git-mergetool--lib.txt
+++ b/Documentation/git-mergetool--lib.txt
@@ -1,5 +1,5 @@
-git-mergetool--lib(1)
-=====================
+git-mergetool{litdd}lib(1)
+==========================
 
 NAME
 ----
@@ -16,11 +16,11 @@
 This documentation is meant for people who are studying the
 Porcelain-ish scripts and/or are writing new ones.
 
-The 'git-mergetool--lib' scriptlet is designed to be sourced (using
+The 'git-mergetool{litdd}lib' scriptlet is designed to be sourced (using
 `.`) by other shell scripts to set up functions for working
 with git merge tools.
 
-Before sourcing 'git-mergetool--lib', your script must set `TOOL_MODE`
+Before sourcing 'git-mergetool{litdd}lib', your script must set `TOOL_MODE`
 to define the operation mode for the functions listed below.
 'diff' and 'merge' are valid values.
 
diff --git a/Documentation/git-push.txt b/Documentation/git-push.txt
index 658ff2f..020955f 100644
--- a/Documentation/git-push.txt
+++ b/Documentation/git-push.txt
@@ -200,7 +200,7 @@
 	For a successfully pushed ref, the summary shows the old and new
 	values of the ref in a form suitable for using as an argument to
 	`git log` (this is `<old>..<new>` in most cases, and
-	`<old>...<new>` for forced non-fast-forward updates).
+	`<old>\...<new>` for forced non-fast-forward updates).
 +
 For a failed update, more details are given:
 +
diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt
index be23ad2..30e5c0e 100644
--- a/Documentation/git-rebase.txt
+++ b/Documentation/git-rebase.txt
@@ -199,6 +199,9 @@
 	Whether to show a diffstat of what changed upstream since the last
 	rebase. False by default.
 
+rebase.autosquash::
+	If set to true enable '--autosquash' option by default.
+
 OPTIONS
 -------
 <newbase>::
@@ -207,7 +210,7 @@
 	<upstream>.  May be any valid commit, and not just an
 	existing branch name.
 +
-As a special case, you may use "A...B" as a shortcut for the
+As a special case, you may use "A\...B" as a shortcut for the
 merge base of A and B if there is exactly one merge base. You can
 leave out at most one of A and B, in which case it defaults to HEAD.
 
@@ -250,6 +253,13 @@
 the 'ours' strategy simply discards all patches from the <branch>,
 which makes little sense.
 
+-X <strategy-option>::
+--strategy-option=<strategy-option>::
+	Pass the <strategy-option> through to the merge strategy.
+	This implies `\--merge` and, if no strategy has been
+	specified, `-s recursive`.  Note the reversal of 'ours' and
+	'theirs' as noted in above for the `-m` option.
+
 -q::
 --quiet::
 	Be quiet. Implies --no-stat.
@@ -326,6 +336,7 @@
 	instead.
 
 --autosquash::
+--no-autosquash::
 	When the commit log message begins with "squash! ..." (or
 	"fixup! ..."), and there is a commit whose title begins with
 	the same ..., automatically modify the todo list of rebase -i
@@ -334,6 +345,10 @@
 	commit from `pick` to `squash` (or `fixup`).
 +
 This option is only valid when the '--interactive' option is used.
++
+If the '--autosquash' option is enabled by default using the
+configuration variable `rebase.autosquash`, this option can be
+used to override and disable this setting.
 
 --no-ff::
 	With --interactive, cherry-pick all rebased commits instead of
@@ -459,6 +474,30 @@
 $ git rebase -i -p --onto Q O
 -----------------------------
 
+Reordering and editing commits usually creates untested intermediate
+steps.  You may want to check that your history editing did not break
+anything by running a test, or at least recompiling at intermediate
+points in history by using the "exec" command (shortcut "x").  You may
+do so by creating a todo list like this one:
+
+-------------------------------------------
+pick deadbee Implement feature XXX
+fixup f1a5c00 Fix to feature XXX
+exec make
+pick c0ffeee The oneline of the next commit
+edit deadbab The oneline of the commit after
+exec cd subdir; make test
+...
+-------------------------------------------
+
+The interactive rebase will stop when a command fails (i.e. exits with
+non-0 status) to give you an opportunity to fix the problem. You can
+continue with `git rebase --continue`.
+
+The "exec" command launches the command in a shell (the one specified
+in `$SHELL`, or the default shell if `$SHELL` is not set), so you can
+use shell features (like "cd", ">", ";" ...). The command is run from
+the root of the working tree.
 
 SPLITTING COMMITS
 -----------------
diff --git a/Documentation/git-relink.txt b/Documentation/git-relink.txt
index 25ff8f9..8a5842b 100644
--- a/Documentation/git-relink.txt
+++ b/Documentation/git-relink.txt
@@ -7,7 +7,7 @@
 
 SYNOPSIS
 --------
-'git relink' [--safe] <dir> [<dir>]\* <master_dir>
+'git relink' [--safe] <dir> [<dir>]* <master_dir>
 
 DESCRIPTION
 -----------
diff --git a/Documentation/git-rev-parse.txt b/Documentation/git-rev-parse.txt
index be4c053..341ca90 100644
--- a/Documentation/git-rev-parse.txt
+++ b/Documentation/git-rev-parse.txt
@@ -74,7 +74,7 @@
 	properly quoted for consumption by shell.  Useful when
 	you expect your parameter to contain whitespaces and
 	newlines (e.g. when using pickaxe `-S` with
-	'git diff-\*'). In contrast to the `--sq-quote` option,
+	'git diff-{asterisk}'). In contrast to the `--sq-quote` option,
 	the command input is still interpreted as usual.
 
 --not::
@@ -112,14 +112,15 @@
 +
 If a `pattern` is given, only refs matching the given shell glob are
 shown.  If the pattern does not contain a globbing character (`?`,
-`\*`, or `[`), it is turned into a prefix match by appending `/\*`.
+`{asterisk}`, or `[`), it is turned into a prefix match by
+appending `/{asterisk}`.
 
 --glob=pattern::
 	Show all refs matching the shell glob pattern `pattern`. If
 	the pattern does not start with `refs/`, this is automatically
 	prepended.  If the pattern does not contain a globbing
-	character (`?`, `\*`, or `[`), it is turned into a prefix
-	match by appending `/\*`.
+	character (`?`, `{asterisk}`, or `[`), it is turned into a prefix
+	match by appending `/{asterisk}`.
 
 --show-toplevel::
 	Show the absolute path of the top-level directory.
diff --git a/Documentation/git-rm.txt b/Documentation/git-rm.txt
index c21d19e..71e3d9f 100644
--- a/Documentation/git-rm.txt
+++ b/Documentation/git-rm.txt
@@ -78,7 +78,8 @@
 
 File globbing matches across directory boundaries.  Thus, given
 two directories `d` and `d2`, there is a difference between
-using `git rm \'d\*\'` and `git rm \'d/\*\'`, as the former will
+using `git rm {apostrophe}d{asterisk}{apostrophe}` and
+`git rm {apostrophe}d/{asterisk}{apostrophe}`, as the former will
 also remove all of directory `d2`.
 
 REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM
@@ -135,11 +136,11 @@
 
 EXAMPLES
 --------
-git rm Documentation/\\*.txt::
-	Removes all `\*.txt` files from the index that are under the
+git rm Documentation/\*.txt::
+	Removes all `*.txt` files from the index that are under the
 	`Documentation` directory and any of its subdirectories.
 +
-Note that the asterisk `\*` is quoted from the shell in this
+Note that the asterisk `*` is quoted from the shell in this
 example; this lets git, and not the shell, expand the pathnames
 of files and subdirectories under the `Documentation/` directory.
 
diff --git a/Documentation/git-show-branch.txt b/Documentation/git-show-branch.txt
index 81ba296..6453263 100644
--- a/Documentation/git-show-branch.txt
+++ b/Documentation/git-show-branch.txt
@@ -168,10 +168,10 @@
 ------------------------------------------------
 
 These three branches all forked from a common commit, [master],
-whose commit message is "Add \'git show-branch\'". The "fixes"
-branch adds one commit "Introduce "reset type" flag to "git reset"".
-The "mhf" branch adds many other commits. The current branch
-is "master".
+whose commit message is "Add {apostrophe}git show-branch{apostrophe}".
+The "fixes" branch adds one commit "Introduce "reset type" flag to
+"git reset"". The "mhf" branch adds many other commits.
+The current branch is "master".
 
 
 EXAMPLE
diff --git a/Documentation/git-show-ref.txt b/Documentation/git-show-ref.txt
index 75780d7..4696af7 100644
--- a/Documentation/git-show-ref.txt
+++ b/Documentation/git-show-ref.txt
@@ -73,8 +73,8 @@
 --exclude-existing[=<pattern>]::
 
 	Make 'git show-ref' act as a filter that reads refs from stdin of the
-	form "^(?:<anything>\s)?<refname>(?:\^\{\})?$" and performs the
-	following actions on each:
+	form "^(?:<anything>\s)?<refname>(?:{backslash}{caret}\{\})?$"
+	and performs the following actions on each:
 	(1) strip "^{}" at the end of line if any;
 	(2) ignore if pattern is provided and does not head-match refname;
 	(3) warn if refname is not a well-formed refname and skip;
diff --git a/Documentation/git-stash.txt b/Documentation/git-stash.txt
index 473889a..8728f7a 100644
--- a/Documentation/git-stash.txt
+++ b/Documentation/git-stash.txt
@@ -104,18 +104,22 @@
 have conflicts (which are stored in the index, where you therefore can no
 longer apply the changes as they were originally).
 +
-When no `<stash>` is given, `stash@\{0}` is assumed.
+When no `<stash>` is given, `stash@\{0}` is assumed, otherwise `<stash>` must
+be a reference of the form `stash@\{<revision>}`.
 
 apply [--index] [-q|--quiet] [<stash>]::
 
-	Like `pop`, but do not remove the state from the stash list.
+	Like `pop`, but do not remove the state from the stash list. Unlike `pop`,
+	`<stash>` may be any commit that looks like a commit created by
+	`stash save` or `stash create`.
 
 branch <branchname> [<stash>]::
 
 	Creates and checks out a new branch named `<branchname>` starting from
 	the commit at which the `<stash>` was originally created, applies the
-	changes recorded in `<stash>` to the new working tree and index, then
-	drops the `<stash>` if that completes successfully. When no `<stash>`
+	changes recorded in `<stash>` to the new working tree and index.
+	If that succeeds, and `<stash>` is a reference of the form
+	`stash@{<revision>}`, it then drops the `<stash>`. When no `<stash>`
 	is given, applies the latest one.
 +
 This is useful if the branch on which you ran `git stash save` has
@@ -132,7 +136,9 @@
 drop [-q|--quiet] [<stash>]::
 
 	Remove a single stashed state from the stash list. When no `<stash>`
-	is given, it removes the latest one. i.e. `stash@\{0}`
+	is given, it removes the latest one. i.e. `stash@\{0}`, otherwise
+	`<stash>` must a valid stash log reference of the form
+	`stash@\{<revision>}`.
 
 create::
 
diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt
index 2fd054c..dae190a 100644
--- a/Documentation/git-status.txt
+++ b/Documentation/git-status.txt
@@ -55,7 +55,11 @@
 
 --ignore-submodules[=<when>]::
 	Ignore changes to submodules when looking for changes. <when> can be
-	either "untracked", "dirty" or "all", which is the default. When
+	either "none", "untracked", "dirty" or "all", which is the default.
+	Using "none" will consider the submodule modified when it either contains
+	untracked or modified files or its HEAD differs from the commit recorded
+	in the superproject and can be used to override any settings of the
+	'ignore' option in linkgit:git-config[1] or linkgit:gitmodules[5]. When
 	"untracked" is used submodules are not considered dirty when they only
 	contain untracked content (but they are still scanned for modified
 	content). Using "dirty" ignores all changes to the work tree of submodules,
diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt
index b09bd97..4b84d08 100644
--- a/Documentation/git-svn.txt
+++ b/Documentation/git-svn.txt
@@ -646,6 +646,12 @@
 	revision fetched.  If unset, 'git svn' assumes this option to
 	be "true".
 
+svn.pathnameencoding::
+	This instructs git svn to recode pathnames to a given encoding.
+	It can be used by windows users and by those who work in non-utf8
+	locales to avoid corrupted file names with non-ASCII characters.
+	Valid encodings are the ones supported by Perl's Encode module.
+
 Since the noMetadata, rewriteRoot, rewriteUUID, useSvnsyncProps and useSvmProps
 options all affect the metadata generated and used by 'git svn'; they
 *must* be set in the configuration file before any history is imported
diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index 765d4b3..74d1d49 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -12,7 +12,7 @@
 'git update-index'
 	     [--add] [--remove | --force-remove] [--replace]
 	     [--refresh] [-q] [--unmerged] [--ignore-missing]
-	     [--cacheinfo <mode> <object> <file>]\*
+	     [--cacheinfo <mode> <object> <file>]*
 	     [--chmod=(+|-)x]
 	     [--assume-unchanged | --no-assume-unchanged]
 	     [--skip-worktree | --no-skip-worktree]
@@ -21,7 +21,7 @@
 	     [--info-only] [--index-info]
 	     [-z] [--stdin]
 	     [--verbose]
-	     [--] [<file>]\*
+	     [--] [<file>]*
 
 DESCRIPTION
 -----------
diff --git a/Documentation/git-web--browse.txt b/Documentation/git-web--browse.txt
index 7572049..e1586c7 100644
--- a/Documentation/git-web--browse.txt
+++ b/Documentation/git-web--browse.txt
@@ -1,5 +1,5 @@
-git-web--browse(1)
-==================
+git-web{litdd}browse(1)
+=======================
 
 NAME
 ----
@@ -7,7 +7,7 @@
 
 SYNOPSIS
 --------
-'git web--browse' [OPTIONS] URL/FILE ...
+'git web{litdd}browse' [OPTIONS] URL/FILE ...
 
 DESCRIPTION
 -----------
@@ -71,7 +71,7 @@
 When the browser, specified by options or configuration variables, is
 not among the supported ones, then the corresponding
 'browser.<tool>.cmd' configuration variable will be looked up. If this
-variable exists then 'git web--browse' will treat the specified tool
+variable exists then 'git web{litdd}browse' will treat the specified tool
 as a custom command and will use a shell eval to run the command with
 the URLs passed as arguments.
 
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 564586b..e5a27d8 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -317,6 +317,17 @@
 	smudge = cat
 ------------------------
 
+For best results, `clean` should not alter its output further if it is
+run twice ("clean->clean" should be equivalent to "clean"), and
+multiple `smudge` commands should not alter `clean`'s output
+("smudge->smudge->clean" should be equivalent to "clean").  See the
+section on merging below.
+
+The "indent" filter is well-behaved in this regard: it will not modify
+input that is already correctly indented.  In this case, the lack of a
+smudge filter means that the clean filter _must_ accept its own output
+without modifying it.
+
 
 Interaction between checkin/checkout attributes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -331,6 +342,29 @@
 with `text`, and then `ident` and fed to `filter`.
 
 
+Merging branches with differing checkin/checkout attributes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you have added attributes to a file that cause the canonical
+repository format for that file to change, such as adding a
+clean/smudge filter or text/eol/ident attributes, merging anything
+where the attribute is not in place would normally cause merge
+conflicts.
+
+To prevent these unnecessary merge conflicts, git can be told to run a
+virtual check-out and check-in of all three stages of a file when
+resolving a three-way merge by setting the `merge.renormalize`
+configuration variable.  This prevents changes caused by check-in
+conversion from causing spurious merge conflicts when a converted file
+is merged with an unconverted file.
+
+As long as a "smudge->clean" results in the same output as a "clean"
+even on files that are already smudged, this strategy will
+automatically resolve all filter-related conflicts.  Filters that do
+not act in this way may cause additional merge conflicts that must be
+resolved manually.
+
+
 Generating diff text
 ~~~~~~~~~~~~~~~~~~~~
 
@@ -441,6 +475,8 @@
 
 - `cpp` suitable for source code in the C and C++ languages.
 
+- `csharp` suitable for source code in the C# language.
+
 - `html` suitable for HTML/XHTML documents.
 
 - `java` suitable for source code in the Java language.
diff --git a/Documentation/gitcore-tutorial.txt b/Documentation/gitcore-tutorial.txt
index ed3ddc9..5e9c5eb 100644
--- a/Documentation/gitcore-tutorial.txt
+++ b/Documentation/gitcore-tutorial.txt
@@ -110,7 +110,7 @@
 and a reference to an object is always the 40-byte hex
 representation of that SHA1 name. The files in the `refs`
 subdirectory are expected to contain these hex references
-(usually with a final `\'\n\'` at the end), and you should thus
+(usually with a final `\n` at the end), and you should thus
 expect to see a number of 41-byte files containing these
 references in these `refs` subdirectories when you actually start
 populating your tree.
@@ -310,7 +310,7 @@
 ----------------
 
 which is another incomprehensible object name. Again, if you want to,
-you can use `git cat-file -t 8988d\...` to see that this time the object
+you can use `git cat-file -t 8988d...` to see that this time the object
 is not a "blob" object, but a "tree" object (you can also use
 `git cat-file` to actually output the raw object contents, but you'll see
 mainly a binary mess, so that's less interesting).
@@ -436,8 +436,8 @@
 (note how we didn't need the `\--add` flag this time, since git knew
 about the file already).
 
-Note what happens to the different 'git diff-\*' versions here. After
-we've updated `hello` in the index, `git diff-files -p` now shows no
+Note what happens to the different 'git diff-{asterisk}' versions here.
+After we've updated `hello` in the index, `git diff-files -p` now shows no
 differences, but `git diff-index -p HEAD` still *does* show that the
 current state is different from the state we committed. In fact, now
 'git diff-index' shows the same difference whether we use the `--cached`
@@ -494,7 +494,7 @@
 [NOTE]
 ============
 Here is an ASCII art by Jon Loeliger that illustrates how
-various diff-\* commands compare things.
+various 'diff-{asterisk}' commands compare things.
 
                       diff-tree
                        +----+
@@ -958,11 +958,11 @@
 The first two lines indicate that it is showing the two branches
 and the first line of the commit log message from their
 top-of-the-tree commits, you are currently on `master` branch
-(notice the asterisk `\*` character), and the first column for
+(notice the asterisk `{asterisk}` character), and the first column for
 the later output lines is used to show commits contained in the
 `master` branch, and the second column for the `mybranch`
 branch. Three commits are shown along with their log messages.
-All of them have non blank characters in the first column (`*`
+All of them have non blank characters in the first column (`{asterisk}`
 shows an ordinary commit on the current branch, `-` is a merge commit), which
 means they are now part of the `master` branch. Only the "Some
 work" commit has the plus `+` character in the second column,
@@ -1092,7 +1092,7 @@
 first obtains the topmost commit object name from the remote site
 by looking at the specified refname under `repo.git/refs/` directory,
 and then tries to obtain the
-commit object by downloading from `repo.git/objects/xx/xxx\...`
+commit object by downloading from `repo.git/objects/xx/xxx...`
 using the object name of that commit object.  Then it reads the
 commit object to find out its parent commits and the associate
 tree object; it repeats this process until it gets all the
@@ -1420,7 +1420,7 @@
 directory.
 
 [NOTE]
-You will see two files, `pack-\*.pack` and `pack-\*.idx`,
+You will see two files, `pack-{asterisk}.pack` and `pack-{asterisk}.idx`,
 in `.git/objects/pack` directory. They are closely related to
 each other, and if you ever copy them by hand to a different
 repository for whatever reason, you should make sure you copy
diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt
index e10fa88..7dc2e8b 100644
--- a/Documentation/gitignore.txt
+++ b/Documentation/gitignore.txt
@@ -90,12 +90,12 @@
  - Otherwise, git treats the pattern as a shell glob suitable
    for consumption by fnmatch(3) with the FNM_PATHNAME flag:
    wildcards in the pattern will not match a / in the pathname.
-   For example, "Documentation/\*.html" matches
+   For example, "Documentation/{asterisk}.html" matches
    "Documentation/git.html" but not "Documentation/ppc/ppc.html"
    or "tools/perf/Documentation/perf.html".
 
  - A leading slash matches the beginning of the pathname.
-   For example, "/*.c" matches "cat-file.c" but not
+   For example, "/{asterisk}.c" matches "cat-file.c" but not
    "mozilla-sha1/sha1.c".
 
 An example:
diff --git a/Documentation/gitmodules.txt b/Documentation/gitmodules.txt
index 72a13d1..bcffd95 100644
--- a/Documentation/gitmodules.txt
+++ b/Documentation/gitmodules.txt
@@ -44,6 +44,21 @@
 	This config option is overridden if 'git submodule update' is given
 	the '--merge' or '--rebase' options.
 
+submodule.<name>.ignore::
+	Defines under what circumstances "git status" and the diff family show
+	a submodule as modified. When set to "all", it will never be considered
+	modified, "dirty" will ignore all changes to the submodules work tree and
+	takes only differences between the HEAD of the submodule and the commit
+	recorded in the superproject into account. "untracked" will additionally
+	let submodules with modified tracked files in their work tree show up.
+	Using "none" (the default when this option is not set) also shows
+	submodules that have untracked files in their work tree as changed.
+	If this option is also present in the submodules entry in .git/config of
+	the superproject, the setting there will override the one found in
+	.gitmodules.
+	Both settings can be overridden on the command line by using the
+	"--ignore-submodule" option.
+
 
 EXAMPLES
 --------
diff --git a/Documentation/howto/revert-branch-rebase.txt b/Documentation/howto/revert-branch-rebase.txt
index 8c32da6..093c656 100644
--- a/Documentation/howto/revert-branch-rebase.txt
+++ b/Documentation/howto/revert-branch-rebase.txt
@@ -112,25 +112,19 @@
 $ git rebase master
 * Applying: Redo "revert" using three-way merge machinery.
 First trying simple merge strategy to cherry-pick.
-Finished one cherry-pick.
 * Applying: Remove git-apply-patch-script.
 First trying simple merge strategy to cherry-pick.
 Simple cherry-pick fails; trying Automatic cherry-pick.
 Removing Documentation/git-apply-patch-script.txt
 Removing git-apply-patch-script
-Finished one cherry-pick.
 * Applying: Document "git cherry-pick" and "git revert"
 First trying simple merge strategy to cherry-pick.
-Finished one cherry-pick.
 * Applying: mailinfo and applymbox updates
 First trying simple merge strategy to cherry-pick.
-Finished one cherry-pick.
 * Applying: Show commits in topo order and name all commits.
 First trying simple merge strategy to cherry-pick.
-Finished one cherry-pick.
 * Applying: More documentation updates.
 First trying simple merge strategy to cherry-pick.
-Finished one cherry-pick.
 ------------------------------------------------
 
 The temporary tag 'pu-anchor' is me just being careful, in case 'git
diff --git a/Documentation/install-webdoc.sh b/Documentation/install-webdoc.sh
index 34d02a2..37e67d1 100755
--- a/Documentation/install-webdoc.sh
+++ b/Documentation/install-webdoc.sh
@@ -12,7 +12,7 @@
 	then
 		: did not match
 	elif test -f "$T/$h" &&
-	   $DIFF -u -I'Last updated [0-9][0-9]-[A-Z][a-z][a-z]-' "$T/$h" "$h"
+		$DIFF -u -I'^Last updated ' "$T/$h" "$h"
 	then
 		:; # up to date
 	else
diff --git a/Documentation/merge-config.txt b/Documentation/merge-config.txt
index a403155..b72f533 100644
--- a/Documentation/merge-config.txt
+++ b/Documentation/merge-config.txt
@@ -15,6 +15,16 @@
 	during a merge; if not specified, defaults to the value of
 	diff.renameLimit.
 
+merge.renormalize::
+	Tell git that canonical representation of files in the
+	repository has changed over time (e.g. earlier commits record
+	text files with CRLF line endings, but recent ones use LF line
+	endings).  In such a repository, git can convert the data
+	recorded in commits to a canonical form before performing a
+	merge to reduce unnecessary conflicts.  For more information,
+	see section "Merging branches with differing checkin/checkout
+	attributes" in linkgit:gitattributes[5].
+
 merge.stat::
 	Whether to print the diffstat between ORIG_HEAD and the merge result
 	at the end of the merge.  True by default.
diff --git a/Documentation/merge-strategies.txt b/Documentation/merge-strategies.txt
index a5bc1db..049313d 100644
--- a/Documentation/merge-strategies.txt
+++ b/Documentation/merge-strategies.txt
@@ -40,6 +40,18 @@
 theirs;;
 	This is opposite of 'ours'.
 
+renormalize;;
+	This runs a virtual check-out and check-in of all three stages
+	of a file when resolving a three-way merge.  This option is
+	meant to be used when merging branches with different clean
+	filters or end-of-line normalization rules.  See "Merging
+	branches with differing checkin/checkout attributes" in
+	linkgit:gitattributes[5] for details.
+
+no-renormalize;;
+	Disables the `renormalize` option.  This overrides the
+	`merge.renormalize` configuration variable.
+
 subtree[=path];;
 	This option is a more advanced form of 'subtree' strategy, where
 	the strategy makes a guess on how two trees must be shifted to
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index cc562a0..e2237ae 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -321,7 +321,7 @@
 	reflog entries from the most recent one to older ones.
 	When this option is used you cannot specify commits to
 	exclude (that is, '{caret}commit', 'commit1..commit2',
-	nor 'commit1...commit2' notations cannot be used).
+	nor 'commit1\...commit2' notations cannot be used).
 +
 With '\--pretty' format other than oneline (for obvious reasons),
 this causes the output to have two extra lines of information
diff --git a/Documentation/technical/api-merge.txt b/Documentation/technical/api-merge.txt
new file mode 100644
index 0000000..a7e050b
--- /dev/null
+++ b/Documentation/technical/api-merge.txt
@@ -0,0 +1,73 @@
+merge API
+=========
+
+The merge API helps a program to reconcile two competing sets of
+improvements to some files (e.g., unregistered changes from the work
+tree versus changes involved in switching to a new branch), reporting
+conflicts if found.  The library called through this API is
+responsible for a few things.
+
+ * determining which trees to merge (recursive ancestor consolidation);
+
+ * lining up corresponding files in the trees to be merged (rename
+   detection, subtree shifting), reporting edge cases like add/add
+   and rename/rename conflicts to the user;
+
+ * performing a three-way merge of corresponding files, taking
+   path-specific merge drivers (specified in `.gitattributes`)
+   into account.
+
+Low-level (single file) merge
+-----------------------------
+
+`ll_merge`::
+
+	Perform a three-way single-file merge in core.  This is
+	a thin wrapper around `xdl_merge` that takes the path and
+	any merge backend specified in `.gitattributes` or
+	`.git/info/attributes` into account.  Returns 0 for a
+	clean merge.
+
+The caller:
+
+1. allocates an mmbuffer_t variable for the result;
+2. allocates and fills variables with the file's original content
+   and two modified versions (using `read_mmfile`, for example);
+3. calls ll_merge();
+4. reads the output from result_buf.ptr and result_buf.size;
+5. releases buffers when finished (free(ancestor.ptr); free(ours.ptr);
+   free(theirs.ptr); free(result_buf.ptr);).
+
+If the modifications do not merge cleanly, `ll_merge` will return a
+nonzero value and `result_buf` will generally include a description of
+the conflict bracketed by markers such as the traditional `<<<<<<<`
+and `>>>>>>>`.
+
+The `ancestor_label`, `our_label`, and `their_label` parameters are
+used to label the different sides of a conflict if the merge driver
+supports this.
+
+The `flag` parameter is a bitfield:
+
+ - The `LL_OPT_VIRTUAL_ANCESTOR` bit indicates whether this is an
+   internal merge to consolidate ancestors for a recursive merge.
+
+ - The `LL_OPT_FAVOR_MASK` bits allow local conflicts to be automatically
+   resolved in favor of one side or the other (as in 'git merge-file'
+   `--ours`/`--theirs`/`--union`).
+   They can be populated by `create_ll_flag`, whose argument can be
+   `XDL_MERGE_FAVOR_OURS`, `XDL_MERGE_FAVOR_THEIRS`, or
+   `XDL_MERGE_FAVOR_UNION`.
+
+Everything else
+---------------
+
+Talk about <merge-recursive.h> and merge_file():
+
+ - merge_trees() to merge with rename detection
+ - merge_recursive() for ancestor consolidation
+ - try_merge_command() for other strategies
+ - conflict format
+ - merge options
+
+(Daniel, Miklos, Stephan, JC)
diff --git a/Documentation/technical/api-parse-options.txt b/Documentation/technical/api-parse-options.txt
index 312e3b2..c5d141c 100644
--- a/Documentation/technical/api-parse-options.txt
+++ b/Documentation/technical/api-parse-options.txt
@@ -201,7 +201,7 @@
 If not stated otherwise, interpret the arguments as follows:
 
 * `short` is a character for the short option
-  (e.g. `\'e\'` for `-e`, use `0` to omit),
+  (e.g. `{apostrophe}e{apostrophe}` for `-e`, use `0` to omit),
 
 * `long` is a string for the long option
   (e.g. `"example"` for `\--example`, use `NULL` to omit),
@@ -228,10 +228,10 @@
 The callback mechanism is as follows:
 
 * Inside `func`, the only interesting member of the structure
-  given by `opt` is the void pointer `opt->value`.
-  `\*opt->value` will be the value that is saved into `var`, if you
+  given by `opt` is the void pointer `opt\->value`.
+  `\*opt\->value` will be the value that is saved into `var`, if you
   use `OPT_CALLBACK()`.
-  For example, do `*(unsigned long *)opt->value = 42;` to get 42
+  For example, do `*(unsigned long *)opt\->value = 42;` to get 42
   into an `unsigned long` variable.
 
 * Return value `0` indicates success and non-zero return
diff --git a/Documentation/technical/api-tree-walking.txt b/Documentation/technical/api-tree-walking.txt
index 55b7286..14af37c 100644
--- a/Documentation/technical/api-tree-walking.txt
+++ b/Documentation/technical/api-tree-walking.txt
@@ -42,6 +42,8 @@
 
 * `data` can be anything the `fn` callback would want to use.
 
+* `show_all_errors` tells whether to stop at the first error or not.
+
 Initializing
 ------------
 
diff --git a/Documentation/user-manual.txt b/Documentation/user-manual.txt
index 22aee34..fecc4eb 100644
--- a/Documentation/user-manual.txt
+++ b/Documentation/user-manual.txt
@@ -4251,9 +4251,9 @@
   negative numbers in case of different errors--and 0 on success.
 
 - the variable `sha1` in the function signature of `get_sha1()` is `unsigned
-  char \*`, but is actually expected to be a pointer to `unsigned
+  char {asterisk}`, but is actually expected to be a pointer to `unsigned
   char[20]`.  This variable will contain the 160-bit SHA-1 of the given
-  commit.  Note that whenever a SHA-1 is passed as `unsigned char \*`, it
+  commit.  Note that whenever a SHA-1 is passed as `unsigned char {asterisk}`, it
   is the binary representation, as opposed to the ASCII representation in
   hex characters, which is passed as `char *`.
 
diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN
index 8efc557..f6d301a 100755
--- a/GIT-VERSION-GEN
+++ b/GIT-VERSION-GEN
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=GIT-VERSION-FILE
-DEF_VER=v1.7.2.3
+DEF_VER=v1.7.2.GIT
 
 LF='
 '
diff --git a/Makefile b/Makefile
index 1f11618..8b7c243 100644
--- a/Makefile
+++ b/Makefile
@@ -68,6 +68,8 @@
 #
 # Define NO_MKSTEMPS if you don't have mkstemps in the C library.
 #
+# Define NO_STRTOK_R if you don't have strtok_r in the C library.
+#
 # Define NO_LIBGEN_H if you don't have libgen.h.
 #
 # Define NEEDS_LIBGEN if your libgen needs -lgen when linking
@@ -308,6 +310,7 @@
 TCLTK_PATH = wish
 PTHREAD_LIBS = -lpthread
 PTHREAD_CFLAGS =
+GCOV = gcov
 
 export TCL_PATH TCLTK_PATH
 
@@ -408,12 +411,17 @@
 TEST_PROGRAMS_NEED_X += test-delta
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-genrandom
+TEST_PROGRAMS_NEED_X += test-line-buffer
 TEST_PROGRAMS_NEED_X += test-match-trees
+TEST_PROGRAMS_NEED_X += test-obj-pool
 TEST_PROGRAMS_NEED_X += test-parse-options
 TEST_PROGRAMS_NEED_X += test-path-utils
 TEST_PROGRAMS_NEED_X += test-run-command
 TEST_PROGRAMS_NEED_X += test-sha1
 TEST_PROGRAMS_NEED_X += test-sigchain
+TEST_PROGRAMS_NEED_X += test-string-pool
+TEST_PROGRAMS_NEED_X += test-svn-fe
+TEST_PROGRAMS_NEED_X += test-treap
 TEST_PROGRAMS_NEED_X += test-index-version
 
 TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X))
@@ -468,6 +476,7 @@
 
 LIB_FILE=libgit.a
 XDIFF_LIB=xdiff/lib.a
+VCSSVN_LIB=vcs-svn/lib.a
 
 LIB_H += advice.h
 LIB_H += archive.h
@@ -1035,6 +1044,7 @@
 	NO_UNSETENV = YesPlease
 	NO_STRCASESTR = YesPlease
 	NO_STRLCPY = YesPlease
+	NO_STRTOK_R = YesPlease
 	NO_MEMMEM = YesPlease
 	# NEEDS_LIBICONV = YesPlease
 	NO_ICONV = YesPlease
@@ -1089,6 +1099,7 @@
 	NO_UNSETENV = YesPlease
 	NO_STRCASESTR = YesPlease
 	NO_STRLCPY = YesPlease
+	NO_STRTOK_R = YesPlease
 	NO_MEMMEM = YesPlease
 	NEEDS_LIBICONV = YesPlease
 	OLD_ICONV = YesPlease
@@ -1319,6 +1330,10 @@
 ifdef NO_STRTOULL
 	COMPAT_CFLAGS += -DNO_STRTOULL
 endif
+ifdef NO_STRTOK_R
+	COMPAT_CFLAGS += -DNO_STRTOK_R
+	COMPAT_OBJS += compat/strtok_r.o
+endif
 ifdef NO_SETENV
 	COMPAT_CFLAGS += -DNO_SETENV
 	COMPAT_OBJS += compat/setenv.o
@@ -1485,6 +1500,7 @@
 	QUIET_BUILT_IN = @echo '   ' BUILTIN $@;
 	QUIET_GEN      = @echo '   ' GEN $@;
 	QUIET_LNCP     = @echo '   ' LN/CP $@;
+	QUIET_GCOV     = @echo '   ' GCOV $@;
 	QUIET_SUBDIR0  = +@subdir=
 	QUIET_SUBDIR1  = ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
 			 $(MAKE) $(PRINT_DIR) -C $$subdir
@@ -1739,7 +1755,9 @@
 endif
 XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
 	xdiff/xmerge.o xdiff/xpatience.o
-OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS)
+VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \
+	vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o
+OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
 
 dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
 dep_dirs := $(addsuffix .depend,$(sort $(dir $(OBJECTS))))
@@ -1861,6 +1879,11 @@
 xdiff-interface.o $(XDIFF_OBJS): \
 	xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \
 	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
+
+$(VCSSVN_OBJS): \
+	vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \
+	vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
+	vcs-svn/svndump.h
 endif
 
 exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
@@ -1873,12 +1896,16 @@
 
 config.s config.o: EXTRA_CPPFLAGS = -DETC_GITCONFIG='"$(ETC_GITCONFIG_SQ)"'
 
-http.s http.o: EXTRA_CPPFLAGS = -DGIT_USER_AGENT='"git/$(GIT_VERSION)"'
+http.s http.o: EXTRA_CPPFLAGS = -DGIT_HTTP_USER_AGENT='"git/$(GIT_VERSION)"'
 
 ifdef NO_EXPAT
 http-walker.s http-walker.o: EXTRA_CPPFLAGS = -DNO_EXPAT
 endif
 
+ifdef NO_REGEX
+compat/regex/regex.o: EXTRA_CPPFLAGS = -DGAWK -DNO_MBSUPPORT
+endif
+
 git-%$X: %.o $(GITLIBS)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
@@ -1909,6 +1936,8 @@
 $(XDIFF_LIB): $(XDIFF_OBJS)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS)
 
+$(VCSSVN_LIB): $(VCSSVN_OBJS)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(VCSSVN_OBJS)
 
 doc:
 	$(MAKE) -C Documentation all
@@ -2007,12 +2036,18 @@
 
 test-delta$X: diff-delta.o patch-delta.o
 
+test-line-buffer$X: vcs-svn/lib.a
+
 test-parse-options$X: parse-options.o
 
+test-string-pool$X: vcs-svn/lib.a
+
+test-svn-fe$X: vcs-svn/lib.a
+
 .PRECIOUS: $(TEST_OBJS)
 
 test-%$X: test-%.o $(GITLIBS)
-	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)
 
 check-sha1:: test-sha1$X
 	./test-sha1.sh
@@ -2187,8 +2222,8 @@
 	$(RM) configure
 
 clean:
-	$(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o \
-		builtin/*.o $(LIB_FILE) $(XDIFF_LIB)
+	$(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o vcs-svn/*.o \
+		builtin/*.o $(LIB_FILE) $(XDIFF_LIB) $(VCSSVN_LIB)
 	$(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X
 	$(RM) $(TEST_PROGRAMS)
 	$(RM) -r bin-wrappers
@@ -2291,11 +2326,18 @@
 	$(MAKE) coverage-build
 	$(MAKE) coverage-report
 
+object_dirs := $(sort $(dir $(OBJECTS)))
 coverage-clean:
-	rm -f *.gcda *.gcno
+	$(RM) $(addsuffix *.gcov,$(object_dirs))
+	$(RM) $(addsuffix *.gcda,$(object_dirs))
+	$(RM) $(addsuffix *.gcno,$(object_dirs))
+	$(RM) coverage-untested-functions
+	$(RM) -r cover_db/
+	$(RM) -r cover_db_html/
 
 COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
 COVERAGE_LDFLAGS = $(CFLAGS)  -O0 -lgcov
+GCOVFLAGS = --preserve-paths --branch-probabilities --all-blocks
 
 coverage-build: coverage-clean
 	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
@@ -2303,7 +2345,17 @@
 		-j1 test
 
 coverage-report:
-	gcov -b *.c
+	$(QUIET_GCOV)for dir in $(object_dirs); do \
+		$(GCOV) $(GCOVFLAGS) --object-directory=$$dir $$dir*.c || exit; \
+	done
+
+coverage-untested-functions: coverage-report
 	grep '^function.*called 0 ' *.c.gcov \
 		| sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
-		| tee coverage-untested-functions
+		> coverage-untested-functions
+
+cover_db: coverage-report
+	gcov2perl -db cover_db *.gcov
+
+cover_db_html: cover_db
+	cover -report html -outputdir cover_db_html cover_db
diff --git a/RelNotes b/RelNotes
index 5217e8e..0e11dea 120000
--- a/RelNotes
+++ b/RelNotes
@@ -1 +1 @@
-Documentation/RelNotes-1.7.2.3.txt
\ No newline at end of file
+Documentation/RelNotes-1.7.3.txt
\ No newline at end of file
diff --git a/alias.c b/alias.c
index 372b7d8..eb9f08b 100644
--- a/alias.c
+++ b/alias.c
@@ -22,6 +22,13 @@
 	return alias_val;
 }
 
+#define SPLIT_CMDLINE_BAD_ENDING 1
+#define SPLIT_CMDLINE_UNCLOSED_QUOTE 2
+static const char *split_cmdline_errors[] = {
+	"cmdline ends with \\",
+	"unclosed quote"
+};
+
 int split_cmdline(char *cmdline, const char ***argv)
 {
 	int src, dst, count = 0, size = 16;
@@ -53,7 +60,7 @@
 				if (!c) {
 					free(*argv);
 					*argv = NULL;
-					return error("cmdline ends with \\");
+					return -SPLIT_CMDLINE_BAD_ENDING;
 				}
 			}
 			cmdline[dst++] = c;
@@ -66,7 +73,7 @@
 	if (quoted) {
 		free(*argv);
 		*argv = NULL;
-		return error("unclosed quote");
+		return -SPLIT_CMDLINE_UNCLOSED_QUOTE;
 	}
 
 	ALLOC_GROW(*argv, count+1, size);
@@ -75,3 +82,6 @@
 	return count;
 }
 
+const char *split_cmdline_strerror(int split_cmdline_errno) {
+	return split_cmdline_errors[-split_cmdline_errno-1];
+}
diff --git a/bisect.c b/bisect.c
index b556b11..060c042 100644
--- a/bisect.c
+++ b/bisect.c
@@ -141,7 +141,8 @@
 		enum object_type type;
 		unsigned long size;
 		char *buf = read_sha1_file(commit->object.sha1, &type, &size);
-		char *ep, *sp;
+		const char *subject_start;
+		int subject_len;
 
 		fprintf(stderr, "%c%c%c ",
 			(flags & TREESAME) ? ' ' : 'T',
@@ -156,13 +157,9 @@
 			fprintf(stderr, " %.*s", 8,
 				sha1_to_hex(pp->item->object.sha1));
 
-		sp = strstr(buf, "\n\n");
-		if (sp) {
-			sp += 2;
-			for (ep = sp; *ep && *ep != '\n'; ep++)
-				;
-			fprintf(stderr, " %.*s", (int)(ep - sp), sp);
-		}
+		subject_len = find_commit_subject(buf, &subject_start);
+		if (subject_len)
+			fprintf(stderr, " %.*s", subject_len, subject_start);
 		fprintf(stderr, "\n");
 	}
 }
diff --git a/branch.c b/branch.c
index 2ab42aa..93dc866 100644
--- a/branch.c
+++ b/branch.c
@@ -159,7 +159,7 @@
 			dont_change_ref = 1;
 		else if (!force)
 			die("A branch named '%s' already exists.", name);
-		else if (!is_bare_repository() && !strcmp(head, name))
+		else if (!is_bare_repository() && head && !strcmp(head, name))
 			die("Cannot force update the current branch.");
 		forcing = 1;
 	}
diff --git a/builtin/apply.c b/builtin/apply.c
index f38c1f7..23c18c5 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -416,48 +416,190 @@
 	return name;
 }
 
-static char *find_name(const char *line, char *def, int p_value, int terminate)
+static char *find_name_gnu(const char *line, char *def, int p_value)
+{
+	struct strbuf name = STRBUF_INIT;
+	char *cp;
+
+	/*
+	 * Proposed "new-style" GNU patch/diff format; see
+	 * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2
+	 */
+	if (unquote_c_style(&name, line, NULL)) {
+		strbuf_release(&name);
+		return NULL;
+	}
+
+	for (cp = name.buf; p_value; p_value--) {
+		cp = strchr(cp, '/');
+		if (!cp) {
+			strbuf_release(&name);
+			return NULL;
+		}
+		cp++;
+	}
+
+	/* name can later be freed, so we need
+	 * to memmove, not just return cp
+	 */
+	strbuf_remove(&name, 0, cp - name.buf);
+	free(def);
+	if (root)
+		strbuf_insert(&name, 0, root, root_len);
+	return squash_slash(strbuf_detach(&name, NULL));
+}
+
+static size_t tz_len(const char *line, size_t len)
+{
+	const char *tz, *p;
+
+	if (len < strlen(" +0500") || line[len-strlen(" +0500")] != ' ')
+		return 0;
+	tz = line + len - strlen(" +0500");
+
+	if (tz[1] != '+' && tz[1] != '-')
+		return 0;
+
+	for (p = tz + 2; p != line + len; p++)
+		if (!isdigit(*p))
+			return 0;
+
+	return line + len - tz;
+}
+
+static size_t date_len(const char *line, size_t len)
+{
+	const char *date, *p;
+
+	if (len < strlen("72-02-05") || line[len-strlen("-05")] != '-')
+		return 0;
+	p = date = line + len - strlen("72-02-05");
+
+	if (!isdigit(*p++) || !isdigit(*p++) || *p++ != '-' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != '-' ||
+	    !isdigit(*p++) || !isdigit(*p++))	/* Not a date. */
+		return 0;
+
+	if (date - line >= strlen("19") &&
+	    isdigit(date[-1]) && isdigit(date[-2]))	/* 4-digit year */
+		date -= strlen("19");
+
+	return line + len - date;
+}
+
+static size_t short_time_len(const char *line, size_t len)
+{
+	const char *time, *p;
+
+	if (len < strlen(" 07:01:32") || line[len-strlen(":32")] != ':')
+		return 0;
+	p = time = line + len - strlen(" 07:01:32");
+
+	/* Permit 1-digit hours? */
+	if (*p++ != ' ' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' ||
+	    !isdigit(*p++) || !isdigit(*p++))	/* Not a time. */
+		return 0;
+
+	return line + len - time;
+}
+
+static size_t fractional_time_len(const char *line, size_t len)
+{
+	const char *p;
+	size_t n;
+
+	/* Expected format: 19:41:17.620000023 */
+	if (!len || !isdigit(line[len - 1]))
+		return 0;
+	p = line + len - 1;
+
+	/* Fractional seconds. */
+	while (p > line && isdigit(*p))
+		p--;
+	if (*p != '.')
+		return 0;
+
+	/* Hours, minutes, and whole seconds. */
+	n = short_time_len(line, p - line);
+	if (!n)
+		return 0;
+
+	return line + len - p + n;
+}
+
+static size_t trailing_spaces_len(const char *line, size_t len)
+{
+	const char *p;
+
+	/* Expected format: ' ' x (1 or more)  */
+	if (!len || line[len - 1] != ' ')
+		return 0;
+
+	p = line + len;
+	while (p != line) {
+		p--;
+		if (*p != ' ')
+			return line + len - (p + 1);
+	}
+
+	/* All spaces! */
+	return len;
+}
+
+static size_t diff_timestamp_len(const char *line, size_t len)
+{
+	const char *end = line + len;
+	size_t n;
+
+	/*
+	 * Posix: 2010-07-05 19:41:17
+	 * GNU: 2010-07-05 19:41:17.620000023 -0500
+	 */
+
+	if (!isdigit(end[-1]))
+		return 0;
+
+	n = tz_len(line, end - line);
+	end -= n;
+
+	n = short_time_len(line, end - line);
+	if (!n)
+		n = fractional_time_len(line, end - line);
+	end -= n;
+
+	n = date_len(line, end - line);
+	if (!n)	/* No date.  Too bad. */
+		return 0;
+	end -= n;
+
+	if (end == line)	/* No space before date. */
+		return 0;
+	if (end[-1] == '\t') {	/* Success! */
+		end--;
+		return line + len - end;
+	}
+	if (end[-1] != ' ')	/* No space before date. */
+		return 0;
+
+	/* Whitespace damage. */
+	end -= trailing_spaces_len(line, end - line);
+	return line + len - end;
+}
+
+static char *find_name_common(const char *line, char *def, int p_value,
+				const char *end, int terminate)
 {
 	int len;
 	const char *start = NULL;
 
 	if (p_value == 0)
 		start = line;
-
-	if (*line == '"') {
-		struct strbuf name = STRBUF_INIT;
-
-		/*
-		 * Proposed "new-style" GNU patch/diff format; see
-		 * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2
-		 */
-		if (!unquote_c_style(&name, line, NULL)) {
-			char *cp;
-
-			for (cp = name.buf; p_value; p_value--) {
-				cp = strchr(cp, '/');
-				if (!cp)
-					break;
-				cp++;
-			}
-			if (cp) {
-				/* name can later be freed, so we need
-				 * to memmove, not just return cp
-				 */
-				strbuf_remove(&name, 0, cp - name.buf);
-				free(def);
-				if (root)
-					strbuf_insert(&name, 0, root, root_len);
-				return squash_slash(strbuf_detach(&name, NULL));
-			}
-		}
-		strbuf_release(&name);
-	}
-
-	for (;;) {
+	while (line != end) {
 		char c = *line;
 
-		if (isspace(c)) {
+		if (!end && isspace(c)) {
 			if (c == '\n')
 				break;
 			if (name_terminate(start, line-start, c, terminate))
@@ -497,6 +639,37 @@
 	return squash_slash(xmemdupz(start, len));
 }
 
+static char *find_name(const char *line, char *def, int p_value, int terminate)
+{
+	if (*line == '"') {
+		char *name = find_name_gnu(line, def, p_value);
+		if (name)
+			return name;
+	}
+
+	return find_name_common(line, def, p_value, NULL, terminate);
+}
+
+static char *find_name_traditional(const char *line, char *def, int p_value)
+{
+	size_t len = strlen(line);
+	size_t date_len;
+
+	if (*line == '"') {
+		char *name = find_name_gnu(line, def, p_value);
+		if (name)
+			return name;
+	}
+
+	len = strchrnul(line, '\n') - line;
+	date_len = diff_timestamp_len(line, len);
+	if (!date_len)
+		return find_name_common(line, def, p_value, NULL, TERM_TAB);
+	len -= date_len;
+
+	return find_name_common(line, def, p_value, line + len, 0);
+}
+
 static int count_slashes(const char *cp)
 {
 	int cnt = 0;
@@ -519,7 +692,7 @@
 
 	if (is_dev_null(nameline))
 		return -1;
-	name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB);
+	name = find_name_traditional(nameline, NULL, 0);
 	if (!name)
 		return -1;
 	cp = strchr(name, '/');
@@ -638,16 +811,16 @@
 	if (is_dev_null(first)) {
 		patch->is_new = 1;
 		patch->is_delete = 0;
-		name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(second, NULL, p_value);
 		patch->new_name = name;
 	} else if (is_dev_null(second)) {
 		patch->is_new = 0;
 		patch->is_delete = 1;
-		name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(first, NULL, p_value);
 		patch->old_name = name;
 	} else {
-		name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
-		name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(first, NULL, p_value);
+		name = find_name_traditional(second, name, p_value);
 		if (has_epoch_timestamp(first)) {
 			patch->is_new = 1;
 			patch->is_delete = 0;
@@ -3606,11 +3779,11 @@
 	return 0;
 }
 
-int cmd_apply(int argc, const char **argv, const char *unused_prefix)
+int cmd_apply(int argc, const char **argv, const char *prefix_)
 {
 	int i;
 	int errs = 0;
-	int is_not_gitdir;
+	int is_not_gitdir = !startup_info->have_repository;
 	int binary;
 	int force_apply = 0;
 
@@ -3683,7 +3856,7 @@
 		OPT_END()
 	};
 
-	prefix = setup_git_directory_gently(&is_not_gitdir);
+	prefix = prefix_;
 	prefix_length = prefix ? strlen(prefix) : 0;
 	git_config(git_apply_config, NULL);
 	if (apply_default_whitespace)
diff --git a/builtin/blame.c b/builtin/blame.c
index 28e3be2..1015354 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -1407,7 +1407,8 @@
 			    int detailed)
 {
 	int len;
-	char *tmp, *endp, *reencoded, *message;
+	const char *subject;
+	char *reencoded, *message;
 	static char author_name[1024];
 	static char author_mail[1024];
 	static char committer_name[1024];
@@ -1449,22 +1450,13 @@
 		    &ret->committer_time, &ret->committer_tz);
 
 	ret->summary = summary_buf;
-	tmp = strstr(message, "\n\n");
-	if (!tmp) {
-	error_out:
+	len = find_commit_subject(message, &subject);
+	if (len && len < sizeof(summary_buf)) {
+		memcpy(summary_buf, subject, len);
+		summary_buf[len] = 0;
+	} else {
 		sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
-		free(reencoded);
-		return;
 	}
-	tmp += 2;
-	endp = strchr(tmp, '\n');
-	if (!endp)
-		endp = tmp + strlen(tmp);
-	len = endp - tmp;
-	if (len >= sizeof(summary_buf) || len == 0)
-		goto error_out;
-	memcpy(summary_buf, tmp, len);
-	summary_buf[len] = 0;
 	free(reencoded);
 }
 
diff --git a/builtin/bundle.c b/builtin/bundle.c
index 2006cc5..80649ba 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -18,7 +18,6 @@
 int cmd_bundle(int argc, const char **argv, const char *prefix)
 {
 	struct bundle_header header;
-	int nongit;
 	const char *cmd, *bundle_file;
 	int bundle_fd = -1;
 	char buffer[PATH_MAX];
@@ -31,7 +30,6 @@
 	argc -= 2;
 	argv += 2;
 
-	prefix = setup_git_directory_gently(&nongit);
 	if (prefix && bundle_file[0] != '/') {
 		snprintf(buffer, sizeof(buffer), "%s/%s", prefix, bundle_file);
 		bundle_file = buffer;
@@ -54,11 +52,11 @@
 		return !!list_bundle_refs(&header, argc, argv);
 	}
 	if (!strcmp(cmd, "create")) {
-		if (nongit)
+		if (!startup_info->have_repository)
 			die("Need a repository to create a bundle.");
 		return !!create_bundle(&header, bundle_file, argc, argv);
 	} else if (!strcmp(cmd, "unbundle")) {
-		if (nongit)
+		if (!startup_info->have_repository)
 			die("Need a repository to unbundle.");
 		return !!unbundle(&header, bundle_fd) ||
 			list_bundle_refs(&header, argc, argv);
diff --git a/builtin/checkout.c b/builtin/checkout.c
index eef2b48..560eae1 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -18,6 +18,7 @@
 #include "xdiff-interface.h"
 #include "ll-merge.h"
 #include "resolve-undo.h"
+#include "submodule.h"
 
 static const char * const checkout_usage[] = {
 	"git checkout [options] <branch>",
@@ -32,10 +33,15 @@
 	int writeout_stage;
 	int writeout_error;
 
+	/* not set by parse_options */
+	int branch_exists;
+
 	const char *new_branch;
+	const char *new_branch_force;
 	const char *new_orphan_branch;
 	int new_branch_log;
 	enum branch_track track;
+	struct diff_options diff_options;
 };
 
 static int post_checkout_hook(struct commit *old, struct commit *new,
@@ -150,6 +156,10 @@
 	read_mmblob(&ours, active_cache[pos+1]->sha1);
 	read_mmblob(&theirs, active_cache[pos+2]->sha1);
 
+	/*
+	 * NEEDSWORK: re-create conflicts from merges with
+	 * merge.renormalize set, too
+	 */
 	status = ll_merge(&result_buf, path, &ancestor, "base",
 			  &ours, "ours", &theirs, "theirs", 0);
 	free(ancestor.ptr);
@@ -274,11 +284,12 @@
 	return errs;
 }
 
-static void show_local_changes(struct object *head)
+static void show_local_changes(struct object *head, struct diff_options *opts)
 {
 	struct rev_info rev;
 	/* I think we want full paths, even if we're in a subdirectory. */
 	init_revisions(&rev, NULL);
+	rev.diffopt.flags = opts->flags;
 	rev.diffopt.output_format |= DIFF_FORMAT_NAME_STATUS;
 	if (diff_setup_done(&rev.diffopt) < 0)
 		die("diff_setup_done failed");
@@ -372,7 +383,7 @@
 		topts.src_index = &the_index;
 		topts.dst_index = &the_index;
 
-		topts.msgs.not_uptodate_file = "You have local changes to '%s'; cannot switch branches.";
+		setup_unpack_trees_porcelain(&topts, "checkout");
 
 		refresh_cache(REFRESH_QUIET);
 
@@ -432,6 +443,13 @@
 			 */
 
 			add_files_to_cache(NULL, NULL, 0);
+			/*
+			 * NEEDSWORK: carrying over local changes
+			 * when branches have different end-of-line
+			 * normalization (or clean+smudge rules) is
+			 * a pain; plumb in an option to set
+			 * o.renormalize?
+			 */
 			init_merge_options(&o);
 			o.verbosity = 0;
 			work = write_tree_from_memory(&o);
@@ -455,7 +473,7 @@
 		die("unable to write new index file");
 
 	if (!opts->force && !opts->quiet)
-		show_local_changes(&new->commit->object);
+		show_local_changes(&new->commit->object, &opts->diff_options);
 
 	return 0;
 }
@@ -510,7 +528,8 @@
 			}
 		}
 		else
-			create_branch(old->name, opts->new_branch, new->name, 0,
+			create_branch(old->name, opts->new_branch, new->name,
+				      opts->new_branch_force ? 1 : 0,
 				      opts->new_branch_log, opts->track);
 		new->name = opts->new_branch;
 		setup_branch_path(new);
@@ -528,9 +547,12 @@
 			if (old->path && !strcmp(new->path, old->path))
 				fprintf(stderr, "Already on '%s'\n",
 					new->name);
-			else
+			else if (opts->new_branch)
 				fprintf(stderr, "Switched to%s branch '%s'\n",
-					opts->new_branch ? " a new" : "",
+					opts->branch_exists ? " and reset" : " a new",
+					new->name);
+			else
+				fprintf(stderr, "Switched to branch '%s'\n",
 					new->name);
 		}
 		if (old->path && old->name) {
@@ -599,7 +621,16 @@
 
 static int git_checkout_config(const char *var, const char *value, void *cb)
 {
-	return git_xmerge_config(var, value, cb);
+	if (!strcmp(var, "diff.ignoresubmodules")) {
+		struct checkout_opts *opts = cb;
+		handle_ignore_submodules_arg(&opts->diff_options, value);
+		return 0;
+	}
+
+	if (!prefixcmp(var, "submodule."))
+		return parse_submodule_config_option(var, value);
+
+	return git_xmerge_config(var, value, NULL);
 }
 
 static int interactive_checkout(const char *revision, const char **pathspec,
@@ -656,7 +687,10 @@
 	int dwim_new_local_branch = 1;
 	struct option options[] = {
 		OPT__QUIET(&opts.quiet),
-		OPT_STRING('b', NULL, &opts.new_branch, "new branch", "branch"),
+		OPT_STRING('b', NULL, &opts.new_branch, "branch",
+			   "create and checkout a new branch"),
+		OPT_STRING('B', NULL, &opts.new_branch_force, "branch",
+			   "create/reset and checkout a branch"),
 		OPT_BOOLEAN('l', NULL, &opts.new_branch_log, "log for new branch"),
 		OPT_SET_INT('t', "track",  &opts.track, "track",
 			BRANCH_TRACK_EXPLICIT),
@@ -680,13 +714,22 @@
 	memset(&opts, 0, sizeof(opts));
 	memset(&new, 0, sizeof(new));
 
-	git_config(git_checkout_config, NULL);
+	gitmodules_config();
+	git_config(git_checkout_config, &opts);
 
 	opts.track = BRANCH_TRACK_UNSPECIFIED;
 
 	argc = parse_options(argc, argv, prefix, options, checkout_usage,
 			     PARSE_OPT_KEEP_DASHDASH);
 
+	/* we can assume from now on new_branch = !new_branch_force */
+	if (opts.new_branch && opts.new_branch_force)
+		die("-B cannot be used with -b");
+
+	/* copy -B over to -b, so that we can just check the latter */
+	if (opts.new_branch_force)
+		opts.new_branch = opts.new_branch_force;
+
 	if (patch_mode && (opts.track > 0 || opts.new_branch
 			   || opts.new_branch_log || opts.merge || opts.force))
 		die ("--patch is incompatible with all other options");
@@ -708,7 +751,7 @@
 
 	if (opts.new_orphan_branch) {
 		if (opts.new_branch)
-			die("--orphan and -b are mutually exclusive");
+			die("--orphan and -b|-B are mutually exclusive");
 		if (opts.track > 0)
 			die("--orphan cannot be used with -t");
 		opts.new_branch = opts.new_orphan_branch;
@@ -857,8 +900,12 @@
 		if (strbuf_check_branch_ref(&buf, opts.new_branch))
 			die("git checkout: we do not like '%s' as a branch name.",
 			    opts.new_branch);
-		if (!get_sha1(buf.buf, rev))
-			die("git checkout: branch %s already exists", opts.new_branch);
+		if (!get_sha1(buf.buf, rev)) {
+			opts.branch_exists = 1;
+			if (!opts.new_branch_force)
+				die("git checkout: branch %s already exists",
+				    opts.new_branch);
+		}
 		strbuf_release(&buf);
 	}
 
diff --git a/builtin/clean.c b/builtin/clean.c
index fac64e6..b508d2c 100644
--- a/builtin/clean.c
+++ b/builtin/clean.c
@@ -10,12 +10,13 @@
 #include "cache.h"
 #include "dir.h"
 #include "parse-options.h"
+#include "string-list.h"
 #include "quote.h"
 
 static int force = -1; /* unset */
 
 static const char *const builtin_clean_usage[] = {
-	"git clean [-d] [-f] [-n] [-q] [-x | -X] [--] <paths>...",
+	"git clean [-d] [-f] [-n] [-q] [-e <pattern>] [-x | -X] [--] <paths>...",
 	NULL
 };
 
@@ -26,6 +27,13 @@
 	return git_default_config(var, value, cb);
 }
 
+static int exclude_cb(const struct option *opt, const char *arg, int unset)
+{
+	struct string_list *exclude_list = opt->value;
+	string_list_append(exclude_list, arg);
+	return 0;
+}
+
 int cmd_clean(int argc, const char **argv, const char *prefix)
 {
 	int i;
@@ -36,6 +44,7 @@
 	struct dir_struct dir;
 	static const char **pathspec;
 	struct strbuf buf = STRBUF_INIT;
+	struct string_list exclude_list = { NULL, 0, 0, 0 };
 	const char *qname;
 	char *seen = NULL;
 	struct option options[] = {
@@ -44,6 +53,8 @@
 		OPT_BOOLEAN('f', "force", &force, "force"),
 		OPT_BOOLEAN('d', NULL, &remove_directories,
 				"remove whole directories"),
+		{ OPTION_CALLBACK, 'e', "exclude", &exclude_list, "pattern",
+		  "exclude <pattern>", PARSE_OPT_NONEG, exclude_cb },
 		OPT_BOOLEAN('x', NULL, &ignored, "remove ignored files, too"),
 		OPT_BOOLEAN('X', NULL, &ignored_only,
 				"remove only ignored files"),
@@ -81,6 +92,9 @@
 	if (!ignored)
 		setup_standard_excludes(&dir);
 
+	for (i = 0; i < exclude_list.nr; i++)
+		add_exclude(exclude_list.items[i].string, "", 0, dir.exclude_list);
+
 	pathspec = get_pathspec(prefix, argv);
 
 	fill_directory(&dir, pathspec);
@@ -167,5 +181,6 @@
 	free(seen);
 
 	strbuf_release(&directory);
+	string_list_clear(&exclude_list, 0);
 	return (errors != 0);
 }
diff --git a/builtin/clone.c b/builtin/clone.c
index efb1e6f..19ed640 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -361,7 +361,7 @@
 
 int cmd_clone(int argc, const char **argv, const char *prefix)
 {
-	int is_bundle = 0;
+	int is_bundle = 0, is_local;
 	struct stat buf;
 	const char *repo_name, *repo, *work_tree, *git_dir;
 	char *path, *dir;
@@ -414,6 +414,9 @@
 		repo = xstrdup(make_absolute_path(repo_name));
 	else
 		repo = repo_name;
+	is_local = path && !is_bundle;
+	if (is_local && option_depth)
+		warning("--depth is ignored in local clones; use file:// instead.");
 
 	if (argc == 2)
 		dir = xstrdup(argv[1]);
@@ -514,7 +517,7 @@
 
 	strbuf_reset(&value);
 
-	if (path && !is_bundle) {
+	if (is_local) {
 		refs = clone_local(path, git_dir);
 		mapped_refs = wanted_peer_refs(refs, refspec);
 	} else {
diff --git a/builtin/commit.c b/builtin/commit.c
index c4a577d..66fdd22 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -25,6 +25,7 @@
 #include "rerere.h"
 #include "unpack-trees.h"
 #include "quote.h"
+#include "submodule.h"
 
 static const char * const builtin_commit_usage[] = {
 	"git commit [options] [--] <filepattern>...",
@@ -1073,6 +1074,7 @@
 		status_format = STATUS_FORMAT_PORCELAIN;
 
 	wt_status_prepare(&s);
+	gitmodules_config();
 	git_config(git_status_config, &s);
 	in_merge = file_exists(git_path("MERGE_HEAD"));
 	argc = parse_options(argc, argv, prefix,
diff --git a/builtin/config.c b/builtin/config.c
index f3d1660..ca4a0db 100644
--- a/builtin/config.c
+++ b/builtin/config.c
@@ -20,7 +20,7 @@
 static char key_delim = ' ';
 static char term = '\n';
 
-static int use_global_config, use_system_config;
+static int use_global_config, use_system_config, use_local_config;
 static const char *given_config_file;
 static int actions, types;
 static const char *get_color_slot, *get_colorbool_slot;
@@ -51,6 +51,7 @@
 	OPT_GROUP("Config file location"),
 	OPT_BOOLEAN(0, "global", &use_global_config, "use global config file"),
 	OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"),
+	OPT_BOOLEAN(0, "local", &use_local_config, "use repository config file"),
 	OPT_STRING('f', "file", &given_config_file, "FILE", "use given config file"),
 	OPT_GROUP("Action"),
 	OPT_BIT(0, "get", &actions, "get value: name [value-regex]", ACTION_GET),
@@ -330,11 +331,10 @@
 		return get_colorbool_found ? 0 : 1;
 }
 
-int cmd_config(int argc, const char **argv, const char *unused_prefix)
+int cmd_config(int argc, const char **argv, const char *prefix)
 {
-	int nongit;
+	int nongit = !startup_info->have_repository;
 	char *value;
-	const char *prefix = setup_git_directory_gently(&nongit);
 
 	config_exclusive_filename = getenv(CONFIG_ENVIRONMENT);
 
@@ -342,7 +342,7 @@
 			     builtin_config_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
-	if (use_global_config + use_system_config + !!given_config_file > 1) {
+	if (use_global_config + use_system_config + use_local_config + !!given_config_file > 1) {
 		error("only one config file at a time.");
 		usage_with_options(builtin_config_usage, builtin_config_options);
 	}
@@ -358,6 +358,8 @@
 	}
 	else if (use_system_config)
 		config_exclusive_filename = git_etc_gitconfig();
+	else if (use_local_config)
+		config_exclusive_filename = git_pathdup("config");
 	else if (given_config_file) {
 		if (!is_absolute_path(given_config_file) && prefix)
 			config_exclusive_filename = prefix_filename(prefix,
diff --git a/builtin/diff-files.c b/builtin/diff-files.c
index 5b64011..951c7c8 100644
--- a/builtin/diff-files.c
+++ b/builtin/diff-files.c
@@ -8,6 +8,7 @@
 #include "commit.h"
 #include "revision.h"
 #include "builtin.h"
+#include "submodule.h"
 
 static const char diff_files_usage[] =
 "git diff-files [-q] [-0/-1/2/3 |-c|--cc] [<common diff options>] [<path>...]"
@@ -20,6 +21,7 @@
 	unsigned options = 0;
 
 	init_revisions(&rev, prefix);
+	gitmodules_config();
 	git_config(git_diff_basic_config, NULL); /* no "diff" UI options */
 	rev.abbrev = 0;
 
diff --git a/builtin/diff-index.c b/builtin/diff-index.c
index 0483749..2eb32bd 100644
--- a/builtin/diff-index.c
+++ b/builtin/diff-index.c
@@ -3,6 +3,7 @@
 #include "commit.h"
 #include "revision.h"
 #include "builtin.h"
+#include "submodule.h"
 
 static const char diff_cache_usage[] =
 "git diff-index [-m] [--cached] "
@@ -17,6 +18,7 @@
 	int result;
 
 	init_revisions(&rev, prefix);
+	gitmodules_config();
 	git_config(git_diff_basic_config, NULL); /* no "diff" UI options */
 	rev.abbrev = 0;
 
diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c
index 3c78bda..0d2a3e9 100644
--- a/builtin/diff-tree.c
+++ b/builtin/diff-tree.c
@@ -3,6 +3,7 @@
 #include "commit.h"
 #include "log-tree.h"
 #include "builtin.h"
+#include "submodule.h"
 
 static struct rev_info log_tree_opt;
 
@@ -112,6 +113,7 @@
 	int read_stdin = 0;
 
 	init_revisions(opt, prefix);
+	gitmodules_config();
 	git_config(git_diff_basic_config, NULL); /* no "diff" UI options */
 	opt->abbrev = 0;
 	opt->diff = 1;
diff --git a/builtin/diff.c b/builtin/diff.c
index 89ae89c..a43d326 100644
--- a/builtin/diff.c
+++ b/builtin/diff.c
@@ -13,6 +13,7 @@
 #include "revision.h"
 #include "log-tree.h"
 #include "builtin.h"
+#include "submodule.h"
 
 struct blobinfo {
 	unsigned char sha1[20];
@@ -279,6 +280,7 @@
 	 */
 
 	prefix = setup_git_directory_gently(&nongit);
+	gitmodules_config();
 	git_config(git_diff_ui_config, NULL);
 
 	if (diff_use_color_default == -1)
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 9fe25ff..a9bbf86 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -27,6 +27,7 @@
 static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ABORT;
 static int fake_missing_tagger;
 static int no_data;
+static int full_tree;
 
 static int parse_opt_signed_tag_mode(const struct option *opt,
 				     const char *arg, int unset)
@@ -147,10 +148,39 @@
 	free(buf);
 }
 
+static int depth_first(const void *a_, const void *b_)
+{
+	const struct diff_filepair *a = *((const struct diff_filepair **)a_);
+	const struct diff_filepair *b = *((const struct diff_filepair **)b_);
+	const char *name_a, *name_b;
+	int len_a, len_b, len;
+	int cmp;
+
+	name_a = a->one ? a->one->path : a->two->path;
+	name_b = b->one ? b->one->path : b->two->path;
+
+	len_a = strlen(name_a);
+	len_b = strlen(name_b);
+	len = (len_a < len_b) ? len_a : len_b;
+
+	/* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
+	cmp = memcmp(name_a, name_b, len);
+	if (cmp)
+		return cmp;
+	return (len_b - len_a);
+}
+
 static void show_filemodify(struct diff_queue_struct *q,
 			    struct diff_options *options, void *data)
 {
 	int i;
+
+	/*
+	 * Handle files below a directory first, in case they are all deleted
+	 * and the directory changes to a file or symlink.
+	 */
+	qsort(q->queue, q->nr, sizeof(q->queue[0]), depth_first);
+
 	for (i = 0; i < q->nr; i++) {
 		struct diff_filespec *ospec = q->queue[i]->one;
 		struct diff_filespec *spec = q->queue[i]->two;
@@ -241,7 +271,8 @@
 		message += 2;
 
 	if (commit->parents &&
-	    get_object_mark(&commit->parents->item->object) != 0) {
+	    get_object_mark(&commit->parents->item->object) != 0 &&
+	    !full_tree) {
 		parse_commit(commit->parents->item);
 		diff_tree_sha1(commit->parents->item->tree->object.sha1,
 			       commit->tree->object.sha1, "", &rev->diffopt);
@@ -281,6 +312,8 @@
 		i++;
 	}
 
+	if (full_tree)
+		printf("deleteall\n");
 	log_tree_diff_flush(rev);
 	rev->diffopt.output_format = saved_output_format;
 
@@ -565,8 +598,8 @@
 int cmd_fast_export(int argc, const char **argv, const char *prefix)
 {
 	struct rev_info revs;
-	struct object_array commits = { 0, 0, NULL };
-	struct string_list extra_refs = { NULL, 0, 0, 0 };
+	struct object_array commits = OBJECT_ARRAY_INIT;
+	struct string_list extra_refs = STRING_LIST_INIT_NODUP;
 	struct commit *commit;
 	char *export_filename = NULL, *import_filename = NULL;
 	struct option options[] = {
@@ -584,6 +617,8 @@
 			     "Import marks from this file"),
 		OPT_BOOLEAN(0, "fake-missing-tagger", &fake_missing_tagger,
 			     "Fake a tagger when tags lack one"),
+		OPT_BOOLEAN(0, "full-tree", &full_tree,
+			     "Output full tree for each commit"),
 		{ OPTION_NEGBIT, 0, "data", &no_data, NULL,
 			"Skip output of blob data",
 			PARSE_OPT_NOARG | PARSE_OPT_NEGHELP, NULL, 1 },
@@ -608,6 +643,9 @@
 	if (import_filename)
 		import_marks(import_filename);
 
+	if (import_filename && revs.prune_data)
+		full_tree = 1;
+
 	get_tags_and_duplicates(&revs.pending, &extra_refs);
 
 	if (prepare_revision_walk(&revs))
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 7a53144..fab3fce 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -544,40 +544,14 @@
 	return 0;
 }
 
-struct tag_data {
-	struct ref **head;
-	struct ref ***tail;
-};
-
-static int add_to_tail(struct string_list_item *item, void *cb_data)
-{
-	struct tag_data *data = (struct tag_data *)cb_data;
-	struct ref *rm = NULL;
-
-	/* We have already decided to ignore this item */
-	if (!item->util)
-		return 0;
-
-	rm = alloc_ref(item->string);
-	rm->peer_ref = alloc_ref(item->string);
-	hashcpy(rm->old_sha1, item->util);
-
-	**data->tail = rm;
-	*data->tail = &rm->next;
-
-	return 0;
-}
-
 static void find_non_local_tags(struct transport *transport,
 			struct ref **head,
 			struct ref ***tail)
 {
-	struct string_list existing_refs = { NULL, 0, 0, 0 };
-	struct string_list remote_refs = { NULL, 0, 0, 0 };
-	struct tag_data data;
+	struct string_list existing_refs = STRING_LIST_INIT_NODUP;
+	struct string_list remote_refs = STRING_LIST_INIT_NODUP;
 	const struct ref *ref;
 	struct string_list_item *item = NULL;
-	data.head = head; data.tail = tail;
 
 	for_each_ref(add_existing, &existing_refs);
 	for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) {
@@ -631,10 +605,20 @@
 		item->util = NULL;
 
 	/*
-	 * For all the tags in the remote_refs string list, call
-	 * add_to_tail to add them to the list of refs to be fetched
+	 * For all the tags in the remote_refs string list,
+	 * add them to the list of refs to be fetched
 	 */
-	for_each_string_list(&remote_refs, add_to_tail, &data);
+	for_each_string_list_item(item, &remote_refs) {
+		/* Unless we have already decided to ignore this item... */
+		if (item->util)
+		{
+			struct ref *rm = alloc_ref(item->string);
+			rm->peer_ref = alloc_ref(item->string);
+			hashcpy(rm->old_sha1, item->util);
+			**tail = rm;
+			*tail = &rm->next;
+		}
+	}
 
 	string_list_clear(&remote_refs, 0);
 }
@@ -667,7 +651,7 @@
 static int do_fetch(struct transport *transport,
 		    struct refspec *refs, int ref_count)
 {
-	struct string_list existing_refs = { NULL, 0, 0, 0 };
+	struct string_list existing_refs = STRING_LIST_INIT_NODUP;
 	struct string_list_item *peer_item = NULL;
 	struct ref *ref_map;
 	struct ref *rm;
@@ -893,7 +877,7 @@
 int cmd_fetch(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct string_list list = { NULL, 0, 0, 0 };
+	struct string_list list = STRING_LIST_INIT_NODUP;
 	struct remote *remote;
 	int result = 0;
 
diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c
index bc3c5e6..e7e12ee 100644
--- a/builtin/fmt-merge-msg.c
+++ b/builtin/fmt-merge-msg.c
@@ -7,7 +7,7 @@
 #include "string-list.h"
 
 static const char * const fmt_merge_msg_usage[] = {
-	"git fmt-merge-msg [--log|--no-log] [--file <file>]",
+	"git fmt-merge-msg [-m <message>] [--log|--no-log] [--file <file>]",
 	NULL
 };
 
@@ -38,8 +38,8 @@
 	data->generic.strdup_strings = 1;
 }
 
-static struct string_list srcs = { NULL, 0, 0, 1 };
-static struct string_list origins = { NULL, 0, 0, 1 };
+static struct string_list srcs = STRING_LIST_INIT_DUP;
+static struct string_list origins = STRING_LIST_INIT_DUP;
 
 static int handle_line(char *line)
 {
@@ -146,7 +146,7 @@
 	int i, count = 0;
 	struct commit *commit;
 	struct object *branch;
-	struct string_list subjects = { NULL, 0, 0, 1 };
+	struct string_list subjects = STRING_LIST_INIT_DUP;
 	int flags = UNINTERESTING | TREESAME | SEEN | SHOWN | ADDED;
 	struct strbuf sb = STRBUF_INIT;
 
@@ -319,11 +319,14 @@
 int cmd_fmt_merge_msg(int argc, const char **argv, const char *prefix)
 {
 	const char *inpath = NULL;
+	const char *message = NULL;
 	struct option options[] = {
 		OPT_BOOLEAN(0, "log",     &merge_summary, "populate log with the shortlog"),
 		{ OPTION_BOOLEAN, 0, "summary", &merge_summary, NULL,
 		  "alias for --log (deprecated)",
 		  PARSE_OPT_NOARG | PARSE_OPT_HIDDEN },
+		OPT_STRING('m', "message", &message, "text",
+			"use <text> as start of message"),
 		OPT_FILENAME('F', "file", &inpath, "file to read from"),
 		OPT_END()
 	};
@@ -337,6 +340,12 @@
 			     0);
 	if (argc > 0)
 		usage_with_options(fmt_merge_msg_usage, options);
+	if (message && !merge_summary) {
+		char nl = '\n';
+		write_in_full(STDOUT_FILENO, message, strlen(message));
+		write_in_full(STDOUT_FILENO, &nl, 1);
+		return 0;
+	}
 
 	if (inpath && strcmp(inpath, "-")) {
 		in = fopen(inpath, "r");
@@ -346,7 +355,12 @@
 
 	if (strbuf_read(&input, fileno(in), 0) < 0)
 		die_errno("could not read input file");
-	ret = fmt_merge_msg(merge_summary, &input, &output);
+	if (message) {
+		strbuf_addstr(&output, message);
+		ret = fmt_merge_msg_shortlog(&input, &output);
+	} else {
+		ret = fmt_merge_msg(merge_summary, &input, &output);
+	}
 	if (ret)
 		return ret;
 	write_in_full(STDOUT_FILENO, output.buf, output.len);
diff --git a/builtin/grep.c b/builtin/grep.c
index 597f76b..da32f3d 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -834,12 +834,12 @@
 	int external_grep_allowed__ignored;
 	const char *show_in_pager = NULL, *default_pager = "dummy";
 	struct grep_opt opt;
-	struct object_array list = { 0, 0, NULL };
+	struct object_array list = OBJECT_ARRAY_INIT;
 	const char **paths = NULL;
-	struct string_list path_list = { NULL, 0, 0, 0 };
+	struct string_list path_list = STRING_LIST_INIT_NODUP;
 	int i;
 	int dummy;
-	int nongit = 0, use_index = 1;
+	int use_index = 1;
 	struct option options[] = {
 		OPT_BOOLEAN(0, "cached", &cached,
 			"search in index instead of in the work tree"),
@@ -930,8 +930,6 @@
 		OPT_END()
 	};
 
-	prefix = setup_git_directory_gently(&nongit);
-
 	/*
 	 * 'git grep -h', unlike 'git grep -h <pattern>', is a request
 	 * to show usage information and exit.
@@ -976,7 +974,7 @@
 			     PARSE_OPT_STOP_AT_NON_OPTION |
 			     PARSE_OPT_NO_INTERNAL_HELP);
 
-	if (use_index && nongit)
+	if (use_index && !startup_info->have_repository)
 		/* die the same way as if we did it at the beginning */
 		setup_git_directory();
 
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index fad76bf..2e680d7 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -886,25 +886,9 @@
 
 	read_replace_refs = 0;
 
-	/*
-	 * We wish to read the repository's config file if any, and
-	 * for that it is necessary to call setup_git_directory_gently().
-	 * However if the cwd was inside .git/objects/pack/ then we need
-	 * to go back there or all the pack name arguments will be wrong.
-	 * And in that case we cannot rely on any prefix returned by
-	 * setup_git_directory_gently() either.
-	 */
-	{
-		char cwd[PATH_MAX+1];
-		int nongit;
-
-		if (!getcwd(cwd, sizeof(cwd)-1))
-			die("Unable to get current working directory");
-		setup_git_directory_gently(&nongit);
-		git_config(git_index_pack_config, NULL);
-		if (chdir(cwd))
-			die("Cannot come back to cwd");
-	}
+	git_config(git_index_pack_config, NULL);
+	if (prefix && chdir(prefix))
+		die("Cannot come back to cwd");
 
 	for (i = 1; i < argc; i++) {
 		const char *arg = argv[i];
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 1b9b8a8..bb4f612 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -25,6 +25,7 @@
 static int show_killed;
 static int show_valid_bit;
 static int line_terminator = '\n';
+static int debug_mode;
 
 static const char *prefix;
 static int max_prefix_len;
@@ -162,35 +163,41 @@
 		       ce_stage(ce));
 	}
 	write_name(ce->name, ce_namelen(ce));
-}
-
-static int show_one_ru(struct string_list_item *item, void *cbdata)
-{
-	const char *path = item->string;
-	struct resolve_undo_info *ui = item->util;
-	int i, len;
-
-	len = strlen(path);
-	if (len < max_prefix_len)
-		return 0; /* outside of the prefix */
-	if (!match_pathspec(pathspec, path, len, max_prefix_len, ps_matched))
-		return 0; /* uninterested */
-	for (i = 0; i < 3; i++) {
-		if (!ui->mode[i])
-			continue;
-		printf("%s%06o %s %d\t", tag_resolve_undo, ui->mode[i],
-		       find_unique_abbrev(ui->sha1[i], abbrev),
-		       i + 1);
-		write_name(path, len);
+	if (debug_mode) {
+		printf("  ctime: %d:%d\n", ce->ce_ctime.sec, ce->ce_ctime.nsec);
+		printf("  mtime: %d:%d\n", ce->ce_mtime.sec, ce->ce_mtime.nsec);
+		printf("  dev: %d\tino: %d\n", ce->ce_dev, ce->ce_ino);
+		printf("  uid: %d\tgid: %d\n", ce->ce_uid, ce->ce_gid);
+		printf("  size: %d\tflags: %x\n", ce->ce_size, ce->ce_flags);
 	}
-	return 0;
 }
 
 static void show_ru_info(void)
 {
+	struct string_list_item *item;
+
 	if (!the_index.resolve_undo)
 		return;
-	for_each_string_list(the_index.resolve_undo, show_one_ru, NULL);
+
+	for_each_string_list_item(item, the_index.resolve_undo) {
+		const char *path = item->string;
+		struct resolve_undo_info *ui = item->util;
+		int i, len;
+
+		len = strlen(path);
+		if (len < max_prefix_len)
+			continue; /* outside of the prefix */
+		if (!match_pathspec(pathspec, path, len, max_prefix_len, ps_matched))
+			continue; /* uninterested */
+		for (i = 0; i < 3; i++) {
+			if (!ui->mode[i])
+				continue;
+			printf("%s%06o %s %d\t", tag_resolve_undo, ui->mode[i],
+			       find_unique_abbrev(ui->sha1[i], abbrev),
+			       i + 1);
+			write_name(path, len);
+		}
+	}
 }
 
 static void show_files(struct dir_struct *dir)
@@ -519,6 +526,7 @@
 		OPT_STRING(0, "with-tree", &with_tree, "tree-ish",
 			"pretend that paths removed since <tree-ish> are still present"),
 		OPT__ABBREV(&abbrev),
+		OPT_BOOLEAN(0, "debug", &debug_mode, "show debugging data"),
 		OPT_END()
 	};
 
diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c
index 34480cf..97eed40 100644
--- a/builtin/ls-remote.c
+++ b/builtin/ls-remote.c
@@ -32,7 +32,6 @@
 {
 	int i;
 	const char *dest = NULL;
-	int nongit;
 	unsigned flags = 0;
 	int quiet = 0;
 	const char *uploadpack = NULL;
@@ -42,8 +41,6 @@
 	struct transport *transport;
 	const struct ref *ref;
 
-	setup_git_directory_gently(&nongit);
-
 	for (i = 1; i < argc; i++) {
 		const char *arg = argv[i];
 
diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c
index e4560da..99654d0 100644
--- a/builtin/mailsplit.c
+++ b/builtin/mailsplit.c
@@ -137,7 +137,7 @@
 	char name[PATH_MAX];
 	int ret = -1;
 	int i;
-	struct string_list list = {NULL, 0, 0, 1};
+	struct string_list list = STRING_LIST_INIT_DUP;
 
 	if (populate_maildir_list(&list, maildir) < 0)
 		goto out;
diff --git a/builtin/merge-base.c b/builtin/merge-base.c
index 54e7ec2..96dd160 100644
--- a/builtin/merge-base.c
+++ b/builtin/merge-base.c
@@ -23,7 +23,8 @@
 }
 
 static const char * const merge_base_usage[] = {
-	"git merge-base [-a|--all] <commit> <commit>...",
+	"git merge-base [-a|--all] [--octopus] <commit> <commit>...",
+	"git merge-base --independent <commit>...",
 	NULL
 };
 
@@ -41,21 +42,58 @@
 	return r;
 }
 
+static int handle_octopus(int count, const char **args, int reduce, int show_all)
+{
+	struct commit_list *revs = NULL;
+	struct commit_list *result;
+	int i;
+
+	if (reduce)
+		show_all = 1;
+
+	for (i = count - 1; i >= 0; i--)
+		commit_list_insert(get_commit_reference(args[i]), &revs);
+
+	result = reduce ? reduce_heads(revs) : get_octopus_merge_bases(revs);
+
+	if (!result)
+		return 1;
+
+	while (result) {
+		printf("%s\n", sha1_to_hex(result->item->object.sha1));
+		if (!show_all)
+			return 0;
+		result = result->next;
+	}
+
+	return 0;
+}
+
 int cmd_merge_base(int argc, const char **argv, const char *prefix)
 {
 	struct commit **rev;
 	int rev_nr = 0;
 	int show_all = 0;
+	int octopus = 0;
+	int reduce = 0;
 
 	struct option options[] = {
-		OPT_BOOLEAN('a', "all", &show_all, "outputs all common ancestors"),
+		OPT_BOOLEAN('a', "all", &show_all, "output all common ancestors"),
+		OPT_BOOLEAN(0, "octopus", &octopus, "find ancestors for a single n-way merge"),
+		OPT_BOOLEAN(0, "independent", &reduce, "list revs not reachable from others"),
 		OPT_END()
 	};
 
 	git_config(git_default_config, NULL);
 	argc = parse_options(argc, argv, prefix, options, merge_base_usage, 0);
-	if (argc < 2)
+	if (!octopus && !reduce && argc < 2)
 		usage_with_options(merge_base_usage, options);
+	if (reduce && (show_all || octopus))
+		die("--independent cannot be used with other options");
+
+	if (octopus || reduce)
+		return handle_octopus(argc, argv, reduce, show_all);
+
 	rev = xmalloc(argc * sizeof(*rev));
 	while (argc-- > 0)
 		rev[rev_nr++] = get_commit_reference(*argv++);
diff --git a/builtin/merge-file.c b/builtin/merge-file.c
index b8e9e5b..b6664d4 100644
--- a/builtin/merge-file.c
+++ b/builtin/merge-file.c
@@ -28,7 +28,6 @@
 	xmparam_t xmp = {{0}};
 	int ret = 0, i = 0, to_stdout = 0;
 	int quiet = 0;
-	int nongit;
 	struct option options[] = {
 		OPT_BOOLEAN('p', "stdout", &to_stdout, "send results to standard output"),
 		OPT_SET_INT(0, "diff3", &xmp.style, "use a diff3 based merge", XDL_MERGE_DIFF3),
@@ -50,8 +49,7 @@
 	xmp.style = 0;
 	xmp.favor = 0;
 
-	prefix = setup_git_directory_gently(&nongit);
-	if (!nongit) {
+	if (startup_info->have_repository) {
 		/* Read the configuration file */
 		git_config(git_xmerge_config, NULL);
 		if (0 <= git_xmerge_style)
diff --git a/builtin/merge-recursive.c b/builtin/merge-recursive.c
index 3d00adb..78b9db7 100644
--- a/builtin/merge-recursive.c
+++ b/builtin/merge-recursive.c
@@ -48,6 +48,10 @@
 				o.subtree_shift = "";
 			else if (!prefixcmp(arg+2, "subtree="))
 				o.subtree_shift = arg + 10;
+			else if (!strcmp(arg+2, "renormalize"))
+				o.renormalize = 1;
+			else if (!strcmp(arg+2, "no-renormalize"))
+				o.renormalize = 0;
 			else
 				die("Unknown option %s", arg);
 			continue;
diff --git a/builtin/merge.c b/builtin/merge.c
index 37ce4f5..5f65c0c 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -54,6 +54,7 @@
 static const char **xopts;
 static size_t xopts_nr, xopts_alloc;
 static const char *branch;
+static int option_renormalize;
 static int verbosity;
 static int allow_rerere_auto;
 
@@ -131,6 +132,7 @@
 
 	ret = xcalloc(1, sizeof(struct strategy));
 	ret->name = xstrdup(name);
+	ret->attr = NO_TRIVIAL;
 	return ret;
 }
 
@@ -437,7 +439,7 @@
 		strbuf_addstr(&truname, "refs/heads/");
 		strbuf_addstr(&truname, remote);
 		strbuf_setlen(&truname, truname.len - len);
-		if (resolve_ref(truname.buf, buf_sha, 0, NULL)) {
+		if (resolve_ref(truname.buf, buf_sha, 1, NULL)) {
 			strbuf_addf(msg,
 				    "%s\t\tbranch '%s'%s of .\n",
 				    sha1_to_hex(remote_head->sha1),
@@ -486,7 +488,8 @@
 		buf = xstrdup(v);
 		argc = split_cmdline(buf, &argv);
 		if (argc < 0)
-			die("Bad branch.%s.mergeoptions string", branch);
+			die("Bad branch.%s.mergeoptions string: %s", branch,
+			    split_cmdline_strerror(argc));
 		argv = xrealloc(argv, sizeof(*argv) * (argc + 2));
 		memmove(argv + 1, argv, sizeof(*argv) * (argc + 1));
 		argc++;
@@ -503,6 +506,8 @@
 		return git_config_string(&pull_octopus, k, v);
 	else if (!strcmp(k, "merge.log") || !strcmp(k, "merge.summary"))
 		option_log = git_config_bool(k, v);
+	else if (!strcmp(k, "merge.renormalize"))
+		option_renormalize = git_config_bool(k, v);
 	return git_diff_ui_config(k, v, cb);
 }
 
@@ -624,6 +629,11 @@
 		if (!strcmp(strategy, "subtree"))
 			o.subtree_shift = "";
 
+		o.renormalize = option_renormalize;
+
+		/*
+		 * NEEDSWORK: merge with table in builtin/merge-recursive
+		 */
 		for (x = 0; x < xopts_nr; x++) {
 			if (!strcmp(xopts[x], "ours"))
 				o.recursive_variant = MERGE_RECURSIVE_OURS;
@@ -633,6 +643,10 @@
 				o.subtree_shift = "";
 			else if (!prefixcmp(xopts[x], "subtree="))
 				o.subtree_shift = xopts[x]+8;
+			else if (!strcmp(xopts[x], "renormalize"))
+				o.renormalize = 1;
+			else if (!strcmp(xopts[x], "no-renormalize"))
+				o.renormalize = 0;
 			else
 				die("Unknown option for merge-recursive: -X%s", xopts[x]);
 		}
@@ -704,7 +718,7 @@
 	opts.verbose_update = 1;
 	opts.merge = 1;
 	opts.fn = twoway_merge;
-	opts.msgs = get_porcelain_error_msgs();
+	setup_unpack_trees_porcelain(&opts, "merge");
 
 	trees[nr_trees] = parse_tree_indirect(head);
 	if (!trees[nr_trees++])
@@ -816,7 +830,7 @@
 	return 0;
 }
 
-static int suggest_conflicts(void)
+static int suggest_conflicts(int renormalizing)
 {
 	FILE *fp;
 	int pos;
@@ -1301,5 +1315,5 @@
 			"stopped before committing as requested\n");
 		return 0;
 	} else
-		return suggest_conflicts();
+		return suggest_conflicts(option_renormalize);
 }
diff --git a/builtin/mv.c b/builtin/mv.c
index 38574b8..cdbb094 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -63,7 +63,7 @@
 	const char **source, **destination, **dest_path;
 	enum update_mode { BOTH = 0, WORKING_DIRECTORY, INDEX } *modes;
 	struct stat st;
-	struct string_list src_for_dst = {NULL, 0, 0, 0};
+	struct string_list src_for_dst = STRING_LIST_INIT_NODUP;
 
 	git_config(git_default_config, NULL);
 
diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 06a38ac..31f5c1c 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -220,7 +220,7 @@
 
 int cmd_name_rev(int argc, const char **argv, const char *prefix)
 {
-	struct object_array revs = { 0, 0, NULL };
+	struct object_array revs = OBJECT_ARRAY_INIT;
 	int all = 0, transform_stdin = 0, allow_undefined = 1, always = 0;
 	struct name_ref_data data = { 0, 0, NULL };
 	struct option opts[] = {
diff --git a/builtin/push.c b/builtin/push.c
index 69bc2f2..e655eb7 100644
--- a/builtin/push.c
+++ b/builtin/push.c
@@ -22,13 +22,13 @@
 
 static const char **refspec;
 static int refspec_nr;
+static int refspec_alloc;
 
 static void add_refspec(const char *ref)
 {
-	int nr = refspec_nr + 1;
-	refspec = xrealloc(refspec, nr * sizeof(char *));
-	refspec[nr-1] = ref;
-	refspec_nr = nr;
+	refspec_nr++;
+	ALLOC_GROW(refspec, refspec_nr, refspec_alloc);
+	refspec[refspec_nr-1] = ref;
 }
 
 static void set_refspecs(const char **refs, int nr)
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index d634b5a..760817d 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -530,7 +530,7 @@
 static void check_aliased_updates(struct command *commands)
 {
 	struct command *cmd;
-	struct string_list ref_list = { NULL, 0, 0, 0 };
+	struct string_list ref_list = STRING_LIST_INIT_NODUP;
 
 	for (cmd = commands; cmd; cmd = cmd->next) {
 		struct string_list_item *item =
diff --git a/builtin/remote.c b/builtin/remote.c
index 6699bc5..48e0a6b 100644
--- a/builtin/remote.c
+++ b/builtin/remote.c
@@ -134,7 +134,7 @@
 static int add(int argc, const char **argv)
 {
 	int fetch = 0, mirror = 0, fetch_tags = TAGS_DEFAULT;
-	struct string_list track = { NULL, 0, 0 };
+	struct string_list track = STRING_LIST_INIT_NODUP;
 	const char *master = NULL;
 	struct remote *remote;
 	struct strbuf buf = STRBUF_INIT, buf2 = STRBUF_INIT;
@@ -596,7 +596,7 @@
 	};
 	struct remote *oldremote, *newremote;
 	struct strbuf buf = STRBUF_INIT, buf2 = STRBUF_INIT, buf3 = STRBUF_INIT;
-	struct string_list remote_branches = { NULL, 0, 0, 0 };
+	struct string_list remote_branches = STRING_LIST_INIT_NODUP;
 	struct rename_info rename;
 	int i;
 
@@ -734,8 +734,8 @@
 	struct remote *remote;
 	struct strbuf buf = STRBUF_INIT;
 	struct known_remotes known_remotes = { NULL, NULL };
-	struct string_list branches = { NULL, 0, 0, 1 };
-	struct string_list skipped = { NULL, 0, 0, 1 };
+	struct string_list branches = STRING_LIST_INIT_DUP;
+	struct string_list skipped = STRING_LIST_INIT_DUP;
 	struct branches_for_remote cb_data;
 	int i, result;
 
@@ -1044,7 +1044,7 @@
 		OPT_END()
 	};
 	struct ref_states states;
-	struct string_list info_list = { NULL, 0, 0, 0 };
+	struct string_list info_list = STRING_LIST_INIT_NODUP;
 	struct show_info info;
 
 	argc = parse_options(argc, argv, NULL, options, builtin_remote_show_usage,
@@ -1483,7 +1483,7 @@
 
 static int show_all(void)
 {
-	struct string_list list = { NULL, 0, 0 };
+	struct string_list list = STRING_LIST_INIT_NODUP;
 	int result;
 
 	list.strdup_strings = 1;
diff --git a/builtin/rerere.c b/builtin/rerere.c
index 0c7202e..642bf35 100644
--- a/builtin/rerere.c
+++ b/builtin/rerere.c
@@ -1,13 +1,16 @@
 #include "builtin.h"
 #include "cache.h"
 #include "dir.h"
+#include "parse-options.h"
 #include "string-list.h"
 #include "rerere.h"
 #include "xdiff/xdiff.h"
 #include "xdiff-interface.h"
 
-static const char git_rerere_usage[] =
-"git rerere [clear | status | diff | gc]";
+static const char * const rerere_usage[] = {
+	"git rerere [clear | status | diff | gc]",
+	NULL,
+};
 
 /* these values are days */
 static int cutoff_noresolve = 15;
@@ -46,7 +49,7 @@
 
 static void garbage_collect(struct string_list *rr)
 {
-	struct string_list to_remove = { NULL, 0, 0, 1 };
+	struct string_list to_remove = STRING_LIST_INIT_DUP;
 	DIR *dir;
 	struct dirent *e;
 	int i, cutoff;
@@ -113,26 +116,27 @@
 
 int cmd_rerere(int argc, const char **argv, const char *prefix)
 {
-	struct string_list merge_rr = { NULL, 0, 0, 1 };
-	int i, fd, flags = 0;
+	struct string_list merge_rr = STRING_LIST_INIT_DUP;
+	int i, fd, autoupdate = -1, flags = 0;
 
-	if (2 < argc) {
-		if (!strcmp(argv[1], "-h"))
-			usage(git_rerere_usage);
-		if (!strcmp(argv[1], "--rerere-autoupdate"))
-			flags = RERERE_AUTOUPDATE;
-		else if (!strcmp(argv[1], "--no-rerere-autoupdate"))
-			flags = RERERE_NOAUTOUPDATE;
-		if (flags) {
-			argc--;
-			argv++;
-		}
-	}
-	if (argc < 2)
+	struct option options[] = {
+		OPT_SET_INT(0, "rerere-autoupdate", &autoupdate,
+			"register clean resolutions in index", 1),
+		OPT_END(),
+	};
+
+	argc = parse_options(argc, argv, prefix, options, rerere_usage, 0);
+
+	if (autoupdate == 1)
+		flags = RERERE_AUTOUPDATE;
+	if (autoupdate == 0)
+		flags = RERERE_NOAUTOUPDATE;
+
+	if (argc < 1)
 		return rerere(flags);
 
-	if (!strcmp(argv[1], "forget")) {
-		const char **pathspec = get_pathspec(prefix, argv + 2);
+	if (!strcmp(argv[0], "forget")) {
+		const char **pathspec = get_pathspec(prefix, argv + 1);
 		return rerere_forget(pathspec);
 	}
 
@@ -140,26 +144,26 @@
 	if (fd < 0)
 		return 0;
 
-	if (!strcmp(argv[1], "clear")) {
+	if (!strcmp(argv[0], "clear")) {
 		for (i = 0; i < merge_rr.nr; i++) {
 			const char *name = (const char *)merge_rr.items[i].util;
 			if (!has_rerere_resolution(name))
 				unlink_rr_item(name);
 		}
 		unlink_or_warn(git_path("MERGE_RR"));
-	} else if (!strcmp(argv[1], "gc"))
+	} else if (!strcmp(argv[0], "gc"))
 		garbage_collect(&merge_rr);
-	else if (!strcmp(argv[1], "status"))
+	else if (!strcmp(argv[0], "status"))
 		for (i = 0; i < merge_rr.nr; i++)
 			printf("%s\n", merge_rr.items[i].string);
-	else if (!strcmp(argv[1], "diff"))
+	else if (!strcmp(argv[0], "diff"))
 		for (i = 0; i < merge_rr.nr; i++) {
 			const char *path = merge_rr.items[i].string;
 			const char *name = (const char *)merge_rr.items[i].util;
 			diff_two(rerere_path(name, "preimage"), path, path, path);
 		}
 	else
-		usage(git_rerere_usage);
+		usage_with_options(rerere_usage, options);
 
 	string_list_clear(&merge_rr, 1);
 	return 0;
diff --git a/builtin/reset.c b/builtin/reset.c
index 1283068..0037be4 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -318,7 +318,7 @@
 	 * affecting the working tree nor HEAD. */
 	if (i < argc) {
 		if (reset_type == MIXED)
-			warning("--mixed option is deprecated with paths.");
+			warning("--mixed with paths is deprecated; use 'git reset -- <paths>' instead.");
 		else if (reset_type != NONE)
 			die("Cannot do %s reset with paths.",
 					reset_type_names[reset_type]);
diff --git a/builtin/revert.c b/builtin/revert.c
index 8b9d829..4b47ace 100644
--- a/builtin/revert.c
+++ b/builtin/revert.c
@@ -102,9 +102,9 @@
 static int get_message(const char *raw_message, struct commit_message *out)
 {
 	const char *encoding;
-	const char *p, *abbrev, *eol;
+	const char *abbrev, *subject;
+	int abbrev_len, subject_len;
 	char *q;
-	int abbrev_len, oneline_len;
 
 	if (!raw_message)
 		return -1;
@@ -125,27 +125,17 @@
 	abbrev = find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV);
 	abbrev_len = strlen(abbrev);
 
-	/* Find beginning and end of commit subject. */
-	p = out->message;
-	while (*p && (*p != '\n' || p[1] != '\n'))
-		p++;
-	if (*p) {
-		p += 2;
-		for (eol = p + 1; *eol && *eol != '\n'; eol++)
-			; /* do nothing */
-	} else
-		eol = p;
-	oneline_len = eol - p;
+	subject_len = find_commit_subject(out->message, &subject);
 
 	out->parent_label = xmalloc(strlen("parent of ") + abbrev_len +
-			      strlen("... ") + oneline_len + 1);
+			      strlen("... ") + subject_len + 1);
 	q = out->parent_label;
 	q = mempcpy(q, "parent of ", strlen("parent of "));
 	out->label = q;
 	q = mempcpy(q, abbrev, abbrev_len);
 	q = mempcpy(q, "... ", strlen("... "));
 	out->subject = q;
-	q = mempcpy(q, p, oneline_len);
+	q = mempcpy(q, subject, subject_len);
 	*q = '\0';
 	return 0;
 }
@@ -241,27 +231,30 @@
 			sha1_to_hex(commit->object.sha1));
 }
 
-static char *help_msg(void)
+static void advise(const char *advice, ...)
 {
-	struct strbuf helpbuf = STRBUF_INIT;
+	va_list params;
+
+	va_start(params, advice);
+	vreportf("hint: ", advice, params);
+	va_end(params);
+}
+
+static void print_advice(void)
+{
 	char *msg = getenv("GIT_CHERRY_PICK_HELP");
 
-	if (msg)
-		return msg;
-
-	strbuf_addstr(&helpbuf, "  After resolving the conflicts,\n"
-		"mark the corrected paths with 'git add <paths>' or 'git rm <paths>'\n"
-		"and commit the result");
-
-	if (action == CHERRY_PICK) {
-		strbuf_addf(&helpbuf, " with: \n"
-			"\n"
-			"        git commit -c %s\n",
-			    sha1_to_hex(commit->object.sha1));
+	if (msg) {
+		fprintf(stderr, "%s\n", msg);
+		return;
 	}
-	else
-		strbuf_addch(&helpbuf, '.');
-	return strbuf_detach(&helpbuf, NULL);
+
+	advise("after resolving the conflicts, mark the corrected paths");
+	advise("with 'git add <paths>' or 'git rm <paths>'");
+
+	if (action == CHERRY_PICK)
+		advise("and commit the result with 'git commit -c %s'",
+		       find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV));
 }
 
 static void write_message(struct strbuf *msgbuf, const char *filename)
@@ -311,10 +304,9 @@
 	return write_ref_sha1(ref_lock, to, "cherry-pick");
 }
 
-static void do_recursive_merge(struct commit *base, struct commit *next,
-			       const char *base_label, const char *next_label,
-			       unsigned char *head, struct strbuf *msgbuf,
-			       char *defmsg)
+static int do_recursive_merge(struct commit *base, struct commit *next,
+			      const char *base_label, const char *next_label,
+			      unsigned char *head, struct strbuf *msgbuf)
 {
 	struct merge_options o;
 	struct tree *result, *next_tree, *base_tree, *head_tree;
@@ -324,6 +316,13 @@
 	index_fd = hold_locked_index(&index_lock, 1);
 
 	read_cache();
+
+	/*
+	 * NEEDSWORK: cherry-picking between branches with
+	 * different end-of-line normalization is a pain;
+	 * plumb in an option to set o.renormalize?
+	 * (or better: arbitrary -X options)
+	 */
 	init_merge_options(&o);
 	o.ancestor = base ? base_label : "(empty tree)";
 	o.branch1 = "HEAD";
@@ -357,14 +356,35 @@
 					i++;
 			}
 		}
-		write_message(msgbuf, defmsg);
-		fprintf(stderr, "Automatic %s failed.%s\n",
-			me, help_msg());
-		rerere(allow_rerere_auto);
-		exit(1);
 	}
-	write_message(msgbuf, defmsg);
-	fprintf(stderr, "Finished one %s.\n", me);
+
+	return !clean;
+}
+
+/*
+ * If we are cherry-pick, and if the merge did not result in
+ * hand-editing, we will hit this commit and inherit the original
+ * author date and name.
+ * If we are revert, or if our cherry-pick results in a hand merge,
+ * we had better say that the current user is responsible for that.
+ */
+static int run_git_commit(const char *defmsg)
+{
+	/* 6 is max possible length of our args array including NULL */
+	const char *args[6];
+	int i = 0;
+
+	args[i++] = "commit";
+	args[i++] = "-n";
+	if (signoff)
+		args[i++] = "-s";
+	if (!edit) {
+		args[i++] = "-F";
+		args[i++] = defmsg;
+	}
+	args[i] = NULL;
+
+	return run_command_v_opt(args, RUN_GIT_CMD);
 }
 
 static int do_pick_commit(void)
@@ -375,6 +395,7 @@
 	struct commit_message msg = { NULL, NULL, NULL, NULL, NULL };
 	char *defmsg = NULL;
 	struct strbuf msgbuf = STRBUF_INIT;
+	int res;
 
 	if (no_commit) {
 		/*
@@ -470,63 +491,40 @@
 		}
 	}
 
-	if (!strategy || !strcmp(strategy, "recursive") || action == REVERT)
-		do_recursive_merge(base, next, base_label, next_label,
-				   head, &msgbuf, defmsg);
-	else {
-		int res;
+	if (!strategy || !strcmp(strategy, "recursive") || action == REVERT) {
+		res = do_recursive_merge(base, next, base_label, next_label,
+					 head, &msgbuf);
+		write_message(&msgbuf, defmsg);
+	} else {
 		struct commit_list *common = NULL;
 		struct commit_list *remotes = NULL;
+
 		write_message(&msgbuf, defmsg);
+
 		commit_list_insert(base, &common);
 		commit_list_insert(next, &remotes);
 		res = try_merge_command(strategy, common,
 					sha1_to_hex(head), remotes);
 		free_commit_list(common);
 		free_commit_list(remotes);
-		if (res) {
-			fprintf(stderr, "Automatic %s with strategy %s failed.%s\n",
-				me, strategy, help_msg());
-			rerere(allow_rerere_auto);
-			exit(1);
-		}
+	}
+
+	if (res) {
+		error("could not %s %s... %s",
+		      action == REVERT ? "revert" : "apply",
+		      find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV),
+		      msg.subject);
+		print_advice();
+		rerere(allow_rerere_auto);
+	} else {
+		if (!no_commit)
+			res = run_git_commit(defmsg);
 	}
 
 	free_message(&msg);
-
-	/*
-	 *
-	 * If we are cherry-pick, and if the merge did not result in
-	 * hand-editing, we will hit this commit and inherit the original
-	 * author date and name.
-	 * If we are revert, or if our cherry-pick results in a hand merge,
-	 * we had better say that the current user is responsible for that.
-	 */
-
-	if (!no_commit) {
-		/* 6 is max possible length of our args array including NULL */
-		const char *args[6];
-		int res;
-		int i = 0;
-
-		args[i++] = "commit";
-		args[i++] = "-n";
-		if (signoff)
-			args[i++] = "-s";
-		if (!edit) {
-			args[i++] = "-F";
-			args[i++] = defmsg;
-		}
-		args[i] = NULL;
-		res = run_command_v_opt(args, RUN_GIT_CMD);
-		free(defmsg);
-
-		return res;
-	}
-
 	free(defmsg);
 
-	return 0;
+	return res;
 }
 
 static void prepare_revs(struct rev_info *revs)
diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index 0a9681b..2135b0d 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -249,7 +249,7 @@
 {
 	static struct shortlog log;
 	static struct rev_info rev;
-	int nongit;
+	int nongit = !startup_info->have_repository;
 
 	static const struct option options[] = {
 		OPT_BOOLEAN('n', "numbered", &log.sort_by_number,
@@ -265,7 +265,6 @@
 
 	struct parse_opt_ctx_t ctx;
 
-	prefix = setup_git_directory_gently(&nongit);
 	git_config(git_default_config, NULL);
 	shortlog_init(&log);
 	init_revisions(&rev, prefix);
diff --git a/builtin/show-ref.c b/builtin/show-ref.c
index 0b2a9ad..be9b512 100644
--- a/builtin/show-ref.c
+++ b/builtin/show-ref.c
@@ -120,7 +120,7 @@
  */
 static int exclude_existing(const char *match)
 {
-	static struct string_list existing_refs = { NULL, 0, 0, 0 };
+	static struct string_list existing_refs = STRING_LIST_INIT_NODUP;
 	char buf[1024];
 	int matchlen = match ? strlen(match) : 0;
 
diff --git a/builtin/var.c b/builtin/var.c
index 70fdb4d..0744bb8 100644
--- a/builtin/var.c
+++ b/builtin/var.c
@@ -74,14 +74,9 @@
 
 int cmd_var(int argc, const char **argv, const char *prefix)
 {
-	const char *val;
-	int nongit;
-	if (argc != 2) {
+	const char *val = NULL;
+	if (argc != 2)
 		usage(var_usage);
-	}
-
-	setup_git_directory_gently(&nongit);
-	val = NULL;
 
 	if (strcmp(argv[1], "-l") == 0) {
 		git_config(show_config, NULL);
diff --git a/cache.h b/cache.h
index 1e690d1..be02a42 100644
--- a/cache.h
+++ b/cache.h
@@ -179,8 +179,7 @@
 #define CE_UNHASHED  (0x200000)
 #define CE_CONFLICTED (0x800000)
 
-/* Only remove in work directory, not index */
-#define CE_WT_REMOVE (0x400000)
+#define CE_WT_REMOVE (0x400000) /* remove in work directory */
 
 #define CE_UNPACKED  (0x1000000)
 
@@ -641,6 +640,9 @@
 /* Return a statically allocated filename matching the sha1 signature */
 extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
 extern char *git_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+extern char *git_path_submodule(const char *path, const char *fmt, ...)
+	__attribute__((format (printf, 2, 3)));
+
 extern char *sha1_file_name(const unsigned char *sha1);
 extern char *sha1_pack_name(const unsigned char *sha1);
 extern char *sha1_pack_index_name(const unsigned char *sha1);
@@ -811,6 +813,7 @@
 			       char *timebuf,
 			       size_t timebuf_size);
 int parse_date(const char *date, char *buf, int bufsize);
+int parse_date_basic(const char *date, unsigned long *timestamp, int *offset);
 void datestamp(char *buf, int bufsize);
 #define approxidate(s) approxidate_careful((s), NULL)
 unsigned long approxidate_careful(const char *, int *);
@@ -1054,6 +1057,7 @@
 extern int convert_to_git(const char *path, const char *src, size_t len,
                           struct strbuf *dst, enum safe_crlf checksafe);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
+extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
 
 /* add */
 /*
@@ -1096,6 +1100,14 @@
 
 char *alias_lookup(const char *alias);
 int split_cmdline(char *cmdline, const char ***argv);
+/* Takes a negative value returned by split_cmdline */
+const char *split_cmdline_strerror(int cmdline_errno);
+
+/* git.c */
+struct startup_info {
+	int have_repository;
+};
+extern struct startup_info *startup_info;
 
 /* builtin/merge.c */
 int checkout_fast_forward(const unsigned char *from, const unsigned char *to);
diff --git a/commit.c b/commit.c
index e9b0750..0094ec1 100644
--- a/commit.c
+++ b/commit.c
@@ -315,6 +315,25 @@
 	return ret;
 }
 
+int find_commit_subject(const char *commit_buffer, const char **subject)
+{
+	const char *eol;
+	const char *p = commit_buffer;
+
+	while (*p && (*p != '\n' || p[1] != '\n'))
+		p++;
+	if (*p) {
+		p += 2;
+		for (eol = p; *eol && *eol != '\n'; eol++)
+			; /* do nothing */
+	} else
+		eol = p;
+
+	*subject = p;
+
+	return eol - p;
+}
+
 struct commit_list *commit_list_insert(struct commit *item, struct commit_list **list_p)
 {
 	struct commit_list *new_list = xmalloc(sizeof(struct commit_list));
diff --git a/commit.h b/commit.h
index eb2b8ac..9113bbe 100644
--- a/commit.h
+++ b/commit.h
@@ -41,6 +41,9 @@
 
 int parse_commit(struct commit *item);
 
+/* Find beginning and length of commit subject. */
+int find_commit_subject(const char *commit_buffer, const char **subject);
+
 struct commit_list * commit_list_insert(struct commit *item, struct commit_list **list_p);
 unsigned commit_list_count(const struct commit_list *l);
 struct commit_list * insert_by_date(struct commit *item, struct commit_list **list);
diff --git a/compat/regex/regcomp.c b/compat/regex/regcomp.c
new file mode 100644
index 0000000..8c96ed9
--- /dev/null
+++ b/compat/regex/regcomp.c
@@ -0,0 +1,3884 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301 USA.  */
+
+static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
+					  size_t length, reg_syntax_t syntax);
+static void re_compile_fastmap_iter (regex_t *bufp,
+				     const re_dfastate_t *init_state,
+				     char *fastmap);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
+#ifdef RE_ENABLE_I18N
+static void free_charset (re_charset_t *cset);
+#endif /* RE_ENABLE_I18N */
+static void free_workarea_compile (regex_t *preg);
+static reg_errcode_t create_initial_state (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void optimize_utf8 (re_dfa_t *dfa);
+#endif
+static reg_errcode_t analyze (regex_t *preg);
+static reg_errcode_t preorder (bin_tree_t *root,
+			       reg_errcode_t (fn (void *, bin_tree_t *)),
+			       void *extra);
+static reg_errcode_t postorder (bin_tree_t *root,
+				reg_errcode_t (fn (void *, bin_tree_t *)),
+				void *extra);
+static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
+static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
+static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
+				 bin_tree_t *node);
+static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
+static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
+static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
+static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
+static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
+				   unsigned int constraint);
+static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+					 int node, int root);
+static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
+static int fetch_number (re_string_t *input, re_token_t *token,
+			 reg_syntax_t syntax);
+static int peek_token (re_token_t *token, re_string_t *input,
+			reg_syntax_t syntax) internal_function;
+static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
+			  reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
+				  re_token_t *token, reg_syntax_t syntax,
+				  int nest, reg_errcode_t *err);
+static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
+				 re_token_t *token, reg_syntax_t syntax,
+				 int nest, reg_errcode_t *err);
+static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
+				     re_token_t *token, reg_syntax_t syntax,
+				     int nest, reg_errcode_t *err);
+static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
+				  re_token_t *token, reg_syntax_t syntax,
+				  int nest, reg_errcode_t *err);
+static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
+				 re_dfa_t *dfa, re_token_t *token,
+				 reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
+				      re_token_t *token, reg_syntax_t syntax,
+				      reg_errcode_t *err);
+static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
+					    re_string_t *regexp,
+					    re_token_t *token, int token_len,
+					    re_dfa_t *dfa,
+					    reg_syntax_t syntax,
+					    int accept_hyphen);
+static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
+					  re_string_t *regexp,
+					  re_token_t *token);
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+					re_charset_t *mbcset,
+					int *equiv_class_alloc,
+					const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+				      bitset_t sbcset,
+				      re_charset_t *mbcset,
+				      int *char_class_alloc,
+				      const char *class_name,
+				      reg_syntax_t syntax);
+#else  /* not RE_ENABLE_I18N */
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+					const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+				      bitset_t sbcset,
+				      const char *class_name,
+				      reg_syntax_t syntax);
+#endif /* not RE_ENABLE_I18N */
+static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
+				       RE_TRANSLATE_TYPE trans,
+				       const char *class_name,
+				       const char *extra,
+				       int non_match, reg_errcode_t *err);
+static bin_tree_t *create_tree (re_dfa_t *dfa,
+				bin_tree_t *left, bin_tree_t *right,
+				re_token_type_t type);
+static bin_tree_t *create_token_tree (re_dfa_t *dfa,
+				      bin_tree_t *left, bin_tree_t *right,
+				      const re_token_t *token);
+static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
+static void free_token (re_token_t *node);
+static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
+static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
+
+/* This table gives an error message for each of the error codes listed
+   in regex.h.  Obviously the order here has to be same as there.
+   POSIX doesn't require that we do anything for REG_NOERROR,
+   but why not be nice?  */
+
+const char __re_error_msgid[] attribute_hidden =
+  {
+#define REG_NOERROR_IDX	0
+    gettext_noop ("Success")	/* REG_NOERROR */
+    "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+    gettext_noop ("No match")	/* REG_NOMATCH */
+    "\0"
+#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
+    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+    "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+    "\0"
+#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+    "\0"
+#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
+    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+    "\0"
+#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
+    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+    "\0"
+#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
+    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
+    "\0"
+#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+    "\0"
+#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+    "\0"
+#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
+    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+    "\0"
+#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+    gettext_noop ("Invalid range end")	/* REG_ERANGE */
+    "\0"
+#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
+    gettext_noop ("Memory exhausted") /* REG_ESPACE */
+    "\0"
+#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
+    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+    "\0"
+#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+    gettext_noop ("Premature end of regular expression") /* REG_EEND */
+    "\0"
+#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
+    gettext_noop ("Regular expression too big") /* REG_ESIZE */
+    "\0"
+#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
+    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+  };
+
+const size_t __re_error_msgid_idx[] attribute_hidden =
+  {
+    REG_NOERROR_IDX,
+    REG_NOMATCH_IDX,
+    REG_BADPAT_IDX,
+    REG_ECOLLATE_IDX,
+    REG_ECTYPE_IDX,
+    REG_EESCAPE_IDX,
+    REG_ESUBREG_IDX,
+    REG_EBRACK_IDX,
+    REG_EPAREN_IDX,
+    REG_EBRACE_IDX,
+    REG_BADBR_IDX,
+    REG_ERANGE_IDX,
+    REG_ESPACE_IDX,
+    REG_BADRPT_IDX,
+    REG_EEND_IDX,
+    REG_ESIZE_IDX,
+    REG_ERPAREN_IDX
+  };
+
+/* Entry points for GNU code.  */
+
+
+#ifdef ZOS_USS
+
+/* For ZOS USS we must define btowc */
+
+wchar_t 
+btowc (int c)
+{
+   wchar_t wtmp[2];
+   char tmp[2];
+
+   tmp[0] = c;
+   tmp[1] = 0;
+
+   mbtowc (wtmp, tmp, 1);
+   return wtmp[0];
+}
+#endif
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+   compiles PATTERN (of length LENGTH) and puts the result in BUFP.
+   Returns 0 if the pattern was valid, otherwise an error string.
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+   are set in BUFP on entry.  */
+
+const char *
+re_compile_pattern (const char *pattern,
+		    size_t length,
+		    struct re_pattern_buffer *bufp)
+{
+  reg_errcode_t ret;
+
+  /* And GNU code determines whether or not to get register information
+     by passing null for the REGS argument to re_match, etc., not by
+     setting no_sub, unless RE_NO_SUB is set.  */
+  bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
+
+  /* Match anchors at newline.  */
+  bufp->newline_anchor = 1;
+
+  ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
+
+  if (!ret)
+    return NULL;
+  return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
+   also be assigned to arbitrarily: each pattern buffer stores its own
+   syntax, so it can be changed between regex compilations.  */
+/* This has no initializer because initialized variables in Emacs
+   become read-only after dumping.  */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation.  This provides
+   for compatibility for various utilities which historically have
+   different, incompatible syntaxes.
+
+   The argument SYNTAX is a bit mask comprised of the various bits
+   defined in regex.h.  We return the old syntax.  */
+
+reg_syntax_t
+re_set_syntax (reg_syntax_t syntax)
+{
+  reg_syntax_t ret = re_syntax_options;
+
+  re_syntax_options = syntax;
+  return ret;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+int
+re_compile_fastmap (struct re_pattern_buffer *bufp)
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  char *fastmap = bufp->fastmap;
+
+  memset (fastmap, '\0', sizeof (char) * SBC_MAX);
+  re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
+  if (dfa->init_state != dfa->init_state_word)
+    re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
+  if (dfa->init_state != dfa->init_state_nl)
+    re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
+  if (dfa->init_state != dfa->init_state_begbuf)
+    re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
+  bufp->fastmap_accurate = 1;
+  return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+static inline void
+__attribute ((always_inline))
+re_set_fastmap (char *fastmap, int icase, int ch)
+{
+  fastmap[ch] = 1;
+  if (icase)
+    fastmap[tolower (ch)] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+   Compile fastmap for the initial_state INIT_STATE.  */
+
+static void
+re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
+			 char *fastmap)
+{
+  volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  int node_cnt;
+  int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
+  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
+    {
+      int node = init_state->nodes.elems[node_cnt];
+      re_token_type_t type = dfa->nodes[node].type;
+
+      if (type == CHARACTER)
+	{
+	  re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
+#ifdef RE_ENABLE_I18N
+	  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+	    {
+	      unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p;
+	      wchar_t wc;
+	      mbstate_t state;
+
+	      p = buf;
+	      *p++ = dfa->nodes[node].opr.c;
+	      while (++node < dfa->nodes_len
+		     &&	dfa->nodes[node].type == CHARACTER
+		     && dfa->nodes[node].mb_partial)
+		*p++ = dfa->nodes[node].opr.c;
+	      memset (&state, '\0', sizeof (state));
+	      if (__mbrtowc (&wc, (const char *) buf, p - buf,
+			     &state) == p - buf
+		  && (__wcrtomb ((char *) buf, towlower (wc), &state)
+		      != (size_t) -1))
+		re_set_fastmap (fastmap, 0, buf[0]);
+	      re_free (buf);
+	    }
+#endif
+	}
+      else if (type == SIMPLE_BRACKET)
+	{
+	  int i, ch;
+	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+	    {
+	      int j;
+	      bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
+	      for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+		if (w & ((bitset_word_t) 1 << j))
+		  re_set_fastmap (fastmap, icase, ch);
+	    }
+	}
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET)
+	{
+	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
+	  int i;
+
+# ifdef _LIBC
+	  /* See if we have to try all bytes which start multiple collation
+	     elements.
+	     e.g. In da_DK, we want to catch 'a' since "aa" is a valid
+		  collation element, and don't catch 'b' since 'b' is
+		  the only collation element which starts from 'b' (and
+		  it is caught by SIMPLE_BRACKET).  */
+	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
+		  && (cset->ncoll_syms || cset->nranges))
+		{
+		  const int32_t *table = (const int32_t *)
+		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+		  for (i = 0; i < SBC_MAX; ++i)
+		    if (table[i] < 0)
+		      re_set_fastmap (fastmap, icase, i);
+		}
+# endif /* _LIBC */
+
+	  /* See if we have to start the match at all multibyte characters,
+	     i.e. where we would not find an invalid sequence.  This only
+	     applies to multibyte character sets; for single byte character
+	     sets, the SIMPLE_BRACKET again suffices.  */
+	  if (dfa->mb_cur_max > 1
+	      && (cset->nchar_classes || cset->non_match || cset->nranges
+# ifdef _LIBC
+		  || cset->nequiv_classes
+# endif /* _LIBC */
+		 ))
+	    {
+	      unsigned char c = 0;
+	      do
+		{
+		  mbstate_t mbs;
+		  memset (&mbs, 0, sizeof (mbs));
+		  if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
+		    re_set_fastmap (fastmap, false, (int) c);
+		}
+	      while (++c != 0);
+	    }
+
+	  else
+	    {
+	      /* ... Else catch all bytes which can start the mbchars.  */
+	      for (i = 0; i < cset->nmbchars; ++i)
+		{
+		  char buf[256];
+		  mbstate_t state;
+		  memset (&state, '\0', sizeof (state));
+		  if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+		    re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+		  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+		    {
+		      if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+			  != (size_t) -1)
+			re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+		    }
+		}
+	    }
+	}
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_PERIOD
+#ifdef RE_ENABLE_I18N
+	       || type == OP_UTF8_PERIOD
+#endif /* RE_ENABLE_I18N */
+	       || type == END_OF_RE)
+	{
+	  memset (fastmap, '\1', sizeof (char) * SBC_MAX);
+	  if (type == END_OF_RE)
+	    bufp->can_be_null = 1;
+	  return;
+	}
+    }
+}
+
+/* Entry point for POSIX code.  */
+/* regcomp takes a regular expression as a string and compiles it.
+
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
+   since POSIX says we shouldn't.  Thus, we set
+
+     `buffer' to the compiled pattern;
+     `used' to the length of the compiled pattern;
+     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+       REG_EXTENDED bit in CFLAGS is set; otherwise, to
+       RE_SYNTAX_POSIX_BASIC;
+     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+     `fastmap' to an allocated space for the fastmap;
+     `fastmap_accurate' to zero;
+     `re_nsub' to the number of subexpressions in PATTERN.
+
+   PATTERN is the address of the pattern string.
+
+   CFLAGS is a series of bits which affect compilation.
+
+     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+     use POSIX basic syntax.
+
+     If REG_NEWLINE is set, then . and [^...] don't match newline.
+     Also, regexec will try a match beginning after every newline.
+
+     If REG_ICASE is set, then we considers upper- and lowercase
+     versions of letters to be equivalent when matching.
+
+     If REG_NOSUB is set, then when PREG is passed to regexec, that
+     routine will report only success or failure, and nothing about the
+     registers.
+
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
+   the return codes and their meanings.)  */
+
+int
+regcomp (regex_t *__restrict preg,
+	 const char *__restrict pattern,
+	 int cflags)
+{
+  reg_errcode_t ret;
+  reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
+			 : RE_SYNTAX_POSIX_BASIC);
+
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* Try to allocate space for the fastmap.  */
+  preg->fastmap = re_malloc (char, SBC_MAX);
+  if (BE (preg->fastmap == NULL, 0))
+    return REG_ESPACE;
+
+  syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
+
+  /* If REG_NEWLINE is set, newlines are treated differently.  */
+  if (cflags & REG_NEWLINE)
+    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      /* It also changes the matching behavior.  */
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+  preg->no_sub = !!(cflags & REG_NOSUB);
+  preg->translate = NULL;
+
+  ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
+
+  /* POSIX doesn't distinguish between an unmatched open-group and an
+     unmatched close-group: both are REG_EPAREN.  */
+  if (ret == REG_ERPAREN)
+    ret = REG_EPAREN;
+
+  /* We have already checked preg->fastmap != NULL.  */
+  if (BE (ret == REG_NOERROR, 1))
+    /* Compute the fastmap now, since regexec cannot modify the pattern
+       buffer.  This function never fails in this implementation.  */
+    (void) re_compile_fastmap (preg);
+  else
+    {
+      /* Some error occurred while compiling the expression.  */
+      re_free (preg->fastmap);
+      preg->fastmap = NULL;
+    }
+
+  return (int) ret;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+   from either regcomp or regexec.   We don't use PREG here.  */
+
+size_t
+regerror(int errcode, const regex_t *__restrict preg,
+	 char *__restrict errbuf, size_t errbuf_size)
+{
+  const char *msg;
+  size_t msg_size;
+
+  if (BE (errcode < 0
+	  || errcode >= (int) (sizeof (__re_error_msgid_idx)
+			       / sizeof (__re_error_msgid_idx[0])), 0))
+    /* Only error codes returned by the rest of the code should be passed
+       to this routine.  If we are given anything else, or if other regex
+       code generates an invalid error code, then the program has a bug.
+       Dump core so we can fix it.  */
+    abort ();
+
+  msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
+
+  msg_size = strlen (msg) + 1; /* Includes the null.  */
+
+  if (BE (errbuf_size != 0, 1))
+    {
+      if (BE (msg_size > errbuf_size, 0))
+	{
+	  memcpy (errbuf, msg, errbuf_size - 1);
+	  errbuf[errbuf_size - 1] = 0;
+	}
+      else
+	memcpy (errbuf, msg, msg_size);
+    }
+
+  return msg_size;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+#ifdef RE_ENABLE_I18N
+/* This static array is used for the map to single-byte characters when
+   UTF-8 is used.  Otherwise we would allocate memory just to initialize
+   it the same all the time.  UTF-8 is the preferred encoding so this is
+   a worthwhile optimization.  */
+#if __GNUC__ >= 3
+static const bitset_t utf8_sb_map = {
+  /* Set the first 128 bits.  */
+  [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
+};
+#else /* ! (__GNUC__ >= 3) */
+static bitset_t utf8_sb_map;
+#endif /* __GNUC__ >= 3 */
+#endif /* RE_ENABLE_I18N */
+
+
+static void
+free_dfa_content (re_dfa_t *dfa)
+{
+  int i, j;
+
+  if (dfa->nodes)
+    for (i = 0; i < dfa->nodes_len; ++i)
+      free_token (dfa->nodes + i);
+  re_free (dfa->nexts);
+  for (i = 0; i < dfa->nodes_len; ++i)
+    {
+      if (dfa->eclosures != NULL)
+	re_node_set_free (dfa->eclosures + i);
+      if (dfa->inveclosures != NULL)
+	re_node_set_free (dfa->inveclosures + i);
+      if (dfa->edests != NULL)
+	re_node_set_free (dfa->edests + i);
+    }
+  re_free (dfa->edests);
+  re_free (dfa->eclosures);
+  re_free (dfa->inveclosures);
+  re_free (dfa->nodes);
+
+  if (dfa->state_table)
+    for (i = 0; i <= dfa->state_hash_mask; ++i)
+      {
+	struct re_state_table_entry *entry = dfa->state_table + i;
+	for (j = 0; j < entry->num; ++j)
+	  {
+	    re_dfastate_t *state = entry->array[j];
+	    free_state (state);
+	  }
+	re_free (entry->array);
+      }
+  re_free (dfa->state_table);
+#ifdef RE_ENABLE_I18N
+  if (dfa->sb_char != utf8_sb_map)
+    re_free (dfa->sb_char);
+#endif
+  re_free (dfa->subexp_map);
+#ifdef DEBUG
+  re_free (dfa->re_str);
+#endif
+
+  re_free (dfa);
+}
+
+
+/* Free dynamically allocated space used by PREG.  */
+
+void
+regfree (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  if (BE (dfa != NULL, 1))
+    free_dfa_content (dfa);
+  preg->buffer = NULL;
+  preg->allocated = 0;
+
+  re_free (preg->fastmap);
+  preg->fastmap = NULL;
+
+  re_free (preg->translate);
+  preg->translate = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer.  */
+static struct re_pattern_buffer re_comp_buf;
+
+char *
+# ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+   these names if they don't use our functions, and still use
+   regcomp/regexec above without link errors.  */
+weak_function
+# endif
+re_comp (s)
+     const char *s;
+{
+  reg_errcode_t ret;
+  char *fastmap;
+
+  if (!s)
+    {
+      if (!re_comp_buf.buffer)
+	return gettext ("No previous regular expression");
+      return 0;
+    }
+
+  if (re_comp_buf.buffer)
+    {
+      fastmap = re_comp_buf.fastmap;
+      re_comp_buf.fastmap = NULL;
+      __regfree (&re_comp_buf);
+      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
+      re_comp_buf.fastmap = fastmap;
+    }
+
+  if (re_comp_buf.fastmap == NULL)
+    {
+      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
+      if (re_comp_buf.fastmap == NULL)
+	return (char *) gettext (__re_error_msgid
+				 + __re_error_msgid_idx[(int) REG_ESPACE]);
+    }
+
+  /* Since `re_exec' always passes NULL for the `regs' argument, we
+     don't need to initialize the pattern buffer fields which affect it.  */
+
+  /* Match anchors at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
+
+  if (!ret)
+    return NULL;
+
+  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+  return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+
+#ifdef _LIBC
+libc_freeres_fn (free_mem)
+{
+  __regfree (&re_comp_buf);
+}
+#endif
+
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.
+   Compile the regular expression PATTERN, whose length is LENGTH.
+   SYNTAX indicate regular expression's syntax.  */
+
+static reg_errcode_t
+re_compile_internal (regex_t *preg, const char * pattern, size_t length,
+		     reg_syntax_t syntax)
+{
+  reg_errcode_t err = REG_NOERROR;
+  re_dfa_t *dfa;
+  re_string_t regexp;
+
+  /* Initialize the pattern buffer.  */
+  preg->fastmap_accurate = 0;
+  preg->syntax = syntax;
+  preg->not_bol = preg->not_eol = 0;
+  preg->used = 0;
+  preg->re_nsub = 0;
+  preg->can_be_null = 0;
+  preg->regs_allocated = REGS_UNALLOCATED;
+
+  /* Initialize the dfa.  */
+  dfa = (re_dfa_t *) preg->buffer;
+  if (BE (preg->allocated < sizeof (re_dfa_t), 0))
+    {
+      /* If zero allocated, but buffer is non-null, try to realloc
+	 enough space.  This loses if buffer's address is bogus, but
+	 that is the user's responsibility.  If ->buffer is NULL this
+	 is a simple allocation.  */
+      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
+      if (dfa == NULL)
+	return REG_ESPACE;
+      preg->allocated = sizeof (re_dfa_t);
+      preg->buffer = (unsigned char *) dfa;
+    }
+  preg->used = sizeof (re_dfa_t);
+
+  err = init_dfa (dfa, length);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+#ifdef DEBUG
+  /* Note: length+1 will not overflow since it is checked in init_dfa.  */
+  dfa->re_str = re_malloc (char, length + 1);
+  strncpy (dfa->re_str, pattern, length + 1);
+#endif
+
+  __libc_lock_init (dfa->lock);
+
+  err = re_string_construct (&regexp, pattern, length, preg->translate,
+			     syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    {
+    re_compile_internal_free_return:
+      free_workarea_compile (preg);
+      re_string_destruct (&regexp);
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+
+  /* Parse the regular expression, and build a structure tree.  */
+  preg->re_nsub = 0;
+  dfa->str_tree = parse (&regexp, preg, syntax, &err);
+  if (BE (dfa->str_tree == NULL, 0))
+    goto re_compile_internal_free_return;
+
+  /* Analyze the tree and create the nfa.  */
+  err = analyze (preg);
+  if (BE (err != REG_NOERROR, 0))
+    goto re_compile_internal_free_return;
+
+#ifdef RE_ENABLE_I18N
+  /* If possible, do searching in single byte encoding to speed things up.  */
+  if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
+    optimize_utf8 (dfa);
+#endif
+
+  /* Then create the initial state of the dfa.  */
+  err = create_initial_state (dfa);
+
+  /* Release work areas.  */
+  free_workarea_compile (preg);
+  re_string_destruct (&regexp);
+
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+    }
+
+  return err;
+}
+
+/* Initialize DFA.  We use the length of the regular expression PAT_LEN
+   as the initial length of some arrays.  */
+
+static reg_errcode_t
+init_dfa (re_dfa_t *dfa, size_t pat_len)
+{
+  unsigned int table_size;
+#ifndef _LIBC
+  char *codeset_name;
+#endif
+
+  memset (dfa, '\0', sizeof (re_dfa_t));
+
+  /* Force allocation of str_tree_storage the first time.  */
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
+  /* Avoid overflows.  */
+  if (pat_len == SIZE_MAX)
+    return REG_ESPACE;
+
+  dfa->nodes_alloc = pat_len + 1;
+  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+
+  /*  table_size = 2 ^ ceil(log pat_len) */
+  for (table_size = 1; ; table_size <<= 1)
+    if (table_size > pat_len)
+      break;
+
+  dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
+  dfa->state_hash_mask = table_size - 1;
+
+  dfa->mb_cur_max = MB_CUR_MAX;
+#ifdef _LIBC
+  if (dfa->mb_cur_max == 6
+      && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
+    dfa->is_utf8 = 1;
+  dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
+		       != 0);
+#else
+# ifdef HAVE_LANGINFO_CODESET
+  codeset_name = nl_langinfo (CODESET);
+# else
+  codeset_name = getenv ("LC_ALL");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LC_CTYPE");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LANG");
+  if (codeset_name == NULL)
+    codeset_name = "";
+  else if (strchr (codeset_name, '.') !=  NULL)
+    codeset_name = strchr (codeset_name, '.') + 1;
+# endif
+
+  /* strcasecmp isn't a standard interface. brute force check */
+#if 0
+  if (strcasecmp (codeset_name, "UTF-8") == 0
+      || strcasecmp (codeset_name, "UTF8") == 0)
+    dfa->is_utf8 = 1;
+#else
+  if (   (codeset_name[0] == 'U' || codeset_name[0] == 'u')
+      && (codeset_name[1] == 'T' || codeset_name[1] == 't')
+      && (codeset_name[2] == 'F' || codeset_name[2] == 'f')
+      && (codeset_name[3] == '-'
+          ? codeset_name[4] == '8' && codeset_name[5] == '\0'
+          : codeset_name[3] == '8' && codeset_name[4] == '\0'))
+    dfa->is_utf8 = 1;
+#endif
+
+  /* We check exhaustively in the loop below if this charset is a
+     superset of ASCII.  */
+  dfa->map_notascii = 0;
+#endif
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      if (dfa->is_utf8)
+        {
+#if !defined(__GNUC__) || __GNUC__ < 3
+	  static short utf8_sb_map_inited = 0;
+
+	  if (! utf8_sb_map_inited)
+	    {
+		int i;
+
+	  	utf8_sb_map_inited = 0;
+		for (i = 0; i <= 0x80 / BITSET_WORD_BITS - 1; i++)
+		  utf8_sb_map[i] = BITSET_WORD_MAX;
+	    }
+#endif
+	  dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
+	}
+      else
+	{
+	  int i, j, ch;
+
+	  dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+	  if (BE (dfa->sb_char == NULL, 0))
+	    return REG_ESPACE;
+
+	  /* Set the bits corresponding to single byte chars.  */
+	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+	    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+	      {
+		wint_t wch = __btowc (ch);
+		if (wch != WEOF)
+		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
+# ifndef _LIBC
+		if (isascii (ch) && wch != ch)
+		  dfa->map_notascii = 1;
+# endif
+	      }
+	}
+    }
+#endif
+
+  if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
+    return REG_ESPACE;
+  return REG_NOERROR;
+}
+
+/* Initialize WORD_CHAR table, which indicate which character is
+   "word".  In this case "word" means that it is the word construction
+   character used by some operators like "\<", "\>", etc.  */
+
+static void
+internal_function
+init_word_char (re_dfa_t *dfa)
+{
+  int i, j, ch;
+  dfa->word_ops_used = 1;
+  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+      if (isalnum (ch) || ch == '_')
+	dfa->word_char[i] |= (bitset_word_t) 1 << j;
+}
+
+/* Free the work area which are only used while compiling.  */
+
+static void
+free_workarea_compile (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_storage_t *storage, *next;
+  for (storage = dfa->str_tree_storage; storage; storage = next)
+    {
+      next = storage->next;
+      re_free (storage);
+    }
+  dfa->str_tree_storage = NULL;
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+  dfa->str_tree = NULL;
+  re_free (dfa->org_indices);
+  dfa->org_indices = NULL;
+}
+
+/* Create initial states for all contexts.  */
+
+static reg_errcode_t
+create_initial_state (re_dfa_t *dfa)
+{
+  int first, i;
+  reg_errcode_t err;
+  re_node_set init_nodes;
+
+  /* Initial states have the epsilon closure of the node which is
+     the first node of the regular expression.  */
+  first = dfa->str_tree->first->node_idx;
+  dfa->init_node = first;
+  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* The back-references which are in initial states can epsilon transit,
+     since in this case all of the subexpressions can be null.
+     Then we add epsilon closures of the nodes which are the next nodes of
+     the back-references.  */
+  if (dfa->nbackref > 0)
+    for (i = 0; i < init_nodes.nelem; ++i)
+      {
+	int node_idx = init_nodes.elems[i];
+	re_token_type_t type = dfa->nodes[node_idx].type;
+
+	int clexp_idx;
+	if (type != OP_BACK_REF)
+	  continue;
+	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+	  {
+	    re_token_t *clexp_node;
+	    clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+	    if (clexp_node->type == OP_CLOSE_SUBEXP
+		&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
+	      break;
+	  }
+	if (clexp_idx == init_nodes.nelem)
+	  continue;
+
+	if (type == OP_BACK_REF)
+	  {
+	    int dest_idx = dfa->edests[node_idx].elems[0];
+	    if (!re_node_set_contains (&init_nodes, dest_idx))
+	      {
+		reg_errcode_t err = re_node_set_merge (&init_nodes,
+						       dfa->eclosures
+						       + dest_idx);
+		if (err != REG_NOERROR)
+		  return err;
+		i = 0;
+	      }
+	  }
+      }
+
+  /* It must be the first time to invoke acquire_state.  */
+  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+  /* We don't check ERR here, since the initial state must not be NULL.  */
+  if (BE (dfa->init_state == NULL, 0))
+    return err;
+  if (dfa->init_state->has_constraint)
+    {
+      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
+						       CONTEXT_WORD);
+      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
+						     CONTEXT_NEWLINE);
+      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+							 &init_nodes,
+							 CONTEXT_NEWLINE
+							 | CONTEXT_BEGBUF);
+      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+	      || dfa->init_state_begbuf == NULL, 0))
+	return err;
+    }
+  else
+    dfa->init_state_word = dfa->init_state_nl
+      = dfa->init_state_begbuf = dfa->init_state;
+
+  re_node_set_free (&init_nodes);
+  return REG_NOERROR;
+}
+
+#ifdef RE_ENABLE_I18N
+/* If it is possible to do searching in single byte encoding instead of UTF-8
+   to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
+   DFA nodes where needed.  */
+
+static void
+optimize_utf8 (re_dfa_t *dfa)
+{
+  int node, i, mb_chars = 0, has_period = 0;
+
+  for (node = 0; node < dfa->nodes_len; ++node)
+    switch (dfa->nodes[node].type)
+      {
+      case CHARACTER:
+	if (dfa->nodes[node].opr.c >= 0x80)
+	  mb_chars = 1;
+	break;
+      case ANCHOR:
+	switch (dfa->nodes[node].opr.ctx_type)
+	  {
+	  case LINE_FIRST:
+	  case LINE_LAST:
+	  case BUF_FIRST:
+	  case BUF_LAST:
+	    break;
+	  default:
+	    /* Word anchors etc. cannot be handled.  It's okay to test
+	       opr.ctx_type since constraints (for all DFA nodes) are
+	       created by ORing one or more opr.ctx_type values.  */
+	    return;
+	  }
+	break;
+      case OP_PERIOD:
+	has_period = 1;
+	break;
+      case OP_BACK_REF:
+      case OP_ALT:
+      case END_OF_RE:
+      case OP_DUP_ASTERISK:
+      case OP_OPEN_SUBEXP:
+      case OP_CLOSE_SUBEXP:
+	break;
+      case COMPLEX_BRACKET:
+	return;
+      case SIMPLE_BRACKET:
+	/* Just double check.  The non-ASCII range starts at 0x80.  */
+	assert (0x80 % BITSET_WORD_BITS == 0);
+	for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
+	  if (dfa->nodes[node].opr.sbcset[i])
+	    return;
+	break;
+      default:
+	abort ();
+      }
+
+  if (mb_chars || has_period)
+    for (node = 0; node < dfa->nodes_len; ++node)
+      {
+	if (dfa->nodes[node].type == CHARACTER
+	    && dfa->nodes[node].opr.c >= 0x80)
+	  dfa->nodes[node].mb_partial = 0;
+	else if (dfa->nodes[node].type == OP_PERIOD)
+	  dfa->nodes[node].type = OP_UTF8_PERIOD;
+      }
+
+  /* The search can be in single byte locale.  */
+  dfa->mb_cur_max = 1;
+  dfa->is_utf8 = 0;
+  dfa->has_mb_node = dfa->nbackref > 0 || has_period;
+}
+#endif
+
+/* Analyze the structure tree, and calculate "first", "next", "edest",
+   "eclosure", and "inveclosure".  */
+
+static reg_errcode_t
+analyze (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t ret;
+
+  /* Allocate arrays.  */
+  dfa->nexts = re_malloc (int, dfa->nodes_alloc);
+  dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
+  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
+	  || dfa->eclosures == NULL, 0))
+    return REG_ESPACE;
+
+  dfa->subexp_map = re_malloc (int, preg->re_nsub);
+  if (dfa->subexp_map != NULL)
+    {
+      int i;
+      for (i = 0; i < preg->re_nsub; i++)
+	dfa->subexp_map[i] = i;
+      preorder (dfa->str_tree, optimize_subexps, dfa);
+      for (i = 0; i < preg->re_nsub; i++)
+	if (dfa->subexp_map[i] != i)
+	  break;
+      if (i == preg->re_nsub)
+	{
+	  free (dfa->subexp_map);
+	  dfa->subexp_map = NULL;
+	}
+    }
+
+  ret = postorder (dfa->str_tree, lower_subexps, preg);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+  ret = postorder (dfa->str_tree, calc_first, dfa);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+  preorder (dfa->str_tree, calc_next, dfa);
+  ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+  ret = calc_eclosure (dfa);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+
+  /* We only need this during the prune_impossible_nodes pass in regexec.c;
+     skip it if p_i_n will not run, as calc_inveclosure can be quadratic.  */
+  if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
+      || dfa->nbackref)
+    {
+      dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
+      if (BE (dfa->inveclosures == NULL, 0))
+	return REG_ESPACE;
+      ret = calc_inveclosure (dfa);
+    }
+
+  return ret;
+}
+
+/* Our parse trees are very unbalanced, so we cannot use a stack to
+   implement parse tree visits.  Instead, we use parent pointers and
+   some hairy code in these two functions.  */
+static reg_errcode_t
+postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+	   void *extra)
+{
+  bin_tree_t *node, *prev;
+
+  for (node = root; ; )
+    {
+      /* Descend down the tree, preferably to the left (or to the right
+	 if that's the only child).  */
+      while (node->left || node->right)
+	if (node->left)
+	  node = node->left;
+	else
+	  node = node->right;
+
+      do
+	{
+	  reg_errcode_t err = fn (extra, node);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	  if (node->parent == NULL)
+	    return REG_NOERROR;
+	  prev = node;
+	  node = node->parent;
+	}
+      /* Go up while we have a node that is reached from the right.  */
+      while (node->right == prev || node->right == NULL);
+      node = node->right;
+    }
+}
+
+static reg_errcode_t
+preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+	  void *extra)
+{
+  bin_tree_t *node;
+
+  for (node = root; ; )
+    {
+      reg_errcode_t err = fn (extra, node);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+
+      /* Go to the left node, or up and to the right.  */
+      if (node->left)
+	node = node->left;
+      else
+	{
+	  bin_tree_t *prev = NULL;
+	  while (node->right == prev || node->right == NULL)
+	    {
+	      prev = node;
+	      node = node->parent;
+	      if (!node)
+		return REG_NOERROR;
+	    }
+	  node = node->right;
+	}
+    }
+}
+
+/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
+   re_search_internal to map the inner one's opr.idx to this one's.  Adjust
+   backreferences as well.  Requires a preorder visit.  */
+static reg_errcode_t
+optimize_subexps (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+
+  if (node->token.type == OP_BACK_REF && dfa->subexp_map)
+    {
+      int idx = node->token.opr.idx;
+      node->token.opr.idx = dfa->subexp_map[idx];
+      dfa->used_bkref_map |= 1 << node->token.opr.idx;
+    }
+
+  else if (node->token.type == SUBEXP
+	   && node->left && node->left->token.type == SUBEXP)
+    {
+      int other_idx = node->left->token.opr.idx;
+
+      node->left = node->left->left;
+      if (node->left)
+	node->left->parent = node;
+
+      dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
+      if (other_idx < BITSET_WORD_BITS)
+	  dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
+    }
+
+  return REG_NOERROR;
+}
+
+/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
+   of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP.  */
+static reg_errcode_t
+lower_subexps (void *extra, bin_tree_t *node)
+{
+  regex_t *preg = (regex_t *) extra;
+  reg_errcode_t err = REG_NOERROR;
+
+  if (node->left && node->left->token.type == SUBEXP)
+    {
+      node->left = lower_subexp (&err, preg, node->left);
+      if (node->left)
+	node->left->parent = node;
+    }
+  if (node->right && node->right->token.type == SUBEXP)
+    {
+      node->right = lower_subexp (&err, preg, node->right);
+      if (node->right)
+	node->right->parent = node;
+    }
+
+  return err;
+}
+
+static bin_tree_t *
+lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *body = node->left;
+  bin_tree_t *op, *cls, *tree1, *tree;
+
+  if (preg->no_sub
+      /* We do not optimize empty subexpressions, because otherwise we may
+	 have bad CONCAT nodes with NULL children.  This is obviously not
+	 very common, so we do not lose much.  An example that triggers
+	 this case is the sed "script" /\(\)/x.  */
+      && node->left != NULL
+      && (node->token.opr.idx >= BITSET_WORD_BITS
+	  || !(dfa->used_bkref_map
+	       & ((bitset_word_t) 1 << node->token.opr.idx))))
+    return node->left;
+
+  /* Convert the SUBEXP node to the concatenation of an
+     OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP.  */
+  op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
+  cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
+  tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
+  tree = create_tree (dfa, op, tree1, CONCAT);
+  if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
+  op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
+  return tree;
+}
+
+/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
+   nodes.  Requires a postorder visit.  */
+static reg_errcode_t
+calc_first (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+  if (node->token.type == CONCAT)
+    {
+      node->first = node->left->first;
+      node->node_idx = node->left->node_idx;
+    }
+  else
+    {
+      node->first = node;
+      node->node_idx = re_dfa_add_node (dfa, node->token);
+      if (BE (node->node_idx == -1, 0))
+	return REG_ESPACE;
+      if (node->token.type == ANCHOR)
+	dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
+    }
+  return REG_NOERROR;
+}
+
+/* Pass 2: compute NEXT on the tree.  Preorder visit.  */
+static reg_errcode_t
+calc_next (void *extra, bin_tree_t *node)
+{
+  switch (node->token.type)
+    {
+    case OP_DUP_ASTERISK:
+      node->left->next = node;
+      break;
+    case CONCAT:
+      node->left->next = node->right->first;
+      node->right->next = node->next;
+      break;
+    default:
+      if (node->left)
+	node->left->next = node->next;
+      if (node->right)
+	node->right->next = node->next;
+      break;
+    }
+  return REG_NOERROR;
+}
+
+/* Pass 3: link all DFA nodes to their NEXT node (any order will do).  */
+static reg_errcode_t
+link_nfa_nodes (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+  int idx = node->node_idx;
+  reg_errcode_t err = REG_NOERROR;
+
+  switch (node->token.type)
+    {
+    case CONCAT:
+      break;
+
+    case END_OF_RE:
+      assert (node->next == NULL);
+      break;
+
+    case OP_DUP_ASTERISK:
+    case OP_ALT:
+      {
+	int left, right;
+	dfa->has_plural_match = 1;
+	if (node->left != NULL)
+	  left = node->left->first->node_idx;
+	else
+	  left = node->next->node_idx;
+	if (node->right != NULL)
+	  right = node->right->first->node_idx;
+	else
+	  right = node->next->node_idx;
+	assert (left > -1);
+	assert (right > -1);
+	err = re_node_set_init_2 (dfa->edests + idx, left, right);
+      }
+      break;
+
+    case ANCHOR:
+    case OP_OPEN_SUBEXP:
+    case OP_CLOSE_SUBEXP:
+      err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
+      break;
+
+    case OP_BACK_REF:
+      dfa->nexts[idx] = node->next->node_idx;
+      if (node->token.type == OP_BACK_REF)
+	err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
+      break;
+
+    default:
+      assert (!IS_EPSILON_NODE (node->token.type));
+      dfa->nexts[idx] = node->next->node_idx;
+      break;
+    }
+
+  return err;
+}
+
+/* Duplicate the epsilon closure of the node ROOT_NODE.
+   Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
+   to their own constraint.  */
+
+static reg_errcode_t
+internal_function
+duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
+			int root_node, unsigned int init_constraint)
+{
+  int org_node, clone_node, ret;
+  unsigned int constraint = init_constraint;
+  for (org_node = top_org_node, clone_node = top_clone_node;;)
+    {
+      int org_dest, clone_dest;
+      if (dfa->nodes[org_node].type == OP_BACK_REF)
+	{
+	  /* If the back reference epsilon-transit, its destination must
+	     also have the constraint.  Then duplicate the epsilon closure
+	     of the destination of the back reference, and store it in
+	     edests of the back reference.  */
+	  org_dest = dfa->nexts[org_node];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  dfa->nexts[clone_node] = dfa->nexts[org_node];
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      else if (dfa->edests[org_node].nelem == 0)
+	{
+	  /* In case of the node can't epsilon-transit, don't duplicate the
+	     destination and store the original destination as the
+	     destination of the node.  */
+	  dfa->nexts[clone_node] = dfa->nexts[org_node];
+	  break;
+	}
+      else if (dfa->edests[org_node].nelem == 1)
+	{
+	  /* In case of the node can epsilon-transit, and it has only one
+	     destination.  */
+	  org_dest = dfa->edests[org_node].elems[0];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  /* If the node is root_node itself, it means the epsilon clsoure
+	     has a loop.   Then tie it to the destination of the root_node.  */
+	  if (org_node == root_node && clone_node != org_node)
+	    {
+	      ret = re_node_set_insert (dfa->edests + clone_node, org_dest);
+	      if (BE (ret < 0, 0))
+		return REG_ESPACE;
+	      break;
+	    }
+	  /* In case of the node has another constraint, add it.  */
+	  constraint |= dfa->nodes[org_node].constraint;
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      else /* dfa->edests[org_node].nelem == 2 */
+	{
+	  /* In case of the node can epsilon-transit, and it has two
+	     destinations. In the bin_tree_t and DFA, that's '|' and '*'.   */
+	  org_dest = dfa->edests[org_node].elems[0];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  /* Search for a duplicated node which satisfies the constraint.  */
+	  clone_dest = search_duplicated_node (dfa, org_dest, constraint);
+	  if (clone_dest == -1)
+	    {
+	      /* There is no such duplicated node, create a new one.  */
+	      reg_errcode_t err;
+	      clone_dest = duplicate_node (dfa, org_dest, constraint);
+	      if (BE (clone_dest == -1, 0))
+		return REG_ESPACE;
+	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	      if (BE (ret < 0, 0))
+		return REG_ESPACE;
+	      err = duplicate_node_closure (dfa, org_dest, clone_dest,
+					    root_node, constraint);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+	  else
+	    {
+	      /* There is a duplicated node which satisfies the constraint,
+		 use it to avoid infinite loop.  */
+	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	      if (BE (ret < 0, 0))
+		return REG_ESPACE;
+	    }
+
+	  org_dest = dfa->edests[org_node].elems[1];
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      org_node = org_dest;
+      clone_node = clone_dest;
+    }
+  return REG_NOERROR;
+}
+
+/* Search for a node which is duplicated from the node ORG_NODE, and
+   satisfies the constraint CONSTRAINT.  */
+
+static int
+search_duplicated_node (const re_dfa_t *dfa, int org_node,
+			unsigned int constraint)
+{
+  int idx;
+  for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
+    {
+      if (org_node == dfa->org_indices[idx]
+	  && constraint == dfa->nodes[idx].constraint)
+	return idx; /* Found.  */
+    }
+  return -1; /* Not found.  */
+}
+
+/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
+   Return the index of the new node, or -1 if insufficient storage is
+   available.  */
+
+static int
+duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
+{
+  int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
+  if (BE (dup_idx != -1, 1))
+    {
+      dfa->nodes[dup_idx].constraint = constraint;
+      dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
+      dfa->nodes[dup_idx].duplicated = 1;
+
+      /* Store the index of the original node.  */
+      dfa->org_indices[dup_idx] = org_idx;
+    }
+  return dup_idx;
+}
+
+static reg_errcode_t
+calc_inveclosure (re_dfa_t *dfa)
+{
+  int src, idx, ret;
+  for (idx = 0; idx < dfa->nodes_len; ++idx)
+    re_node_set_init_empty (dfa->inveclosures + idx);
+
+  for (src = 0; src < dfa->nodes_len; ++src)
+    {
+      int *elems = dfa->eclosures[src].elems;
+      for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
+	{
+	  ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
+	  if (BE (ret == -1, 0))
+	    return REG_ESPACE;
+	}
+    }
+
+  return REG_NOERROR;
+}
+
+/* Calculate "eclosure" for all the node in DFA.  */
+
+static reg_errcode_t
+calc_eclosure (re_dfa_t *dfa)
+{
+  int node_idx, incomplete;
+#ifdef DEBUG
+  assert (dfa->nodes_len > 0);
+#endif
+  incomplete = 0;
+  /* For each nodes, calculate epsilon closure.  */
+  for (node_idx = 0; ; ++node_idx)
+    {
+      reg_errcode_t err;
+      re_node_set eclosure_elem;
+      if (node_idx == dfa->nodes_len)
+	{
+	  if (!incomplete)
+	    break;
+	  incomplete = 0;
+	  node_idx = 0;
+	}
+
+#ifdef DEBUG
+      assert (dfa->eclosures[node_idx].nelem != -1);
+#endif
+
+      /* If we have already calculated, skip it.  */
+      if (dfa->eclosures[node_idx].nelem != 0)
+	continue;
+      /* Calculate epsilon closure of `node_idx'.  */
+      err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+
+      if (dfa->eclosures[node_idx].nelem == 0)
+	{
+	  incomplete = 1;
+	  re_node_set_free (&eclosure_elem);
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Calculate epsilon closure of NODE.  */
+
+static reg_errcode_t
+calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
+{
+  reg_errcode_t err;
+  int i;
+  re_node_set eclosure;
+  int ret;
+  int incomplete = 0;
+  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* This indicates that we are calculating this node now.
+     We reference this value to avoid infinite loop.  */
+  dfa->eclosures[node].nelem = -1;
+
+  /* If the current node has constraints, duplicate all nodes
+     since they must inherit the constraints.  */
+  if (dfa->nodes[node].constraint
+      && dfa->edests[node].nelem
+      && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+    {
+      err = duplicate_node_closure (dfa, node, node, node,
+				    dfa->nodes[node].constraint);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+
+  /* Expand each epsilon destination nodes.  */
+  if (IS_EPSILON_NODE(dfa->nodes[node].type))
+    for (i = 0; i < dfa->edests[node].nelem; ++i)
+      {
+	re_node_set eclosure_elem;
+	int edest = dfa->edests[node].elems[i];
+	/* If calculating the epsilon closure of `edest' is in progress,
+	   return intermediate result.  */
+	if (dfa->eclosures[edest].nelem == -1)
+	  {
+	    incomplete = 1;
+	    continue;
+	  }
+	/* If we haven't calculated the epsilon closure of `edest' yet,
+	   calculate now. Otherwise use calculated epsilon closure.  */
+	if (dfa->eclosures[edest].nelem == 0)
+	  {
+	    err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+	    if (BE (err != REG_NOERROR, 0))
+	      return err;
+	  }
+	else
+	  eclosure_elem = dfa->eclosures[edest];
+	/* Merge the epsilon closure of `edest'.  */
+	err = re_node_set_merge (&eclosure, &eclosure_elem);
+	if (BE (err != REG_NOERROR, 0))
+	  return err;
+	/* If the epsilon closure of `edest' is incomplete,
+	   the epsilon closure of this node is also incomplete.  */
+	if (dfa->eclosures[edest].nelem == 0)
+	  {
+	    incomplete = 1;
+	    re_node_set_free (&eclosure_elem);
+	  }
+      }
+
+  /* An epsilon closure includes itself.  */
+  ret = re_node_set_insert (&eclosure, node);
+  if (BE (ret < 0, 0))
+    return REG_ESPACE;
+  if (incomplete && !root)
+    dfa->eclosures[node].nelem = 0;
+  else
+    dfa->eclosures[node] = eclosure;
+  *new_set = eclosure;
+  return REG_NOERROR;
+}
+
+/* Functions for token which are used in the parser.  */
+
+/* Fetch a token from INPUT.
+   We must not use this function inside bracket expressions.  */
+
+static void
+internal_function
+fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
+{
+  re_string_skip_bytes (input, peek_token (result, input, syntax));
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+   We must not use this function inside bracket expressions.  */
+
+static int
+internal_function
+peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+  unsigned char c;
+
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+  token->word_char = 0;
+#ifdef RE_ENABLE_I18N
+  token->mb_partial = 0;
+  if (input->mb_cur_max > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      token->mb_partial = 1;
+      return 1;
+    }
+#endif
+  if (c == '\\')
+    {
+      unsigned char c2;
+      if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+	{
+	  token->type = BACK_SLASH;
+	  return 1;
+	}
+
+      c2 = re_string_peek_byte_case (input, 1);
+      token->opr.c = c2;
+      token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+      if (input->mb_cur_max > 1)
+	{
+	  wint_t wc = re_string_wchar_at (input,
+					  re_string_cur_idx (input) + 1);
+	  token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+	}
+      else
+#endif
+	token->word_char = IS_WORD_CHAR (c2) != 0;
+
+      switch (c2)
+	{
+	case '|':
+	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+	    token->type = OP_ALT;
+	  break;
+	case '1': case '2': case '3': case '4': case '5':
+	case '6': case '7': case '8': case '9':
+	  if (!(syntax & RE_NO_BK_REFS))
+	    {
+	      token->type = OP_BACK_REF;
+	      token->opr.idx = c2 - '1';
+	    }
+	  break;
+	case '<':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_FIRST;
+	    }
+	  break;
+	case '>':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_LAST;
+	    }
+	  break;
+	case 'b':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_DELIM;
+	    }
+	  break;
+	case 'B':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = NOT_WORD_DELIM;
+	    }
+	  break;
+	case 'w':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_WORD;
+	  break;
+	case 'W':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_NOTWORD;
+	  break;
+	case 's':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_SPACE;
+	  break;
+	case 'S':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_NOTSPACE;
+	  break;
+	case '`':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = BUF_FIRST;
+	    }
+	  break;
+	case '\'':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = BUF_LAST;
+	    }
+	  break;
+	case '(':
+	  if (!(syntax & RE_NO_BK_PARENS))
+	    token->type = OP_OPEN_SUBEXP;
+	  break;
+	case ')':
+	  if (!(syntax & RE_NO_BK_PARENS))
+	    token->type = OP_CLOSE_SUBEXP;
+	  break;
+	case '+':
+	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+	    token->type = OP_DUP_PLUS;
+	  break;
+	case '?':
+	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+	    token->type = OP_DUP_QUESTION;
+	  break;
+	case '{':
+	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+	    token->type = OP_OPEN_DUP_NUM;
+	  break;
+	case '}':
+	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+	    token->type = OP_CLOSE_DUP_NUM;
+	  break;
+	default:
+	  break;
+	}
+      return 2;
+    }
+
+  token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
+      token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+    }
+  else
+#endif
+    token->word_char = IS_WORD_CHAR (token->opr.c);
+
+  switch (c)
+    {
+    case '\n':
+      if (syntax & RE_NEWLINE_ALT)
+	token->type = OP_ALT;
+      break;
+    case '|':
+      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+	token->type = OP_ALT;
+      break;
+    case '*':
+      token->type = OP_DUP_ASTERISK;
+      break;
+    case '+':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+	token->type = OP_DUP_PLUS;
+      break;
+    case '?':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+	token->type = OP_DUP_QUESTION;
+      break;
+    case '{':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+	token->type = OP_OPEN_DUP_NUM;
+      break;
+    case '}':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+	token->type = OP_CLOSE_DUP_NUM;
+      break;
+    case '(':
+      if (syntax & RE_NO_BK_PARENS)
+	token->type = OP_OPEN_SUBEXP;
+      break;
+    case ')':
+      if (syntax & RE_NO_BK_PARENS)
+	token->type = OP_CLOSE_SUBEXP;
+      break;
+    case '[':
+      token->type = OP_OPEN_BRACKET;
+      break;
+    case '.':
+      token->type = OP_PERIOD;
+      break;
+    case '^':
+      if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
+	  re_string_cur_idx (input) != 0)
+	{
+	  char prev = re_string_peek_byte (input, -1);
+	  if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
+	    break;
+	}
+      token->type = ANCHOR;
+      token->opr.ctx_type = LINE_FIRST;
+      break;
+    case '$':
+      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+	  re_string_cur_idx (input) + 1 != re_string_length (input))
+	{
+	  re_token_t next;
+	  re_string_skip_bytes (input, 1);
+	  peek_token (&next, input, syntax);
+	  re_string_skip_bytes (input, -1);
+	  if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+	    break;
+	}
+      token->type = ANCHOR;
+      token->opr.ctx_type = LINE_LAST;
+      break;
+    default:
+      break;
+    }
+  return 1;
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+   We must not use this function out of bracket expressions.  */
+
+static int
+internal_function
+peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+  unsigned char c;
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      return 1;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+      && re_string_cur_idx (input) + 1 < re_string_length (input))
+    {
+      /* In this case, '\' escape a character.  */
+      unsigned char c2;
+      re_string_skip_bytes (input, 1);
+      c2 = re_string_peek_byte (input, 0);
+      token->opr.c = c2;
+      token->type = CHARACTER;
+      return 1;
+    }
+  if (c == '[') /* '[' is a special char in a bracket exps.  */
+    {
+      unsigned char c2;
+      int token_len;
+      if (re_string_cur_idx (input) + 1 < re_string_length (input))
+	c2 = re_string_peek_byte (input, 1);
+      else
+	c2 = 0;
+      token->opr.c = c2;
+      token_len = 2;
+      switch (c2)
+	{
+	case '.':
+	  token->type = OP_OPEN_COLL_ELEM;
+	  break;
+	case '=':
+	  token->type = OP_OPEN_EQUIV_CLASS;
+	  break;
+	case ':':
+	  if (syntax & RE_CHAR_CLASSES)
+	    {
+	      token->type = OP_OPEN_CHAR_CLASS;
+	      break;
+	    }
+	  /* else fall through.  */
+	default:
+	  token->type = CHARACTER;
+	  token->opr.c = c;
+	  token_len = 1;
+	  break;
+	}
+      return token_len;
+    }
+  switch (c)
+    {
+    case '-':
+      token->type = OP_CHARSET_RANGE;
+      break;
+    case ']':
+      token->type = OP_CLOSE_BRACKET;
+      break;
+    case '^':
+      token->type = OP_NON_MATCH_LIST;
+      break;
+    default:
+      token->type = CHARACTER;
+    }
+  return 1;
+}
+
+/* Functions for parser.  */
+
+/* Entry point of the parser.
+   Parse the regular expression REGEXP and return the structure tree.
+   If an error is occured, ERR is set by error code, and return NULL.
+   This function build the following tree, from regular expression <reg_exp>:
+	   CAT
+	   / \
+	  /   \
+   <reg_exp>  EOR
+
+   CAT means concatenation.
+   EOR means end of regular expression.  */
+
+static bin_tree_t *
+parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
+       reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree, *eor, *root;
+  re_token_t current_token;
+  dfa->syntax = syntax;
+  fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+  tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
+  if (BE (*err != REG_NOERROR && tree == NULL, 0))
+    return NULL;
+  eor = create_tree (dfa, NULL, NULL, END_OF_RE);
+  if (tree != NULL)
+    root = create_tree (dfa, tree, eor, CONCAT);
+  else
+    root = eor;
+  if (BE (eor == NULL || root == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  return root;
+}
+
+/* This function build the following tree, from regular expression
+   <branch1>|<branch2>:
+	   ALT
+	   / \
+	  /   \
+   <branch1> <branch2>
+
+   ALT means alternative, which represents the operator `|'.  */
+
+static bin_tree_t *
+parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	       reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree, *branch = NULL;
+  tree = parse_branch (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && tree == NULL, 0))
+    return NULL;
+
+  while (token->type == OP_ALT)
+    {
+      fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+      if (token->type != OP_ALT && token->type != END_OF_RE
+	  && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
+	{
+	  branch = parse_branch (regexp, preg, token, syntax, nest, err);
+	  if (BE (*err != REG_NOERROR && branch == NULL, 0))
+	    return NULL;
+	}
+      else
+	branch = NULL;
+      tree = create_tree (dfa, tree, branch, OP_ALT);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+    }
+  return tree;
+}
+
+/* This function build the following tree, from regular expression
+   <exp1><exp2>:
+	CAT
+	/ \
+       /   \
+   <exp1> <exp2>
+
+   CAT means concatenation.  */
+
+static bin_tree_t *
+parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	      reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  bin_tree_t *tree, *exp;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  tree = parse_expression (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && tree == NULL, 0))
+    return NULL;
+
+  while (token->type != OP_ALT && token->type != END_OF_RE
+	 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
+    {
+      exp = parse_expression (regexp, preg, token, syntax, nest, err);
+      if (BE (*err != REG_NOERROR && exp == NULL, 0))
+	{
+	  return NULL;
+	}
+      if (tree != NULL && exp != NULL)
+	{
+	  tree = create_tree (dfa, tree, exp, CONCAT);
+	  if (tree == NULL)
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      else if (tree == NULL)
+	tree = exp;
+      /* Otherwise exp == NULL, we don't need to create new tree.  */
+    }
+  return tree;
+}
+
+/* This function build the following tree, from regular expression a*:
+	 *
+	 |
+	 a
+*/
+
+static bin_tree_t *
+parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
+		  reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  switch (token->type)
+    {
+    case CHARACTER:
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	{
+	  while (!re_string_eoi (regexp)
+		 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
+	    {
+	      bin_tree_t *mbc_remain;
+	      fetch_token (token, regexp, syntax);
+	      mbc_remain = create_token_tree (dfa, NULL, NULL, token);
+	      tree = create_tree (dfa, tree, mbc_remain, CONCAT);
+	      if (BE (mbc_remain == NULL || tree == NULL, 0))
+		{
+		  *err = REG_ESPACE;
+		  return NULL;
+		}
+	    }
+	}
+#endif
+      break;
+    case OP_OPEN_SUBEXP:
+      tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_OPEN_BRACKET:
+      tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_BACK_REF:
+      if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
+	{
+	  *err = REG_ESUBREG;
+	  return NULL;
+	}
+      dfa->used_bkref_map |= 1 << token->opr.idx;
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      ++dfa->nbackref;
+      dfa->has_mb_node = 1;
+      break;
+    case OP_OPEN_DUP_NUM:
+      if (syntax & RE_CONTEXT_INVALID_DUP)
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+      /* FALLTHROUGH */
+    case OP_DUP_ASTERISK:
+    case OP_DUP_PLUS:
+    case OP_DUP_QUESTION:
+      if (syntax & RE_CONTEXT_INVALID_OPS)
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+      else if (syntax & RE_CONTEXT_INDEP_OPS)
+	{
+	  fetch_token (token, regexp, syntax);
+	  return parse_expression (regexp, preg, token, syntax, nest, err);
+	}
+      /* else fall through  */
+    case OP_CLOSE_SUBEXP:
+      if ((token->type == OP_CLOSE_SUBEXP) &&
+	  !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
+	{
+	  *err = REG_ERPAREN;
+	  return NULL;
+	}
+      /* else fall through  */
+    case OP_CLOSE_DUP_NUM:
+      /* We treat it as a normal character.  */
+
+      /* Then we can these characters as normal characters.  */
+      token->type = CHARACTER;
+      /* mb_partial and word_char bits should be initialized already
+	 by peek_token.  */
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      break;
+    case ANCHOR:
+      if ((token->opr.ctx_type
+	   & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
+	  && dfa->word_ops_used == 0)
+	init_word_char (dfa);
+      if (token->opr.ctx_type == WORD_DELIM
+	  || token->opr.ctx_type == NOT_WORD_DELIM)
+	{
+	  bin_tree_t *tree_first, *tree_last;
+	  if (token->opr.ctx_type == WORD_DELIM)
+	    {
+	      token->opr.ctx_type = WORD_FIRST;
+	      tree_first = create_token_tree (dfa, NULL, NULL, token);
+	      token->opr.ctx_type = WORD_LAST;
+	    }
+	  else
+	    {
+	      token->opr.ctx_type = INSIDE_WORD;
+	      tree_first = create_token_tree (dfa, NULL, NULL, token);
+	      token->opr.ctx_type = INSIDE_NOTWORD;
+	    }
+	  tree_last = create_token_tree (dfa, NULL, NULL, token);
+	  tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
+	  if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      else
+	{
+	  tree = create_token_tree (dfa, NULL, NULL, token);
+	  if (BE (tree == NULL, 0))
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      /* We must return here, since ANCHORs can't be followed
+	 by repetition operators.
+	 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
+	     it must not be "<ANCHOR(^)><REPEAT(*)>".  */
+      fetch_token (token, regexp, syntax);
+      return tree;
+    case OP_PERIOD:
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      if (dfa->mb_cur_max > 1)
+	dfa->has_mb_node = 1;
+      break;
+    case OP_WORD:
+    case OP_NOTWORD:
+      tree = build_charclass_op (dfa, regexp->trans,
+				 "alnum",
+				 "_",
+				 token->type == OP_NOTWORD, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_SPACE:
+    case OP_NOTSPACE:
+      tree = build_charclass_op (dfa, regexp->trans,
+				 "space",
+				 "",
+				 token->type == OP_NOTSPACE, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_ALT:
+    case END_OF_RE:
+      return NULL;
+    case BACK_SLASH:
+      *err = REG_EESCAPE;
+      return NULL;
+    default:
+      /* Must not happen?  */
+#ifdef DEBUG
+      assert (0);
+#endif
+      return NULL;
+    }
+  fetch_token (token, regexp, syntax);
+
+  while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
+	 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
+    {
+      tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      /* In BRE consecutive duplications are not allowed.  */
+      if ((syntax & RE_CONTEXT_INVALID_DUP)
+	  && (token->type == OP_DUP_ASTERISK
+	      || token->type == OP_OPEN_DUP_NUM))
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+    }
+
+  return tree;
+}
+
+/* This function build the following tree, from regular expression
+   (<reg_exp>):
+	 SUBEXP
+	    |
+	<reg_exp>
+*/
+
+static bin_tree_t *
+parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	       reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  size_t cur_nsub;
+  cur_nsub = preg->re_nsub++;
+
+  fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+  /* The subexpression may be a null string.  */
+  if (token->type == OP_CLOSE_SUBEXP)
+    tree = NULL;
+  else
+    {
+      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
+      if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
+	*err = REG_EPAREN;
+      if (BE (*err != REG_NOERROR, 0))
+	return NULL;
+    }
+
+  if (cur_nsub <= '9' - '1')
+    dfa->completed_bkref_map |= 1 << cur_nsub;
+
+  tree = create_tree (dfa, tree, NULL, SUBEXP);
+  if (BE (tree == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  tree->token.opr.idx = cur_nsub;
+  return tree;
+}
+
+/* This function parse repetition operators like "*", "+", "{1,3}" etc.  */
+
+static bin_tree_t *
+parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
+	      re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
+{
+  bin_tree_t *tree = NULL, *old_tree = NULL;
+  int i, start, end, start_idx = re_string_cur_idx (regexp);
+#ifndef RE_TOKEN_INIT_BUG
+  re_token_t start_token = *token;
+#else
+  re_token_t start_token;
+
+  memcpy ((void *) &start_token, (void *) token, sizeof start_token);
+#endif
+
+  if (token->type == OP_OPEN_DUP_NUM)
+    {
+      end = 0;
+      start = fetch_number (regexp, token, syntax);
+      if (start == -1)
+	{
+	  if (token->type == CHARACTER && token->opr.c == ',')
+	    start = 0; /* We treat "{,m}" as "{0,m}".  */
+	  else
+	    {
+	      *err = REG_BADBR; /* <re>{} is invalid.  */
+	      return NULL;
+	    }
+	}
+      if (BE (start != -2, 1))
+	{
+	  /* We treat "{n}" as "{n,n}".  */
+	  end = ((token->type == OP_CLOSE_DUP_NUM) ? start
+		 : ((token->type == CHARACTER && token->opr.c == ',')
+		    ? fetch_number (regexp, token, syntax) : -2));
+	}
+      if (BE (start == -2 || end == -2, 0))
+	{
+	  /* Invalid sequence.  */
+	  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+	    {
+	      if (token->type == END_OF_RE)
+		*err = REG_EBRACE;
+	      else
+		*err = REG_BADBR;
+
+	      return NULL;
+	    }
+
+	  /* If the syntax bit is set, rollback.  */
+	  re_string_set_index (regexp, start_idx);
+	  *token = start_token;
+	  token->type = CHARACTER;
+	  /* mb_partial and word_char bits should be already initialized by
+	     peek_token.  */
+	  return elem;
+	}
+
+      if (BE ((end != -1 && start > end) || token->type != OP_CLOSE_DUP_NUM, 0))
+	{
+	  /* First number greater than second.  */
+	  *err = REG_BADBR;
+	  return NULL;
+	}
+    }
+  else
+    {
+      start = (token->type == OP_DUP_PLUS) ? 1 : 0;
+      end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
+    }
+
+  fetch_token (token, regexp, syntax);
+
+  if (BE (elem == NULL, 0))
+    return NULL;
+  if (BE (start == 0 && end == 0, 0))
+    {
+      postorder (elem, free_tree, NULL);
+      return NULL;
+    }
+
+  /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
+  if (BE (start > 0, 0))
+    {
+      tree = elem;
+      for (i = 2; i <= start; ++i)
+	{
+	  elem = duplicate_tree (elem, dfa);
+	  tree = create_tree (dfa, tree, elem, CONCAT);
+	  if (BE (elem == NULL || tree == NULL, 0))
+	    goto parse_dup_op_espace;
+	}
+
+      if (start == end)
+	return tree;
+
+      /* Duplicate ELEM before it is marked optional.  */
+      elem = duplicate_tree (elem, dfa);
+      old_tree = tree;
+    }
+  else
+    old_tree = NULL;
+
+  if (elem->token.type == SUBEXP)
+    postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
+
+  tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
+  if (BE (tree == NULL, 0))
+    goto parse_dup_op_espace;
+
+  /* This loop is actually executed only when end != -1,
+     to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?...  We have
+     already created the start+1-th copy.  */
+  for (i = start + 2; i <= end; ++i)
+    {
+      elem = duplicate_tree (elem, dfa);
+      tree = create_tree (dfa, tree, elem, CONCAT);
+      if (BE (elem == NULL || tree == NULL, 0))
+	goto parse_dup_op_espace;
+
+      tree = create_tree (dfa, tree, NULL, OP_ALT);
+      if (BE (tree == NULL, 0))
+	goto parse_dup_op_espace;
+    }
+
+  if (old_tree)
+    tree = create_tree (dfa, old_tree, tree, CONCAT);
+
+  return tree;
+
+ parse_dup_op_espace:
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Size of the names for collating symbol/equivalence_class/character_class.
+   I'm not sure, but maybe enough.  */
+#define BRACKET_NAME_BUF_SIZE 32
+
+#ifndef _LIBC
+  /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The result are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+     mbcset->range_ends, is a pointer argument sinse we may
+     update it.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+		 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
+# else /* not RE_ENABLE_I18N */
+build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
+		 bracket_elem_t *end_elem)
+# endif /* not RE_ENABLE_I18N */
+{
+  unsigned int start_ch, end_ch;
+  /* Equivalence Classes and Character Classes can't be a range start/end.  */
+  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+	  || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+	  0))
+    return REG_ERANGE;
+
+  /* We can handle no multi character collating elements without libc
+     support.  */
+  if (BE ((start_elem->type == COLL_SYM
+	   && strlen ((char *) start_elem->opr.name) > 1)
+	  || (end_elem->type == COLL_SYM
+	      && strlen ((char *) end_elem->opr.name) > 1), 0))
+    return REG_ECOLLATE;
+
+# ifdef RE_ENABLE_I18N
+  {
+    wchar_t wc;
+    wint_t start_wc;
+    wint_t end_wc;
+    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+
+    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+		   : 0));
+    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+		 : 0));
+#ifdef GAWK
+    /*
+     * Fedora Core 2, maybe others, have broken `btowc' that returns -1
+     * for any value > 127. Sigh. Note that `start_ch' and `end_ch' are
+     * unsigned, so we don't have sign extension problems.
+     */
+    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+		? start_ch : start_elem->opr.wch);
+    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+	      ? end_ch : end_elem->opr.wch);
+#else
+    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+		? __btowc (start_ch) : start_elem->opr.wch);
+    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+	      ? __btowc (end_ch) : end_elem->opr.wch);
+#endif
+    if (start_wc == WEOF || end_wc == WEOF)
+      return REG_ECOLLATE;
+    cmp_buf[0] = start_wc;
+    cmp_buf[4] = end_wc;
+    if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
+      return REG_ERANGE;
+
+    /* Got valid collation sequence values, add them as a new entry.
+       However, for !_LIBC we have no collation elements: if the
+       character set is single byte, the single byte character set
+       that we build below suffices.  parse_bracket_exp passes
+       no MBCSET if dfa->mb_cur_max == 1.  */
+    if (mbcset)
+      {
+	/* Check the space of the arrays.  */
+	if (BE (*range_alloc == mbcset->nranges, 0))
+	  {
+	    /* There is not enough space, need realloc.  */
+	    wchar_t *new_array_start, *new_array_end;
+	    int new_nranges;
+
+	    /* +1 in case of mbcset->nranges is 0.  */
+	    new_nranges = 2 * mbcset->nranges + 1;
+	    /* Use realloc since mbcset->range_starts and mbcset->range_ends
+	       are NULL if *range_alloc == 0.  */
+	    new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+					  new_nranges);
+	    new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+					new_nranges);
+
+	    if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+	      return REG_ESPACE;
+
+	    mbcset->range_starts = new_array_start;
+	    mbcset->range_ends = new_array_end;
+	    *range_alloc = new_nranges;
+	  }
+
+	mbcset->range_starts[mbcset->nranges] = start_wc;
+	mbcset->range_ends[mbcset->nranges++] = end_wc;
+      }
+
+    /* Build the table for single byte characters.  */
+    for (wc = 0; wc < SBC_MAX; ++wc)
+      {
+	cmp_buf[2] = wc;
+	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+	    && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+	  bitset_set (sbcset, wc);
+      }
+  }
+# else /* not RE_ENABLE_I18N */
+  {
+    unsigned int ch;
+    start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
+		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+		   : 0));
+    end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
+	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+		 : 0));
+    if (start_ch > end_ch)
+      return REG_ERANGE;
+    /* Build the table for single byte characters.  */
+    for (ch = 0; ch < SBC_MAX; ++ch)
+      if (start_ch <= ch  && ch <= end_ch)
+	bitset_set (sbcset, ch);
+  }
+# endif /* not RE_ENABLE_I18N */
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+#ifndef _LIBC
+/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
+   Build the collating element which is represented by NAME.
+   The result are written to MBCSET and SBCSET.
+   COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+   pointer argument since we may update it.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+			int *coll_sym_alloc, const unsigned char *name)
+# else /* not RE_ENABLE_I18N */
+build_collating_symbol (bitset_t sbcset, const unsigned char *name)
+# endif /* not RE_ENABLE_I18N */
+{
+  size_t name_len = strlen ((const char *) name);
+  if (BE (name_len != 1, 0))
+    return REG_ECOLLATE;
+  else
+    {
+      bitset_set (sbcset, name[0]);
+      return REG_NOERROR;
+    }
+}
+#endif /* not _LIBC */
+
+/* This function parse bracket expression like "[abc]", "[a-c]",
+   "[[.a-a.]]" etc.  */
+
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+		   reg_syntax_t syntax, reg_errcode_t *err)
+{
+#ifdef _LIBC
+  const unsigned char *collseqmb;
+  const char *collseqwc;
+  uint32_t nrules;
+  int32_t table_size;
+  const int32_t *symb_table;
+  const unsigned char *extra;
+
+  /* Local function for parse_bracket_exp used in _LIBC environement.
+     Seek the collating symbol entry correspondings to NAME.
+     Return the index of the symbol in the SYMB_TABLE.  */
+
+  auto inline int32_t
+  __attribute ((always_inline))
+  seek_collating_symbol_entry (name, name_len)
+	 const unsigned char *name;
+	 size_t name_len;
+    {
+      int32_t hash = elem_hash ((const char *) name, name_len);
+      int32_t elem = hash % table_size;
+      if (symb_table[2 * elem] != 0)
+	{
+	  int32_t second = hash % (table_size - 2) + 1;
+
+	  do
+	    {
+	      /* First compare the hashing value.  */
+	      if (symb_table[2 * elem] == hash
+		  /* Compare the length of the name.  */
+		  && name_len == extra[symb_table[2 * elem + 1]]
+		  /* Compare the name.  */
+		  && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+			     name_len) == 0)
+		{
+		  /* Yep, this is the entry.  */
+		  break;
+		}
+
+	      /* Next entry.  */
+	      elem += second;
+	    }
+	  while (symb_table[2 * elem] != 0);
+	}
+      return elem;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Look up the collation sequence value of BR_ELEM.
+     Return the value if succeeded, UINT_MAX otherwise.  */
+
+  auto inline unsigned int
+  __attribute ((always_inline))
+  lookup_collation_sequence_value (br_elem)
+	 bracket_elem_t *br_elem;
+    {
+      if (br_elem->type == SB_CHAR)
+	{
+	  /*
+	  if (MB_CUR_MAX == 1)
+	  */
+	  if (nrules == 0)
+	    return collseqmb[br_elem->opr.ch];
+	  else
+	    {
+	      wint_t wc = __btowc (br_elem->opr.ch);
+	      return __collseq_table_lookup (collseqwc, wc);
+	    }
+	}
+      else if (br_elem->type == MB_CHAR)
+	{
+	  if (nrules != 0)
+	    return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+	}
+      else if (br_elem->type == COLL_SYM)
+	{
+	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+	  if (nrules != 0)
+	    {
+	      int32_t elem, idx;
+	      elem = seek_collating_symbol_entry (br_elem->opr.name,
+						  sym_name_len);
+	      if (symb_table[2 * elem] != 0)
+		{
+		  /* We found the entry.  */
+		  idx = symb_table[2 * elem + 1];
+		  /* Skip the name of collating element name.  */
+		  idx += 1 + extra[idx];
+		  /* Skip the byte sequence of the collating element.  */
+		  idx += 1 + extra[idx];
+		  /* Adjust for the alignment.  */
+		  idx = (idx + 3) & ~3;
+		  /* Skip the multibyte collation sequence value.  */
+		  idx += sizeof (unsigned int);
+		  /* Skip the wide char sequence of the collating element.  */
+		  idx += sizeof (unsigned int) *
+		    (1 + *(unsigned int *) (extra + idx));
+		  /* Return the collation sequence value.  */
+		  return *(unsigned int *) (extra + idx);
+		}
+	      else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+		{
+		  /* No valid character.  Match it as a single byte
+		     character.  */
+		  return collseqmb[br_elem->opr.name[0]];
+		}
+	    }
+	  else if (sym_name_len == 1)
+	    return collseqmb[br_elem->opr.name[0]];
+	}
+      return UINT_MAX;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environement.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The result are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+     mbcset->range_ends, is a pointer argument sinse we may
+     update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+	 re_charset_t *mbcset;
+	 int *range_alloc;
+	 bitset_t sbcset;
+	 bracket_elem_t *start_elem, *end_elem;
+    {
+      unsigned int ch;
+      uint32_t start_collseq;
+      uint32_t end_collseq;
+
+      /* Equivalence Classes and Character Classes can't be a range
+	 start/end.  */
+      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+	      || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+	      0))
+	return REG_ERANGE;
+
+      start_collseq = lookup_collation_sequence_value (start_elem);
+      end_collseq = lookup_collation_sequence_value (end_elem);
+      /* Check start/end collation sequence values.  */
+      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+	return REG_ECOLLATE;
+      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+	return REG_ERANGE;
+
+      /* Got valid collation sequence values, add them as a new entry.
+	 However, if we have no collation elements, and the character set
+	 is single byte, the single byte character set that we
+	 build below suffices. */
+      if (nrules > 0 || dfa->mb_cur_max > 1)
+	{
+	  /* Check the space of the arrays.  */
+	  if (BE (*range_alloc == mbcset->nranges, 0))
+	    {
+	      /* There is not enough space, need realloc.  */
+	      uint32_t *new_array_start;
+	      uint32_t *new_array_end;
+	      int new_nranges;
+
+	      /* +1 in case of mbcset->nranges is 0.  */
+	      new_nranges = 2 * mbcset->nranges + 1;
+	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+					    new_nranges);
+	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+					  new_nranges);
+
+	      if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+		return REG_ESPACE;
+
+	      mbcset->range_starts = new_array_start;
+	      mbcset->range_ends = new_array_end;
+	      *range_alloc = new_nranges;
+	    }
+
+	  mbcset->range_starts[mbcset->nranges] = start_collseq;
+	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
+	}
+
+      /* Build the table for single byte characters.  */
+      for (ch = 0; ch < SBC_MAX; ch++)
+	{
+	  uint32_t ch_collseq;
+	  /*
+	  if (MB_CUR_MAX == 1)
+	  */
+	  if (nrules == 0)
+	    ch_collseq = collseqmb[ch];
+	  else
+	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+	    bitset_set (sbcset, ch);
+	}
+      return REG_NOERROR;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environement.
+     Build the collating element which is represented by NAME.
+     The result are written to MBCSET and SBCSET.
+     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+     pointer argument sinse we may update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+	 re_charset_t *mbcset;
+	 int *coll_sym_alloc;
+	 bitset_t sbcset;
+	 const unsigned char *name;
+    {
+      int32_t elem, idx;
+      size_t name_len = strlen ((const char *) name);
+      if (nrules != 0)
+	{
+	  elem = seek_collating_symbol_entry (name, name_len);
+	  if (symb_table[2 * elem] != 0)
+	    {
+	      /* We found the entry.  */
+	      idx = symb_table[2 * elem + 1];
+	      /* Skip the name of collating element name.  */
+	      idx += 1 + extra[idx];
+	    }
+	  else if (symb_table[2 * elem] == 0 && name_len == 1)
+	    {
+	      /* No valid character, treat it as a normal
+		 character.  */
+	      bitset_set (sbcset, name[0]);
+	      return REG_NOERROR;
+	    }
+	  else
+	    return REG_ECOLLATE;
+
+	  /* Got valid collation sequence, add it as a new entry.  */
+	  /* Check the space of the arrays.  */
+	  if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
+	    {
+	      /* Not enough, realloc it.  */
+	      /* +1 in case of mbcset->ncoll_syms is 0.  */
+	      int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+	      /* Use realloc since mbcset->coll_syms is NULL
+		 if *alloc == 0.  */
+	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+						   new_coll_sym_alloc);
+	      if (BE (new_coll_syms == NULL, 0))
+		return REG_ESPACE;
+	      mbcset->coll_syms = new_coll_syms;
+	      *coll_sym_alloc = new_coll_sym_alloc;
+	    }
+	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+	  return REG_NOERROR;
+	}
+      else
+	{
+	  if (BE (name_len != 1, 0))
+	    return REG_ECOLLATE;
+	  else
+	    {
+	      bitset_set (sbcset, name[0]);
+	      return REG_NOERROR;
+	    }
+	}
+    }
+#endif
+
+  re_token_t br_token;
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+  int equiv_class_alloc = 0, char_class_alloc = 0;
+#endif /* not RE_ENABLE_I18N */
+  int non_match = 0;
+  bin_tree_t *work_tree;
+  int token_len;
+  int first_round = 1;
+#ifdef _LIBC
+  collseqmb = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules)
+    {
+      /*
+      if (MB_CUR_MAX > 1)
+      */
+      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						  _NL_COLLATE_SYMB_TABLEMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+						   _NL_COLLATE_SYMB_EXTRAMB);
+    }
+#endif
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+  if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  token_len = peek_token_bracket (token, regexp, syntax);
+  if (BE (token->type == END_OF_RE, 0))
+    {
+      *err = REG_BADPAT;
+      goto parse_bracket_exp_free_return;
+    }
+  if (token->type == OP_NON_MATCH_LIST)
+    {
+#ifdef RE_ENABLE_I18N
+      mbcset->non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+      non_match = 1;
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+	bitset_set (sbcset, '\n');
+      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+      if (BE (token->type == END_OF_RE, 0))
+	{
+	  *err = REG_BADPAT;
+	  goto parse_bracket_exp_free_return;
+	}
+    }
+
+  /* We treat the first ']' as a normal character.  */
+  if (token->type == OP_CLOSE_BRACKET)
+    token->type = CHARACTER;
+
+  while (1)
+    {
+      bracket_elem_t start_elem, end_elem;
+      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+      reg_errcode_t ret;
+      int token_len2 = 0, is_range_exp = 0;
+      re_token_t token2;
+
+      start_elem.opr.name = start_name_buf;
+      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+				   syntax, first_round);
+      if (BE (ret != REG_NOERROR, 0))
+	{
+	  *err = ret;
+	  goto parse_bracket_exp_free_return;
+	}
+      first_round = 0;
+
+      /* Get information about the next token.  We need it in any case.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+
+      /* Do not check for ranges if we know they are not allowed.  */
+      if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
+	{
+	  if (BE (token->type == END_OF_RE, 0))
+	    {
+	      *err = REG_EBRACK;
+	      goto parse_bracket_exp_free_return;
+	    }
+	  if (token->type == OP_CHARSET_RANGE)
+	    {
+	      re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
+	      token_len2 = peek_token_bracket (&token2, regexp, syntax);
+	      if (BE (token2.type == END_OF_RE, 0))
+		{
+		  *err = REG_EBRACK;
+		  goto parse_bracket_exp_free_return;
+		}
+	      if (token2.type == OP_CLOSE_BRACKET)
+		{
+		  /* We treat the last '-' as a normal character.  */
+		  re_string_skip_bytes (regexp, -token_len);
+		  token->type = CHARACTER;
+		}
+	      else
+		is_range_exp = 1;
+	    }
+	}
+
+      if (is_range_exp == 1)
+	{
+	  end_elem.opr.name = end_name_buf;
+	  ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+				       dfa, syntax, 1);
+	  if (BE (ret != REG_NOERROR, 0))
+	    {
+	      *err = ret;
+	      goto parse_bracket_exp_free_return;
+	    }
+
+	  token_len = peek_token_bracket (token, regexp, syntax);
+
+#ifdef _LIBC
+	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
+				  &start_elem, &end_elem);
+#else
+# ifdef RE_ENABLE_I18N
+	  *err = build_range_exp (sbcset,
+				  dfa->mb_cur_max > 1 ? mbcset : NULL,
+				  &range_alloc, &start_elem, &end_elem);
+# else
+	  *err = build_range_exp (sbcset, &start_elem, &end_elem);
+# endif
+#endif /* RE_ENABLE_I18N */
+	  if (BE (*err != REG_NOERROR, 0))
+	    goto parse_bracket_exp_free_return;
+	}
+      else
+	{
+	  switch (start_elem.type)
+	    {
+	    case SB_CHAR:
+	      bitset_set (sbcset, start_elem.opr.ch);
+	      break;
+#ifdef RE_ENABLE_I18N
+	    case MB_CHAR:
+	      /* Check whether the array has enough space.  */
+	      if (BE (mbchar_alloc == mbcset->nmbchars, 0))
+		{
+		  wchar_t *new_mbchars;
+		  /* Not enough, realloc it.  */
+		  /* +1 in case of mbcset->nmbchars is 0.  */
+		  mbchar_alloc = 2 * mbcset->nmbchars + 1;
+		  /* Use realloc since array is NULL if *alloc == 0.  */
+		  new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
+					    mbchar_alloc);
+		  if (BE (new_mbchars == NULL, 0))
+		    goto parse_bracket_exp_espace;
+		  mbcset->mbchars = new_mbchars;
+		}
+	      mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+	      break;
+#endif /* RE_ENABLE_I18N */
+	    case EQUIV_CLASS:
+	      *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+					mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+					start_elem.opr.name);
+	      if (BE (*err != REG_NOERROR, 0))
+		goto parse_bracket_exp_free_return;
+	      break;
+	    case COLL_SYM:
+	      *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+					     mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+					     start_elem.opr.name);
+	      if (BE (*err != REG_NOERROR, 0))
+		goto parse_bracket_exp_free_return;
+	      break;
+	    case CHAR_CLASS:
+	      *err = build_charclass (regexp->trans, sbcset,
+#ifdef RE_ENABLE_I18N
+				      mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+				      (const char *) start_elem.opr.name, syntax);
+	      if (BE (*err != REG_NOERROR, 0))
+	       goto parse_bracket_exp_free_return;
+	      break;
+	    default:
+	      assert (0);
+	      break;
+	    }
+	}
+      if (BE (token->type == END_OF_RE, 0))
+	{
+	  *err = REG_EBRACK;
+	  goto parse_bracket_exp_free_return;
+	}
+      if (token->type == OP_CLOSE_BRACKET)
+	break;
+    }
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+
+  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+      || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
+						     || mbcset->non_match)))
+    {
+      bin_tree_t *mbc_tree;
+      int sbc_idx;
+      /* Build a tree for complex bracket.  */
+      dfa->has_mb_node = 1;
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+	goto parse_bracket_exp_espace;
+      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
+	if (sbcset[sbc_idx])
+	  break;
+      /* If there are no bits set in sbcset, there is no point
+	 of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
+      if (sbc_idx < BITSET_WORDS)
+	{
+	  /* Build a tree for simple bracket.  */
+	  br_token.type = SIMPLE_BRACKET;
+	  br_token.opr.sbcset = sbcset;
+	  work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+	  if (BE (work_tree == NULL, 0))
+	    goto parse_bracket_exp_espace;
+
+	  /* Then join them by ALT node.  */
+	  work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
+	  if (BE (work_tree == NULL, 0))
+	    goto parse_bracket_exp_espace;
+	}
+      else
+	{
+	  re_free (sbcset);
+	  work_tree = mbc_tree;
+	}
+    }
+  else
+#endif /* not RE_ENABLE_I18N */
+    {
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif
+      /* Build a tree for simple bracket.  */
+      br_token.type = SIMPLE_BRACKET;
+      br_token.opr.sbcset = sbcset;
+      work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (work_tree == NULL, 0))
+	goto parse_bracket_exp_espace;
+    }
+  return work_tree;
+
+ parse_bracket_exp_espace:
+  *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  return NULL;
+}
+
+/* Parse an element in the bracket expression.  */
+
+static reg_errcode_t
+parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
+		       re_token_t *token, int token_len, re_dfa_t *dfa,
+		       reg_syntax_t syntax, int accept_hyphen)
+{
+#ifdef RE_ENABLE_I18N
+  int cur_char_size;
+  cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+  if (cur_char_size > 1)
+    {
+      elem->type = MB_CHAR;
+      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+      re_string_skip_bytes (regexp, cur_char_size);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+  if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
+      || token->type == OP_OPEN_EQUIV_CLASS)
+    return parse_bracket_symbol (elem, regexp, token);
+  if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
+    {
+      /* A '-' must only appear as anything but a range indicator before
+	 the closing bracket.  Everything else is an error.  */
+      re_token_t token2;
+      (void) peek_token_bracket (&token2, regexp, syntax);
+      if (token2.type != OP_CLOSE_BRACKET)
+	/* The actual error value is not standardized since this whole
+	   case is undefined.  But ERANGE makes good sense.  */
+	return REG_ERANGE;
+    }
+  elem->type = SB_CHAR;
+  elem->opr.ch = token->opr.c;
+  return REG_NOERROR;
+}
+
+/* Parse a bracket symbol in the bracket expression.  Bracket symbols are
+   such as [:<character_class>:], [.<collating_element>.], and
+   [=<equivalent_class>=].  */
+
+static reg_errcode_t
+parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
+		      re_token_t *token)
+{
+  unsigned char ch, delim = token->opr.c;
+  int i = 0;
+  if (re_string_eoi(regexp))
+    return REG_EBRACK;
+  for (;; ++i)
+    {
+      if (i >= BRACKET_NAME_BUF_SIZE)
+	return REG_EBRACK;
+      if (token->type == OP_OPEN_CHAR_CLASS)
+	ch = re_string_fetch_byte_case (regexp);
+      else
+	ch = re_string_fetch_byte (regexp);
+      if (re_string_eoi(regexp))
+	return REG_EBRACK;
+      if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
+	break;
+      elem->opr.name[i] = ch;
+    }
+  re_string_skip_bytes (regexp, 1);
+  elem->opr.name[i] = '\0';
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+      elem->type = COLL_SYM;
+      break;
+    case OP_OPEN_EQUIV_CLASS:
+      elem->type = EQUIV_CLASS;
+      break;
+    case OP_OPEN_CHAR_CLASS:
+      elem->type = CHAR_CLASS;
+      break;
+    default:
+      break;
+    }
+  return REG_NOERROR;
+}
+
+  /* Helper function for parse_bracket_exp.
+     Build the equivalence class which is represented by NAME.
+     The result are written to MBCSET and SBCSET.
+     EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
+     is a pointer argument sinse we may update it.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
+		   int *equiv_class_alloc, const unsigned char *name)
+#else /* not RE_ENABLE_I18N */
+build_equiv_class (bitset_t sbcset, const unsigned char *name)
+#endif /* not RE_ENABLE_I18N */
+{
+#ifdef _LIBC
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules != 0)
+    {
+      const int32_t *table, *indirect;
+      const unsigned char *weights, *extra, *cp;
+      unsigned char char_buf[2];
+      int32_t idx1, idx2;
+      unsigned int ch;
+      size_t len;
+      /* This #include defines a local function!  */
+# include <locale/weight.h>
+      /* Calculate the index for equivalence class.  */
+      cp = name;
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+					       _NL_COLLATE_WEIGHTMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+						   _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						_NL_COLLATE_INDIRECTMB);
+      idx1 = findidx (&cp);
+      if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+	/* This isn't a valid character.  */
+	return REG_ECOLLATE;
+
+      /* Build single byte matcing table for this equivalence class.  */
+      char_buf[1] = (unsigned char) '\0';
+      len = weights[idx1 & 0xffffff];
+      for (ch = 0; ch < SBC_MAX; ++ch)
+	{
+	  char_buf[0] = ch;
+	  cp = char_buf;
+	  idx2 = findidx (&cp);
+/*
+	  idx2 = table[ch];
+*/
+	  if (idx2 == 0)
+	    /* This isn't a valid character.  */
+	    continue;
+	  /* Compare only if the length matches and the collation rule
+	     index is the same.  */
+	  if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
+	    {
+	      int cnt = 0;
+
+	      while (cnt <= len &&
+		     weights[(idx1 & 0xffffff) + 1 + cnt]
+		     == weights[(idx2 & 0xffffff) + 1 + cnt])
+		++cnt;
+
+	      if (cnt > len)
+		bitset_set (sbcset, ch);
+	    }
+	}
+      /* Check whether the array has enough space.  */
+      if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
+	{
+	  /* Not enough, realloc it.  */
+	  /* +1 in case of mbcset->nequiv_classes is 0.  */
+	  int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+	  /* Use realloc since the array is NULL if *alloc == 0.  */
+	  int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
+						   int32_t,
+						   new_equiv_class_alloc);
+	  if (BE (new_equiv_classes == NULL, 0))
+	    return REG_ESPACE;
+	  mbcset->equiv_classes = new_equiv_classes;
+	  *equiv_class_alloc = new_equiv_class_alloc;
+	}
+      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+    }
+  else
+#endif /* _LIBC */
+    {
+      if (BE (strlen ((const char *) name) != 1, 0))
+	return REG_ECOLLATE;
+      bitset_set (sbcset, *name);
+    }
+  return REG_NOERROR;
+}
+
+  /* Helper function for parse_bracket_exp.
+     Build the character class which is represented by NAME.
+     The result are written to MBCSET and SBCSET.
+     CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
+     is a pointer argument sinse we may update it.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+		 re_charset_t *mbcset, int *char_class_alloc,
+		 const char *class_name, reg_syntax_t syntax)
+#else /* not RE_ENABLE_I18N */
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+		 const char *class_name, reg_syntax_t syntax)
+#endif /* not RE_ENABLE_I18N */
+{
+  int i;
+
+  /* In case of REG_ICASE "upper" and "lower" match the both of
+     upper and lower cases.  */
+  if ((syntax & RE_ICASE)
+      && (strcmp (class_name, "upper") == 0 || strcmp (class_name, "lower") == 0))
+    class_name = "alpha";
+
+#ifdef RE_ENABLE_I18N
+  /* Check the space of the arrays.  */
+  if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
+    {
+      /* Not enough, realloc it.  */
+      /* +1 in case of mbcset->nchar_classes is 0.  */
+      int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
+      /* Use realloc since array is NULL if *alloc == 0.  */
+      wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
+					       new_char_class_alloc);
+      if (BE (new_char_classes == NULL, 0))
+	return REG_ESPACE;
+      mbcset->char_classes = new_char_classes;
+      *char_class_alloc = new_char_class_alloc;
+    }
+  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (class_name);
+#endif /* RE_ENABLE_I18N */
+
+#define BUILD_CHARCLASS_LOOP(ctype_func)	\
+  do {						\
+    if (BE (trans != NULL, 0))			\
+      {						\
+	for (i = 0; i < SBC_MAX; ++i)		\
+  	  if (ctype_func (i))			\
+	    bitset_set (sbcset, trans[i]);	\
+      }						\
+    else					\
+      {						\
+	for (i = 0; i < SBC_MAX; ++i)		\
+  	  if (ctype_func (i))			\
+	    bitset_set (sbcset, i);		\
+      }						\
+  } while (0)
+
+  if (strcmp (class_name, "alnum") == 0)
+    BUILD_CHARCLASS_LOOP (isalnum);
+  else if (strcmp (class_name, "cntrl") == 0)
+    BUILD_CHARCLASS_LOOP (iscntrl);
+  else if (strcmp (class_name, "lower") == 0)
+    BUILD_CHARCLASS_LOOP (islower);
+  else if (strcmp (class_name, "space") == 0)
+    BUILD_CHARCLASS_LOOP (isspace);
+  else if (strcmp (class_name, "alpha") == 0)
+    BUILD_CHARCLASS_LOOP (isalpha);
+  else if (strcmp (class_name, "digit") == 0)
+    BUILD_CHARCLASS_LOOP (isdigit);
+  else if (strcmp (class_name, "print") == 0)
+    BUILD_CHARCLASS_LOOP (isprint);
+  else if (strcmp (class_name, "upper") == 0)
+    BUILD_CHARCLASS_LOOP (isupper);
+  else if (strcmp (class_name, "blank") == 0)
+#ifndef GAWK
+    BUILD_CHARCLASS_LOOP (isblank);
+#else
+    /* see comments above */
+    BUILD_CHARCLASS_LOOP (is_blank);
+#endif
+  else if (strcmp (class_name, "graph") == 0)
+    BUILD_CHARCLASS_LOOP (isgraph);
+  else if (strcmp (class_name, "punct") == 0)
+    BUILD_CHARCLASS_LOOP (ispunct);
+  else if (strcmp (class_name, "xdigit") == 0)
+    BUILD_CHARCLASS_LOOP (isxdigit);
+  else
+    return REG_ECTYPE;
+
+  return REG_NOERROR;
+}
+
+static bin_tree_t *
+build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+		    const char *class_name,
+		    const char *extra, int non_match,
+		    reg_errcode_t *err)
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#endif /* not RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else /* not RE_ENABLE_I18N */
+  if (BE (sbcset == NULL, 0))
+#endif /* not RE_ENABLE_I18N */
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  if (non_match)
+    {
+#ifdef RE_ENABLE_I18N
+      mbcset->non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+    }
+
+  /* We don't care the syntax in this case.  */
+  ret = build_charclass (trans, sbcset,
+#ifdef RE_ENABLE_I18N
+			 mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+			 class_name, 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* \w match '_' also.  */
+  for (; *extra; extra++)
+    bitset_set (sbcset, *extra);
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+#endif
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  tree = create_token_tree (dfa, NULL, NULL, &br_token);
+  if (BE (tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+	goto build_word_op_espace;
+      /* Then join them by ALT node.  */
+      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+      if (BE (mbc_tree != NULL, 1))
+	return tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+ build_word_op_espace:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* This is intended for the expressions like "a{1,3}".
+   Fetch a number from `input', and return the number.
+   Return -1, if the number field is empty like "{,1}".
+   Return -2, If an error is occured.  */
+
+static int
+fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
+{
+  int num = -1;
+  unsigned char c;
+  while (1)
+    {
+      fetch_token (token, input, syntax);
+      c = token->opr.c;
+      if (BE (token->type == END_OF_RE, 0))
+	return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || c == ',')
+	break;
+      num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
+	     ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
+      num = (num > RE_DUP_MAX) ? -2 : num;
+    }
+  return num;
+}
+
+#ifdef RE_ENABLE_I18N
+static void
+free_charset (re_charset_t *cset)
+{
+  re_free (cset->mbchars);
+# ifdef _LIBC
+  re_free (cset->coll_syms);
+  re_free (cset->equiv_classes);
+  re_free (cset->range_starts);
+  re_free (cset->range_ends);
+# endif
+  re_free (cset->char_classes);
+  re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Functions for binary tree operation.  */
+
+/* Create a tree node.  */
+
+static bin_tree_t *
+create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+	     re_token_type_t type)
+{
+  re_token_t t;
+  t.type = type;
+  return create_token_tree (dfa, left, right, &t);
+}
+
+static bin_tree_t *
+create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+		   const re_token_t *token)
+{
+  bin_tree_t *tree;
+  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
+    {
+      bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
+
+      if (storage == NULL)
+	return NULL;
+      storage->next = dfa->str_tree_storage;
+      dfa->str_tree_storage = storage;
+      dfa->str_tree_storage_idx = 0;
+    }
+  tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
+
+  tree->parent = NULL;
+  tree->left = left;
+  tree->right = right;
+  tree->token = *token;
+  tree->token.duplicated = 0;
+  tree->token.opt_subexp = 0;
+  tree->first = NULL;
+  tree->next = NULL;
+  tree->node_idx = -1;
+
+  if (left != NULL)
+    left->parent = tree;
+  if (right != NULL)
+    right->parent = tree;
+  return tree;
+}
+
+/* Mark the tree SRC as an optional subexpression.
+   To be called from preorder or postorder.  */
+
+static reg_errcode_t
+mark_opt_subexp (void *extra, bin_tree_t *node)
+{
+  int idx = (int) (long) extra;
+  if (node->token.type == SUBEXP && node->token.opr.idx == idx)
+    node->token.opt_subexp = 1;
+
+  return REG_NOERROR;
+}
+
+/* Free the allocated memory inside NODE. */
+
+static void
+free_token (re_token_t *node)
+{
+#ifdef RE_ENABLE_I18N
+  if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
+    free_charset (node->opr.mbcset);
+  else
+#endif /* RE_ENABLE_I18N */
+    if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
+      re_free (node->opr.sbcset);
+}
+
+/* Worker function for tree walking.  Free the allocated memory inside NODE
+   and its children. */
+
+static reg_errcode_t
+free_tree (void *extra, bin_tree_t *node)
+{
+  free_token (&node->token);
+  return REG_NOERROR;
+}
+
+
+/* Duplicate the node SRC, and return new node.  This is a preorder
+   visit similar to the one implemented by the generic visitor, but
+   we need more infrastructure to maintain two parallel trees --- so,
+   it's easier to duplicate.  */
+
+static bin_tree_t *
+duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
+{
+  const bin_tree_t *node;
+  bin_tree_t *dup_root;
+  bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
+
+  for (node = root; ; )
+    {
+      /* Create a new tree and link it back to the current parent.  */
+      *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
+      if (*p_new == NULL)
+	return NULL;
+      (*p_new)->parent = dup_node;
+      (*p_new)->token.duplicated = 1;
+      dup_node = *p_new;
+
+      /* Go to the left node, or up and to the right.  */
+      if (node->left)
+	{
+	  node = node->left;
+	  p_new = &dup_node->left;
+	}
+      else
+	{
+	  const bin_tree_t *prev = NULL;
+	  while (node->right == prev || node->right == NULL)
+	    {
+	      prev = node;
+	      node = node->parent;
+	      dup_node = dup_node->parent;
+	      if (!node)
+		return dup_root;
+	    }
+	  node = node->right;
+	  p_new = &dup_node->right;
+	}
+    }
+}
diff --git a/compat/regex/regex.c b/compat/regex/regex.c
index be851fc..3dd8dfa 100644
--- a/compat/regex/regex.c
+++ b/compat/regex/regex.c
@@ -1,4924 +1,87 @@
-/* Extended regular expression matching and search library,
-   version 0.12.
-   (Implements POSIX draft P10003.2/D11.2, except for
-   internationalization features.)
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
 
-   Copyright (C) 1993 Free Software Foundation, Inc.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-
-   This program is distributed in the hope that it will be useful,
+   The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301 USA.  */
 
-/* AIX requires this to be the first thing in the file. */
-#if defined (_AIX) && !defined (REGEX_MALLOC)
-  #pragma alloca
+#ifdef HAVE_CONFIG_H
+#include "config.h"
 #endif
 
-#define _GNU_SOURCE
-
-/* We need this for `regex.h', and perhaps for the Emacs include files.  */
-#include <sys/types.h>
-
-/* We used to test for `BSTRING' here, but only GCC and Emacs define
-   `BSTRING', as far as I know, and neither of them use this code.  */
-#include <string.h>
-#ifndef bcmp
-#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
-#endif
-#ifndef bcopy
-#define bcopy(s, d, n)	memcpy ((d), (s), (n))
-#endif
-#ifndef bzero
-#define bzero(s, n)	memset ((s), 0, (n))
+/* Make sure noone compiles this code with a C++ compiler.  */
+#ifdef __cplusplus
+# error "This is C code, use a C compiler"
 #endif
 
-#include <stdlib.h>
+#ifdef _LIBC
+/* We have to keep the namespace clean.  */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+	__regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+	__re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+	__re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+	__re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+	__re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
 
-
-/* Define the syntax stuff for \<, \>, etc.  */
-
-/* This must be nonzero for the wordchar and notwordchar pattern
-   commands in re_match_2.  */
-#ifndef Sword
-#define Sword 1
+# include "../locale/localeinfo.h"
 #endif
 
-#ifdef SYNTAX_TABLE
-
-extern char *re_syntax_table;
-
-#else /* not SYNTAX_TABLE */
-
-/* How many characters in the character set.  */
-#define CHAR_SET_SIZE 256
-
-static char re_syntax_table[CHAR_SET_SIZE];
-
-static void
-init_syntax_once ()
-{
-   register int c;
-   static int done = 0;
-
-   if (done)
-     return;
-
-   bzero (re_syntax_table, sizeof re_syntax_table);
-
-   for (c = 'a'; c <= 'z'; c++)
-     re_syntax_table[c] = Sword;
-
-   for (c = 'A'; c <= 'Z'; c++)
-     re_syntax_table[c] = Sword;
-
-   for (c = '0'; c <= '9'; c++)
-     re_syntax_table[c] = Sword;
-
-   re_syntax_table['_'] = Sword;
-
-   done = 1;
-}
-
-#endif /* not SYNTAX_TABLE */
-
-#define SYNTAX(c) re_syntax_table[c]
-
-
-/* Get the interface, including the syntax bits.  */
-#include "regex.h"
-
-/* isalpha etc. are used for the character classes.  */
-#include <ctype.h>
-
-#ifndef isascii
-#define isascii(c) 1
+#if defined (_MSC_VER)
+#include <stdio.h> /* for size_t */
 #endif
 
-#ifdef isblank
-#define ISBLANK(c) (isascii (c) && isblank (c))
-#else
-#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
+   GNU regex allows.  Include it before <regex.h>, which correctly
+   #undefs RE_DUP_MAX and sets it to the right value.  */
+#include <limits.h>
+
+#ifdef GAWK
+#undef alloca
+#define alloca alloca_is_bad_you_should_never_use_it
 #endif
-#ifdef isgraph
-#define ISGRAPH(c) (isascii (c) && isgraph (c))
-#else
-#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#include <regex.h>
+#include "regex_internal.h"
+
+#include "regex_internal.c"
+#ifdef GAWK
+#define bool int
+#define true (1)
+#define false (0)
 #endif
+#include "regcomp.c"
+#include "regexec.c"
 
-#define ISPRINT(c) (isascii (c) && isprint (c))
-#define ISDIGIT(c) (isascii (c) && isdigit (c))
-#define ISALNUM(c) (isascii (c) && isalnum (c))
-#define ISALPHA(c) (isascii (c) && isalpha (c))
-#define ISCNTRL(c) (isascii (c) && iscntrl (c))
-#define ISLOWER(c) (isascii (c) && islower (c))
-#define ISPUNCT(c) (isascii (c) && ispunct (c))
-#define ISSPACE(c) (isascii (c) && isspace (c))
-#define ISUPPER(c) (isascii (c) && isupper (c))
-#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-/* We remove any previous definition of `SIGN_EXTEND_CHAR',
-   since ours (we hope) works properly with all combinations of
-   machines, compilers, `char' and `unsigned char' argument types.
-   (Per Bothner suggested the basic approach.)  */
-#undef SIGN_EXTEND_CHAR
-#if __STDC__
-#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
-#else  /* not __STDC__ */
-/* As in Harbison and Steele.  */
-#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
-#endif
-
-/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
-   use `alloca' instead of `malloc'.  This is because using malloc in
-   re_search* or re_match* could cause memory leaks when C-g is used in
-   Emacs; also, malloc is slower and causes storage fragmentation.  On
-   the other hand, malloc is more portable, and easier to debug.
-
-   Because we sometimes use alloca, some routines have to be macros,
-   not functions -- `alloca'-allocated space disappears at the end of the
-   function it is called in.  */
-
-#ifdef REGEX_MALLOC
-
-#define REGEX_ALLOCATE malloc
-#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
-
-#else /* not REGEX_MALLOC  */
-
-/* Emacs already defines alloca, sometimes.  */
-#ifndef alloca
-
-/* Make alloca work the best possible way.  */
-#ifdef __GNUC__
-#define alloca __builtin_alloca
-#else /* not __GNUC__ */
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#else /* not __GNUC__ or HAVE_ALLOCA_H */
-#ifndef _AIX /* Already did AIX, up at the top.  */
-char *alloca ();
-#endif /* not _AIX */
-#endif /* not HAVE_ALLOCA_H */
-#endif /* not __GNUC__ */
-
-#endif /* not alloca */
-
-#define REGEX_ALLOCATE alloca
-
-/* Assumes a `char *destination' variable.  */
-#define REGEX_REALLOCATE(source, osize, nsize)				\
-  (destination = (char *) alloca (nsize),				\
-   bcopy (source, destination, osize),					\
-   destination)
-
-#endif /* not REGEX_MALLOC */
-
-
-/* True if `size1' is non-NULL and PTR is pointing anywhere inside
-   `string1' or just past its end.  This works if PTR is NULL, which is
-   a good thing.  */
-#define FIRST_STRING_P(ptr) 					\
-  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
-
-/* (Re)Allocate N items of type T using malloc, or fail.  */
-#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
-#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
-#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
-
-#define BYTEWIDTH 8 /* In bits.  */
-
-#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
-
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-
-typedef char boolean;
-#define false 0
-#define true 1
-
-/* These are the command codes that appear in compiled regular
-   expressions.  Some opcodes are followed by argument bytes.  A
-   command code can specify any interpretation whatsoever for its
-   arguments.  Zero bytes may appear in the compiled regular expression.
-
-   The value of `exactn' is needed in search.c (search_buffer) in Emacs.
-   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
-   `exactn' we use here must also be 1.  */
-
-typedef enum
-{
-  no_op = 0,
-
-	/* Followed by one byte giving n, then by n literal bytes.  */
-  exactn = 1,
-
-	/* Matches any (more or less) character.  */
-  anychar,
-
-	/* Matches any one char belonging to specified set.  First
-	   following byte is number of bitmap bytes.  Then come bytes
-	   for a bitmap saying which chars are in.  Bits in each byte
-	   are ordered low-bit-first.  A character is in the set if its
-	   bit is 1.  A character too large to have a bit in the map is
-	   automatically not in the set.  */
-  charset,
-
-	/* Same parameters as charset, but match any character that is
-	   not one of those specified.  */
-  charset_not,
-
-	/* Start remembering the text that is matched, for storing in a
-	   register.  Followed by one byte with the register number, in
-	   the range 0 to one less than the pattern buffer's re_nsub
-	   field.  Then followed by one byte with the number of groups
-	   inner to this one.  (This last has to be part of the
-	   start_memory only because we need it in the on_failure_jump
-	   of re_match_2.)  */
-  start_memory,
-
-	/* Stop remembering the text that is matched and store it in a
-	   memory register.  Followed by one byte with the register
-	   number, in the range 0 to one less than `re_nsub' in the
-	   pattern buffer, and one byte with the number of inner groups,
-	   just like `start_memory'.  (We need the number of inner
-	   groups here because we don't have any easy way of finding the
-	   corresponding start_memory when we're at a stop_memory.)  */
-  stop_memory,
-
-	/* Match a duplicate of something remembered. Followed by one
-	   byte containing the register number.  */
-  duplicate,
-
-	/* Fail unless at beginning of line.  */
-  begline,
-
-	/* Fail unless at end of line.  */
-  endline,
-
-	/* Succeeds if at beginning of buffer (if emacs) or at beginning
-	   of string to be matched (if not).  */
-  begbuf,
-
-	/* Analogously, for end of buffer/string.  */
-  endbuf,
-
-	/* Followed by two byte relative address to which to jump.  */
-  jump,
-
-	/* Same as jump, but marks the end of an alternative.  */
-  jump_past_alt,
-
-	/* Followed by two-byte relative address of place to resume at
-	   in case of failure.  */
-  on_failure_jump,
-
-	/* Like on_failure_jump, but pushes a placeholder instead of the
-	   current string position when executed.  */
-  on_failure_keep_string_jump,
-
-	/* Throw away latest failure point and then jump to following
-	   two-byte relative address.  */
-  pop_failure_jump,
-
-	/* Change to pop_failure_jump if know won't have to backtrack to
-	   match; otherwise change to jump.  This is used to jump
-	   back to the beginning of a repeat.  If what follows this jump
-	   clearly won't match what the repeat does, such that we can be
-	   sure that there is no use backtracking out of repetitions
-	   already matched, then we change it to a pop_failure_jump.
-	   Followed by two-byte address.  */
-  maybe_pop_jump,
-
-	/* Jump to following two-byte address, and push a dummy failure
-	   point. This failure point will be thrown away if an attempt
-	   is made to use it for a failure.  A `+' construct makes this
-	   before the first repeat.  Also used as an intermediary kind
-	   of jump when compiling an alternative.  */
-  dummy_failure_jump,
-
-	/* Push a dummy failure point and continue.  Used at the end of
-	   alternatives.  */
-  push_dummy_failure,
-
-	/* Followed by two-byte relative address and two-byte number n.
-	   After matching N times, jump to the address upon failure.  */
-  succeed_n,
-
-	/* Followed by two-byte relative address, and two-byte number n.
-	   Jump to the address N times, then fail.  */
-  jump_n,
-
-	/* Set the following two-byte relative address to the
-	   subsequent two-byte number.  The address *includes* the two
-	   bytes of number.  */
-  set_number_at,
-
-  wordchar,	/* Matches any word-constituent character.  */
-  notwordchar,	/* Matches any char that is not a word-constituent.  */
-
-  wordbeg,	/* Succeeds if at word beginning.  */
-  wordend,	/* Succeeds if at word end.  */
-
-  wordbound,	/* Succeeds if at a word boundary.  */
-  notwordbound	/* Succeeds if not at a word boundary.  */
-
-#ifdef emacs
-  ,before_dot,	/* Succeeds if before point.  */
-  at_dot,	/* Succeeds if at point.  */
-  after_dot,	/* Succeeds if after point.  */
-
-	/* Matches any character whose syntax is specified.  Followed by
-	   a byte which contains a syntax code, e.g., Sword.  */
-  syntaxspec,
-
-	/* Matches any character whose syntax is not that specified.  */
-  notsyntaxspec
-#endif /* emacs */
-} re_opcode_t;
-
-/* Common operations on the compiled pattern.  */
-
-/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
-
-#define STORE_NUMBER(destination, number)				\
-  do {									\
-    (destination)[0] = (number) & 0377;					\
-    (destination)[1] = (number) >> 8;					\
-  } while (0)
-
-/* Same as STORE_NUMBER, except increment DESTINATION to
-   the byte after where the number is stored.  Therefore, DESTINATION
-   must be an lvalue.  */
-
-#define STORE_NUMBER_AND_INCR(destination, number)			\
-  do {									\
-    STORE_NUMBER (destination, number);					\
-    (destination) += 2;							\
-  } while (0)
-
-/* Put into DESTINATION a number stored in two contiguous bytes starting
-   at SOURCE.  */
-
-#define EXTRACT_NUMBER(destination, source)				\
-  do {									\
-    (destination) = *(source) & 0377;					\
-    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
-  } while (0)
-
-#ifdef DEBUG
-static void
-extract_number (dest, source)
-    int *dest;
-    unsigned char *source;
-{
-  int temp = SIGN_EXTEND_CHAR (*(source + 1));
-  *dest = *source & 0377;
-  *dest += temp << 8;
-}
-
-#ifndef EXTRACT_MACROS /* To debug the macros.  */
-#undef EXTRACT_NUMBER
-#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
-#endif /* not EXTRACT_MACROS */
-
-#endif /* DEBUG */
-
-/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
-   SOURCE must be an lvalue.  */
-
-#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
-  do {									\
-    EXTRACT_NUMBER (destination, source);				\
-    (source) += 2; 							\
-  } while (0)
-
-#ifdef DEBUG
-static void
-extract_number_and_incr (destination, source)
-    int *destination;
-    unsigned char **source;
-{
-  extract_number (destination, *source);
-  *source += 2;
-}
-
-#ifndef EXTRACT_MACROS
-#undef EXTRACT_NUMBER_AND_INCR
-#define EXTRACT_NUMBER_AND_INCR(dest, src) \
-  extract_number_and_incr (&dest, &src)
-#endif /* not EXTRACT_MACROS */
-
-#endif /* DEBUG */
-
-/* If DEBUG is defined, Regex prints many voluminous messages about what
-   it is doing (if the variable `debug' is nonzero).  If linked with the
-   main program in `iregex.c', you can enter patterns and strings
-   interactively.  And if linked with the main program in `main.c' and
-   the other test files, you can run the already-written tests.  */
-
-#ifdef DEBUG
-
-/* We use standard I/O for debugging.  */
-#include <stdio.h>
-
-/* It is useful to test things that ``must'' be true when debugging.  */
-#include <assert.h>
-
-static int debug = 0;
-
-#define DEBUG_STATEMENT(e) e
-#define DEBUG_PRINT1(x) if (debug) printf (x)
-#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
-#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
-#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
-#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
-  if (debug) print_partial_compiled_pattern (s, e)
-#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
-  if (debug) print_double_string (w, s1, sz1, s2, sz2)
-
-
-extern void printchar ();
-
-/* Print the fastmap in human-readable form.  */
-
-void
-print_fastmap (fastmap)
-    char *fastmap;
-{
-  unsigned was_a_range = 0;
-  unsigned i = 0;
-
-  while (i < (1 << BYTEWIDTH))
-    {
-      if (fastmap[i++])
-	{
-	  was_a_range = 0;
-	  printchar (i - 1);
-	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
-	    {
-	      was_a_range = 1;
-	      i++;
-	    }
-	  if (was_a_range)
-	    {
-	      printf ("-");
-	      printchar (i - 1);
-	    }
-	}
-    }
-  putchar ('\n');
-}
-
-
-/* Print a compiled pattern string in human-readable form, starting at
-   the START pointer into it and ending just before the pointer END.  */
-
-void
-print_partial_compiled_pattern (start, end)
-    unsigned char *start;
-    unsigned char *end;
-{
-  int mcnt, mcnt2;
-  unsigned char *p = start;
-  unsigned char *pend = end;
-
-  if (start == NULL)
-    {
-      printf ("(null)\n");
-      return;
-    }
-
-  /* Loop over pattern commands.  */
-  while (p < pend)
-    {
-      switch ((re_opcode_t) *p++)
-	{
-	case no_op:
-	  printf ("/no_op");
-	  break;
-
-	case exactn:
-	  mcnt = *p++;
-	  printf ("/exactn/%d", mcnt);
-	  do
-	    {
-	      putchar ('/');
-	      printchar (*p++);
-	    }
-	  while (--mcnt);
-	  break;
-
-	case start_memory:
-	  mcnt = *p++;
-	  printf ("/start_memory/%d/%d", mcnt, *p++);
-	  break;
-
-	case stop_memory:
-	  mcnt = *p++;
-	  printf ("/stop_memory/%d/%d", mcnt, *p++);
-	  break;
-
-	case duplicate:
-	  printf ("/duplicate/%d", *p++);
-	  break;
-
-	case anychar:
-	  printf ("/anychar");
-	  break;
-
-	case charset:
-	case charset_not:
-	  {
-	    register int c;
-
-	    printf ("/charset%s",
-		    (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
-
-	    assert (p + *p < pend);
-
-	    for (c = 0; c < *p; c++)
-	      {
-		unsigned bit;
-		unsigned char map_byte = p[1 + c];
-
-		putchar ('/');
-
-		for (bit = 0; bit < BYTEWIDTH; bit++)
-		  if (map_byte & (1 << bit))
-		    printchar (c * BYTEWIDTH + bit);
-	      }
-	    p += 1 + *p;
-	    break;
-	  }
-
-	case begline:
-	  printf ("/begline");
-	  break;
-
-	case endline:
-	  printf ("/endline");
-	  break;
-
-	case on_failure_jump:
-	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/on_failure_jump/0/%d", mcnt);
-	  break;
-
-	case on_failure_keep_string_jump:
-	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
-	  break;
-
-	case dummy_failure_jump:
-	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/dummy_failure_jump/0/%d", mcnt);
-	  break;
-
-	case push_dummy_failure:
-	  printf ("/push_dummy_failure");
-	  break;
-
-	case maybe_pop_jump:
-	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/maybe_pop_jump/0/%d", mcnt);
-	  break;
-
-	case pop_failure_jump:
-	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/pop_failure_jump/0/%d", mcnt);
-	  break;
-
-	case jump_past_alt:
-	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/jump_past_alt/0/%d", mcnt);
-	  break;
-
-	case jump:
-	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/jump/0/%d", mcnt);
-	  break;
-
-	case succeed_n:
-	  extract_number_and_incr (&mcnt, &p);
-	  extract_number_and_incr (&mcnt2, &p);
-	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
-	  break;
-
-	case jump_n:
-	  extract_number_and_incr (&mcnt, &p);
-	  extract_number_and_incr (&mcnt2, &p);
-	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
-	  break;
-
-	case set_number_at:
-	  extract_number_and_incr (&mcnt, &p);
-	  extract_number_and_incr (&mcnt2, &p);
-	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
-	  break;
-
-	case wordbound:
-	  printf ("/wordbound");
-	  break;
-
-	case notwordbound:
-	  printf ("/notwordbound");
-	  break;
-
-	case wordbeg:
-	  printf ("/wordbeg");
-	  break;
-
-	case wordend:
-	  printf ("/wordend");
-
-#ifdef emacs
-	case before_dot:
-	  printf ("/before_dot");
-	  break;
-
-	case at_dot:
-	  printf ("/at_dot");
-	  break;
-
-	case after_dot:
-	  printf ("/after_dot");
-	  break;
-
-	case syntaxspec:
-	  printf ("/syntaxspec");
-	  mcnt = *p++;
-	  printf ("/%d", mcnt);
-	  break;
-
-	case notsyntaxspec:
-	  printf ("/notsyntaxspec");
-	  mcnt = *p++;
-	  printf ("/%d", mcnt);
-	  break;
-#endif /* emacs */
-
-	case wordchar:
-	  printf ("/wordchar");
-	  break;
-
-	case notwordchar:
-	  printf ("/notwordchar");
-	  break;
-
-	case begbuf:
-	  printf ("/begbuf");
-	  break;
-
-	case endbuf:
-	  printf ("/endbuf");
-	  break;
-
-	default:
-	  printf ("?%d", *(p-1));
-	}
-    }
-  printf ("/\n");
-}
-
-
-void
-print_compiled_pattern (bufp)
-    struct re_pattern_buffer *bufp;
-{
-  unsigned char *buffer = bufp->buffer;
-
-  print_partial_compiled_pattern (buffer, buffer + bufp->used);
-  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
-
-  if (bufp->fastmap_accurate && bufp->fastmap)
-    {
-      printf ("fastmap: ");
-      print_fastmap (bufp->fastmap);
-    }
-
-  printf ("re_nsub: %d\t", bufp->re_nsub);
-  printf ("regs_alloc: %d\t", bufp->regs_allocated);
-  printf ("can_be_null: %d\t", bufp->can_be_null);
-  printf ("newline_anchor: %d\n", bufp->newline_anchor);
-  printf ("no_sub: %d\t", bufp->no_sub);
-  printf ("not_bol: %d\t", bufp->not_bol);
-  printf ("not_eol: %d\t", bufp->not_eol);
-  printf ("syntax: %d\n", bufp->syntax);
-  /* Perhaps we should print the translate table?  */
-}
-
-
-void
-print_double_string (where, string1, size1, string2, size2)
-    const char *where;
-    const char *string1;
-    const char *string2;
-    int size1;
-    int size2;
-{
-  unsigned this_char;
-
-  if (where == NULL)
-    printf ("(null)");
-  else
-    {
-      if (FIRST_STRING_P (where))
-	{
-	  for (this_char = where - string1; this_char < size1; this_char++)
-	    printchar (string1[this_char]);
-
-	  where = string2;
-	}
-
-      for (this_char = where - string2; this_char < size2; this_char++)
-	printchar (string2[this_char]);
-    }
-}
-
-#else /* not DEBUG */
-
-#undef assert
-#define assert(e)
-
-#define DEBUG_STATEMENT(e)
-#define DEBUG_PRINT1(x)
-#define DEBUG_PRINT2(x1, x2)
-#define DEBUG_PRINT3(x1, x2, x3)
-#define DEBUG_PRINT4(x1, x2, x3, x4)
-#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
-#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
-
-#endif /* not DEBUG */
-
-/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
-   also be assigned to arbitrarily: each pattern buffer stores its own
-   syntax, so it can be changed between regex compilations.  */
-reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
-
-
-/* Specify the precise syntax of regexps for compilation.  This provides
-   for compatibility for various utilities which historically have
-   different, incompatible syntaxes.
-
-   The argument SYNTAX is a bit mask comprised of the various bits
-   defined in regex.h.  We return the old syntax.  */
-
-reg_syntax_t
-re_set_syntax (syntax)
-    reg_syntax_t syntax;
-{
-  reg_syntax_t ret = re_syntax_options;
-
-  re_syntax_options = syntax;
-  return ret;
-}
-
-/* This table gives an error message for each of the error codes listed
-   in regex.h.  Obviously the order here has to be same as there.  */
-
-static const char *re_error_msg[] =
-  { NULL,					/* REG_NOERROR */
-    "No match",					/* REG_NOMATCH */
-    "Invalid regular expression",		/* REG_BADPAT */
-    "Invalid collation character",		/* REG_ECOLLATE */
-    "Invalid character class name",		/* REG_ECTYPE */
-    "Trailing backslash",			/* REG_EESCAPE */
-    "Invalid back reference",			/* REG_ESUBREG */
-    "Unmatched [ or [^",			/* REG_EBRACK */
-    "Unmatched ( or \\(",			/* REG_EPAREN */
-    "Unmatched \\{",				/* REG_EBRACE */
-    "Invalid content of \\{\\}",		/* REG_BADBR */
-    "Invalid range end",			/* REG_ERANGE */
-    "Memory exhausted",				/* REG_ESPACE */
-    "Invalid preceding regular expression",	/* REG_BADRPT */
-    "Premature end of regular expression",	/* REG_EEND */
-    "Regular expression too big",		/* REG_ESIZE */
-    "Unmatched ) or \\)",			/* REG_ERPAREN */
-  };
-
-/* Subroutine declarations and macros for regex_compile.  */
-
-static void store_op1 (), store_op2 ();
-static void insert_op1 (), insert_op2 ();
-static boolean at_begline_loc_p (), at_endline_loc_p ();
-static boolean group_in_compile_stack ();
-static reg_errcode_t compile_range ();
-
-/* Fetch the next character in the uncompiled pattern---translating it
-   if necessary.  Also cast from a signed character in the constant
-   string passed to us by the user to an unsigned char that we can use
-   as an array index (in, e.g., `translate').  */
-#define PATFETCH(c)							\
-  do {if (p == pend) return REG_EEND;					\
-    c = (unsigned char) *p++;						\
-    if (translate) c = translate[c]; 					\
-  } while (0)
-
-/* Fetch the next character in the uncompiled pattern, with no
-   translation.  */
-#define PATFETCH_RAW(c)							\
-  do {if (p == pend) return REG_EEND;					\
-    c = (unsigned char) *p++; 						\
-  } while (0)
-
-/* Go backwards one character in the pattern.  */
-#define PATUNFETCH p--
-
-
-/* If `translate' is non-null, return translate[D], else just D.  We
-   cast the subscript to translate because some data is declared as
-   `char *', to avoid warnings when a string constant is passed.  But
-   when we use a character as a subscript we must make it unsigned.  */
-#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
-
-
-/* Macros for outputting the compiled pattern into `buffer'.  */
-
-/* If the buffer isn't allocated when it comes in, use this.  */
-#define INIT_BUF_SIZE  32
-
-/* Make sure we have at least N more bytes of space in buffer.  */
-#define GET_BUFFER_SPACE(n)						\
-    while (b - bufp->buffer + (n) > bufp->allocated)			\
-      EXTEND_BUFFER ()
-
-/* Make sure we have one more byte of buffer space and then add C to it.  */
-#define BUF_PUSH(c)							\
-  do {									\
-    GET_BUFFER_SPACE (1);						\
-    *b++ = (unsigned char) (c);						\
-  } while (0)
-
-
-/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
-#define BUF_PUSH_2(c1, c2)						\
-  do {									\
-    GET_BUFFER_SPACE (2);						\
-    *b++ = (unsigned char) (c1);					\
-    *b++ = (unsigned char) (c2);					\
-  } while (0)
-
-
-/* As with BUF_PUSH_2, except for three bytes.  */
-#define BUF_PUSH_3(c1, c2, c3)						\
-  do {									\
-    GET_BUFFER_SPACE (3);						\
-    *b++ = (unsigned char) (c1);					\
-    *b++ = (unsigned char) (c2);					\
-    *b++ = (unsigned char) (c3);					\
-  } while (0)
-
-
-/* Store a jump with opcode OP at LOC to location TO.  We store a
-   relative address offset by the three bytes the jump itself occupies.  */
-#define STORE_JUMP(op, loc, to) \
-  store_op1 (op, loc, (to) - (loc) - 3)
-
-/* Likewise, for a two-argument jump.  */
-#define STORE_JUMP2(op, loc, to, arg) \
-  store_op2 (op, loc, (to) - (loc) - 3, arg)
-
-/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
-#define INSERT_JUMP(op, loc, to) \
-  insert_op1 (op, loc, (to) - (loc) - 3, b)
-
-/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
-#define INSERT_JUMP2(op, loc, to, arg) \
-  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
-
-
-/* This is not an arbitrary limit: the arguments which represent offsets
-   into the pattern are two bytes long.  So if 2^16 bytes turns out to
-   be too small, many things would have to change.  */
-#define MAX_BUF_SIZE (1L << 16)
-
-
-/* Extend the buffer by twice its current size via realloc and
-   reset the pointers that pointed into the old block to point to the
-   correct places in the new one.  If extending the buffer results in it
-   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
-#define EXTEND_BUFFER()							\
-  do { 									\
-    unsigned char *old_buffer = bufp->buffer;				\
-    if (bufp->allocated == MAX_BUF_SIZE) 				\
-      return REG_ESIZE;							\
-    bufp->allocated <<= 1;						\
-    if (bufp->allocated > MAX_BUF_SIZE)					\
-      bufp->allocated = MAX_BUF_SIZE; 					\
-    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
-    if (bufp->buffer == NULL)						\
-      return REG_ESPACE;						\
-    /* If the buffer moved, move all the pointers into it.  */		\
-    if (old_buffer != bufp->buffer)					\
-      {									\
-	b = (b - old_buffer) + bufp->buffer;				\
-	begalt = (begalt - old_buffer) + bufp->buffer;			\
-	if (fixup_alt_jump)						\
-	  fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
-	if (laststart)							\
-	  laststart = (laststart - old_buffer) + bufp->buffer;		\
-	if (pending_exact)						\
-	  pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
-      }									\
-  } while (0)
-
-
-/* Since we have one byte reserved for the register number argument to
-   {start,stop}_memory, the maximum number of groups we can report
-   things about is what fits in that byte.  */
-#define MAX_REGNUM 255
-
-/* But patterns can have more than `MAX_REGNUM' registers.  We just
-   ignore the excess.  */
-typedef unsigned regnum_t;
-
-
-/* Macros for the compile stack.  */
-
-/* Since offsets can go either forwards or backwards, this type needs to
-   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
-typedef int pattern_offset_t;
-
-typedef struct
-{
-  pattern_offset_t begalt_offset;
-  pattern_offset_t fixup_alt_jump;
-  pattern_offset_t inner_group_offset;
-  pattern_offset_t laststart_offset;
-  regnum_t regnum;
-} compile_stack_elt_t;
-
-
-typedef struct
-{
-  compile_stack_elt_t *stack;
-  unsigned size;
-  unsigned avail;			/* Offset of next open position.  */
-} compile_stack_type;
-
-
-#define INIT_COMPILE_STACK_SIZE 32
-
-#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
-#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
-
-/* The next available element.  */
-#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
-
-
-/* Set the bit for character C in a list.  */
-#define SET_LIST_BIT(c)                               \
-  (b[((unsigned char) (c)) / BYTEWIDTH]               \
-   |= 1 << (((unsigned char) c) % BYTEWIDTH))
-
-
-/* Get the next unsigned number in the uncompiled pattern.  */
-#define GET_UNSIGNED_NUMBER(num) 					\
-  { if (p != pend)							\
-     {									\
-       PATFETCH (c); 							\
-       while (ISDIGIT (c)) 						\
-	 { 								\
-	   if (num < 0)							\
-	      num = 0;							\
-	   num = num * 10 + c - '0'; 					\
-	   if (p == pend) 						\
-	      break; 							\
-	   PATFETCH (c);						\
-	 } 								\
-       } 								\
-    }
-
-#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
-
-#define IS_CHAR_CLASS(string)						\
-   (STREQ (string, "alpha") || STREQ (string, "upper")			\
-    || STREQ (string, "lower") || STREQ (string, "digit")		\
-    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
-    || STREQ (string, "space") || STREQ (string, "print")		\
-    || STREQ (string, "punct") || STREQ (string, "graph")		\
-    || STREQ (string, "cntrl") || STREQ (string, "blank"))
-
-/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
-   Returns one of error codes defined in `regex.h', or zero for success.
-
-   Assumes the `allocated' (and perhaps `buffer') and `translate'
-   fields are set in BUFP on entry.
-
-   If it succeeds, results are put in BUFP (if it returns an error, the
-   contents of BUFP are undefined):
-     `buffer' is the compiled pattern;
-     `syntax' is set to SYNTAX;
-     `used' is set to the length of the compiled pattern;
-     `fastmap_accurate' is zero;
-     `re_nsub' is the number of subexpressions in PATTERN;
-     `not_bol' and `not_eol' are zero;
-
-   The `fastmap' and `newline_anchor' fields are neither
-   examined nor set.  */
-
-static reg_errcode_t
-regex_compile (pattern, size, syntax, bufp)
-     const char *pattern;
-     int size;
-     reg_syntax_t syntax;
-     struct re_pattern_buffer *bufp;
-{
-  /* We fetch characters from PATTERN here.  Even though PATTERN is
-     `char *' (i.e., signed), we declare these variables as unsigned, so
-     they can be reliably used as array indices.  */
-  register unsigned char c, c1;
-
-  /* A random temporary spot in PATTERN.  */
-  const char *p1;
-
-  /* Points to the end of the buffer, where we should append.  */
-  register unsigned char *b;
-
-  /* Keeps track of unclosed groups.  */
-  compile_stack_type compile_stack;
-
-  /* Points to the current (ending) position in the pattern.  */
-  const char *p = pattern;
-  const char *pend = pattern + size;
-
-  /* How to translate the characters in the pattern.  */
-  char *translate = bufp->translate;
-
-  /* Address of the count-byte of the most recently inserted `exactn'
-     command.  This makes it possible to tell if a new exact-match
-     character can be added to that command or if the character requires
-     a new `exactn' command.  */
-  unsigned char *pending_exact = 0;
-
-  /* Address of start of the most recently finished expression.
-     This tells, e.g., postfix * where to find the start of its
-     operand.  Reset at the beginning of groups and alternatives.  */
-  unsigned char *laststart = 0;
-
-  /* Address of beginning of regexp, or inside of last group.  */
-  unsigned char *begalt;
-
-  /* Place in the uncompiled pattern (i.e., the {) to
-     which to go back if the interval is invalid.  */
-  const char *beg_interval;
-
-  /* Address of the place where a forward jump should go to the end of
-     the containing expression.  Each alternative of an `or' -- except the
-     last -- ends with a forward jump of this sort.  */
-  unsigned char *fixup_alt_jump = 0;
-
-  /* Counts open-groups as they are encountered.  Remembered for the
-     matching close-group on the compile stack, so the same register
-     number is put in the stop_memory as the start_memory.  */
-  regnum_t regnum = 0;
-
-#ifdef DEBUG
-  DEBUG_PRINT1 ("\nCompiling pattern: ");
-  if (debug)
-    {
-      unsigned debug_count;
-
-      for (debug_count = 0; debug_count < size; debug_count++)
-	printchar (pattern[debug_count]);
-      putchar ('\n');
-    }
-#endif /* DEBUG */
-
-  /* Initialize the compile stack.  */
-  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
-  if (compile_stack.stack == NULL)
-    return REG_ESPACE;
-
-  compile_stack.size = INIT_COMPILE_STACK_SIZE;
-  compile_stack.avail = 0;
-
-  /* Initialize the pattern buffer.  */
-  bufp->syntax = syntax;
-  bufp->fastmap_accurate = 0;
-  bufp->not_bol = bufp->not_eol = 0;
-
-  /* Set `used' to zero, so that if we return an error, the pattern
-     printer (for debugging) will think there's no pattern.  We reset it
-     at the end.  */
-  bufp->used = 0;
-
-  /* Always count groups, whether or not bufp->no_sub is set.  */
-  bufp->re_nsub = 0;
-
-#if !defined (emacs) && !defined (SYNTAX_TABLE)
-  /* Initialize the syntax table.  */
-   init_syntax_once ();
-#endif
-
-  if (bufp->allocated == 0)
-    {
-      if (bufp->buffer)
-	{ /* If zero allocated, but buffer is non-null, try to realloc
-	     enough space.  This loses if buffer's address is bogus, but
-	     that is the user's responsibility.  */
-	  RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
-	}
-      else
-	{ /* Caller did not allocate a buffer.  Do it for them.  */
-	  bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
-	}
-      if (!bufp->buffer) return REG_ESPACE;
-
-      bufp->allocated = INIT_BUF_SIZE;
-    }
-
-  begalt = b = bufp->buffer;
-
-  /* Loop through the uncompiled pattern until we're at the end.  */
-  while (p != pend)
-    {
-      PATFETCH (c);
-
-      switch (c)
-	{
-	case '^':
-	  {
-	    if (   /* If at start of pattern, it's an operator.  */
-		   p == pattern + 1
-		   /* If context independent, it's an operator.  */
-		|| syntax & RE_CONTEXT_INDEP_ANCHORS
-		   /* Otherwise, depends on what's come before.  */
-		|| at_begline_loc_p (pattern, p, syntax))
-	      BUF_PUSH (begline);
-	    else
-	      goto normal_char;
-	  }
-	  break;
-
-
-	case '$':
-	  {
-	    if (   /* If at end of pattern, it's an operator.  */
-		   p == pend
-		   /* If context independent, it's an operator.  */
-		|| syntax & RE_CONTEXT_INDEP_ANCHORS
-		   /* Otherwise, depends on what's next.  */
-		|| at_endline_loc_p (p, pend, syntax))
-	       BUF_PUSH (endline);
-	     else
-	       goto normal_char;
-	   }
-	   break;
-
-
-	case '+':
-	case '?':
-	  if ((syntax & RE_BK_PLUS_QM)
-	      || (syntax & RE_LIMITED_OPS))
-	    goto normal_char;
-	handle_plus:
-	case '*':
-	  /* If there is no previous pattern... */
-	  if (!laststart)
-	    {
-	      if (syntax & RE_CONTEXT_INVALID_OPS)
-		return REG_BADRPT;
-	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))
-		goto normal_char;
-	    }
-
-	  {
-	    /* Are we optimizing this jump?  */
-	    boolean keep_string_p = false;
-
-	    /* 1 means zero (many) matches is allowed.  */
-	    char zero_times_ok = 0, many_times_ok = 0;
-
-	    /* If there is a sequence of repetition chars, collapse it
-	       down to just one (the right one).  We can't combine
-	       interval operators with these because of, e.g., `a{2}*',
-	       which should only match an even number of `a's.  */
-
-	    for (;;)
-	      {
-		zero_times_ok |= c != '+';
-		many_times_ok |= c != '?';
-
-		if (p == pend)
-		  break;
-
-		PATFETCH (c);
-
-		if (c == '*'
-		    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
-		  ;
-
-		else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
-		  {
-		    if (p == pend) return REG_EESCAPE;
-
-		    PATFETCH (c1);
-		    if (!(c1 == '+' || c1 == '?'))
-		      {
-			PATUNFETCH;
-			PATUNFETCH;
-			break;
-		      }
-
-		    c = c1;
-		  }
-		else
-		  {
-		    PATUNFETCH;
-		    break;
-		  }
-
-		/* If we get here, we found another repeat character.  */
-	       }
-
-	    /* Star, etc. applied to an empty pattern is equivalent
-	       to an empty pattern.  */
-	    if (!laststart)
-	      break;
-
-	    /* Now we know whether or not zero matches is allowed
-	       and also whether or not two or more matches is allowed.  */
-	    if (many_times_ok)
-	      { /* More than one repetition is allowed, so put in at the
-		   end a backward relative jump from `b' to before the next
-		   jump we're going to put in below (which jumps from
-		   laststart to after this jump).
-
-		   But if we are at the `*' in the exact sequence `.*\n',
-		   insert an unconditional jump backwards to the .,
-		   instead of the beginning of the loop.  This way we only
-		   push a failure point once, instead of every time
-		   through the loop.  */
-		assert (p - 1 > pattern);
-
-		/* Allocate the space for the jump.  */
-		GET_BUFFER_SPACE (3);
-
-		/* We know we are not at the first character of the pattern,
-		   because laststart was nonzero.  And we've already
-		   incremented `p', by the way, to be the character after
-		   the `*'.  Do we have to do something analogous here
-		   for null bytes, because of RE_DOT_NOT_NULL?  */
-		if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
-		    && zero_times_ok
-		    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
-		    && !(syntax & RE_DOT_NEWLINE))
-		  { /* We have .*\n.  */
-		    STORE_JUMP (jump, b, laststart);
-		    keep_string_p = true;
-		  }
-		else
-		  /* Anything else.  */
-		  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
-
-		/* We've added more stuff to the buffer.  */
-		b += 3;
-	      }
-
-	    /* On failure, jump from laststart to b + 3, which will be the
-	       end of the buffer after this jump is inserted.  */
-	    GET_BUFFER_SPACE (3);
-	    INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
-				       : on_failure_jump,
-			 laststart, b + 3);
-	    pending_exact = 0;
-	    b += 3;
-
-	    if (!zero_times_ok)
-	      {
-		/* At least one repetition is required, so insert a
-		   `dummy_failure_jump' before the initial
-		   `on_failure_jump' instruction of the loop. This
-		   effects a skip over that instruction the first time
-		   we hit that loop.  */
-		GET_BUFFER_SPACE (3);
-		INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
-		b += 3;
-	      }
-	    }
-	  break;
-
-
-	case '.':
-	  laststart = b;
-	  BUF_PUSH (anychar);
-	  break;
-
-
-	case '[':
-	  {
-	    boolean had_char_class = false;
-
-	    if (p == pend) return REG_EBRACK;
-
-	    /* Ensure that we have enough space to push a charset: the
-	       opcode, the length count, and the bitset; 34 bytes in all.  */
-	    GET_BUFFER_SPACE (34);
-
-	    laststart = b;
-
-	    /* We test `*p == '^' twice, instead of using an if
-	       statement, so we only need one BUF_PUSH.  */
-	    BUF_PUSH (*p == '^' ? charset_not : charset);
-	    if (*p == '^')
-	      p++;
-
-	    /* Remember the first position in the bracket expression.  */
-	    p1 = p;
-
-	    /* Push the number of bytes in the bitmap.  */
-	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
-
-	    /* Clear the whole map.  */
-	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
-
-	    /* charset_not matches newline according to a syntax bit.  */
-	    if ((re_opcode_t) b[-2] == charset_not
-		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
-	      SET_LIST_BIT ('\n');
-
-	    /* Read in characters and ranges, setting map bits.  */
-	    for (;;)
-	      {
-		if (p == pend) return REG_EBRACK;
-
-		PATFETCH (c);
-
-		/* \ might escape characters inside [...] and [^...].  */
-		if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
-		  {
-		    if (p == pend) return REG_EESCAPE;
-
-		    PATFETCH (c1);
-		    SET_LIST_BIT (c1);
-		    continue;
-		  }
-
-		/* Could be the end of the bracket expression.  If it's
-		   not (i.e., when the bracket expression is `[]' so
-		   far), the ']' character bit gets set way below.  */
-		if (c == ']' && p != p1 + 1)
-		  break;
-
-		/* Look ahead to see if it's a range when the last thing
-		   was a character class.  */
-		if (had_char_class && c == '-' && *p != ']')
-		  return REG_ERANGE;
-
-		/* Look ahead to see if it's a range when the last thing
-		   was a character: if this is a hyphen not at the
-		   beginning or the end of a list, then it's the range
-		   operator.  */
-		if (c == '-'
-		    && !(p - 2 >= pattern && p[-2] == '[')
-		    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
-		    && *p != ']')
-		  {
-		    reg_errcode_t ret
-		      = compile_range (&p, pend, translate, syntax, b);
-		    if (ret != REG_NOERROR) return ret;
-		  }
-
-		else if (p[0] == '-' && p[1] != ']')
-		  { /* This handles ranges made up of characters only.  */
-		    reg_errcode_t ret;
-
-		    /* Move past the `-'.  */
-		    PATFETCH (c1);
-
-		    ret = compile_range (&p, pend, translate, syntax, b);
-		    if (ret != REG_NOERROR) return ret;
-		  }
-
-		/* See if we're at the beginning of a possible character
-		   class.  */
-
-		else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
-		  { /* Leave room for the null.  */
-		    char str[CHAR_CLASS_MAX_LENGTH + 1];
-
-		    PATFETCH (c);
-		    c1 = 0;
-
-		    /* If pattern is `[[:'.  */
-		    if (p == pend) return REG_EBRACK;
-
-		    for (;;)
-		      {
-			PATFETCH (c);
-			if (c == ':' || c == ']' || p == pend
-			    || c1 == CHAR_CLASS_MAX_LENGTH)
-			  break;
-			str[c1++] = c;
-		      }
-		    str[c1] = '\0';
-
-		    /* If isn't a word bracketed by `[:' and:`]':
-		       undo the ending character, the letters, and leave
-		       the leading `:' and `[' (but set bits for them).  */
-		    if (c == ':' && *p == ']')
-		      {
-			int ch;
-			boolean is_alnum = STREQ (str, "alnum");
-			boolean is_alpha = STREQ (str, "alpha");
-			boolean is_blank = STREQ (str, "blank");
-			boolean is_cntrl = STREQ (str, "cntrl");
-			boolean is_digit = STREQ (str, "digit");
-			boolean is_graph = STREQ (str, "graph");
-			boolean is_lower = STREQ (str, "lower");
-			boolean is_print = STREQ (str, "print");
-			boolean is_punct = STREQ (str, "punct");
-			boolean is_space = STREQ (str, "space");
-			boolean is_upper = STREQ (str, "upper");
-			boolean is_xdigit = STREQ (str, "xdigit");
-
-			if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
-
-			/* Throw away the ] at the end of the character
-			   class.  */
-			PATFETCH (c);
-
-			if (p == pend) return REG_EBRACK;
-
-			for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
-			  {
-			    if (   (is_alnum  && ISALNUM (ch))
-				|| (is_alpha  && ISALPHA (ch))
-				|| (is_blank  && ISBLANK (ch))
-				|| (is_cntrl  && ISCNTRL (ch))
-				|| (is_digit  && ISDIGIT (ch))
-				|| (is_graph  && ISGRAPH (ch))
-				|| (is_lower  && ISLOWER (ch))
-				|| (is_print  && ISPRINT (ch))
-				|| (is_punct  && ISPUNCT (ch))
-				|| (is_space  && ISSPACE (ch))
-				|| (is_upper  && ISUPPER (ch))
-				|| (is_xdigit && ISXDIGIT (ch)))
-			    SET_LIST_BIT (ch);
-			  }
-			had_char_class = true;
-		      }
-		    else
-		      {
-			c1++;
-			while (c1--)
-			  PATUNFETCH;
-			SET_LIST_BIT ('[');
-			SET_LIST_BIT (':');
-			had_char_class = false;
-		      }
-		  }
-		else
-		  {
-		    had_char_class = false;
-		    SET_LIST_BIT (c);
-		  }
-	      }
-
-	    /* Discard any (non)matching list bytes that are all 0 at the
-	       end of the map.  Decrease the map-length byte too.  */
-	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
-	      b[-1]--;
-	    b += b[-1];
-	  }
-	  break;
-
-
-	case '(':
-	  if (syntax & RE_NO_BK_PARENS)
-	    goto handle_open;
-	  else
-	    goto normal_char;
-
-
-	case ')':
-	  if (syntax & RE_NO_BK_PARENS)
-	    goto handle_close;
-	  else
-	    goto normal_char;
-
-
-	case '\n':
-	  if (syntax & RE_NEWLINE_ALT)
-	    goto handle_alt;
-	  else
-	    goto normal_char;
-
-
-	case '|':
-	  if (syntax & RE_NO_BK_VBAR)
-	    goto handle_alt;
-	  else
-	    goto normal_char;
-
-
-	case '{':
-	   if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
-	     goto handle_interval;
-	   else
-	     goto normal_char;
-
-
-	case '\\':
-	  if (p == pend) return REG_EESCAPE;
-
-	  /* Do not translate the character after the \, so that we can
-	     distinguish, e.g., \B from \b, even if we normally would
-	     translate, e.g., B to b.  */
-	  PATFETCH_RAW (c);
-
-	  switch (c)
-	    {
-	    case '(':
-	      if (syntax & RE_NO_BK_PARENS)
-		goto normal_backslash;
-
-	    handle_open:
-	      bufp->re_nsub++;
-	      regnum++;
-
-	      if (COMPILE_STACK_FULL)
-		{
-		  RETALLOC (compile_stack.stack, compile_stack.size << 1,
-			    compile_stack_elt_t);
-		  if (compile_stack.stack == NULL) return REG_ESPACE;
-
-		  compile_stack.size <<= 1;
-		}
-
-	      /* These are the values to restore when we hit end of this
-		 group.  They are all relative offsets, so that if the
-		 whole pattern moves because of realloc, they will still
-		 be valid.  */
-	      COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
-	      COMPILE_STACK_TOP.fixup_alt_jump
-		= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
-	      COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
-	      COMPILE_STACK_TOP.regnum = regnum;
-
-	      /* We will eventually replace the 0 with the number of
-		 groups inner to this one.  But do not push a
-		 start_memory for groups beyond the last one we can
-		 represent in the compiled pattern.  */
-	      if (regnum <= MAX_REGNUM)
-		{
-		  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
-		  BUF_PUSH_3 (start_memory, regnum, 0);
-		}
-
-	      compile_stack.avail++;
-
-	      fixup_alt_jump = 0;
-	      laststart = 0;
-	      begalt = b;
-	      /* If we've reached MAX_REGNUM groups, then this open
-		 won't actually generate any code, so we'll have to
-		 clear pending_exact explicitly.  */
-	      pending_exact = 0;
-	      break;
-
-
-	    case ')':
-	      if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
-
-	      if (COMPILE_STACK_EMPTY)
-	      {
-		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
-		  goto normal_backslash;
-		else
-		  return REG_ERPAREN;
-	      }
-
-	    handle_close:
-	      if (fixup_alt_jump)
-		{ /* Push a dummy failure point at the end of the
-		     alternative for a possible future
-		     `pop_failure_jump' to pop.  See comments at
-		     `push_dummy_failure' in `re_match_2'.  */
-		  BUF_PUSH (push_dummy_failure);
-
-		  /* We allocated space for this jump when we assigned
-		     to `fixup_alt_jump', in the `handle_alt' case below.  */
-		  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
-		}
-
-	      /* See similar code for backslashed left paren above.  */
-	      if (COMPILE_STACK_EMPTY)
-	      {
-		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
-		  goto normal_char;
-		else
-		  return REG_ERPAREN;
-	      }
-
-	      /* Since we just checked for an empty stack above, this
-		 ``can't happen''.  */
-	      assert (compile_stack.avail != 0);
-	      {
-		/* We don't just want to restore into `regnum', because
-		   later groups should continue to be numbered higher,
-		   as in `(ab)c(de)' -- the second group is #2.  */
-		regnum_t this_group_regnum;
-
-		compile_stack.avail--;
-		begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
-		fixup_alt_jump
-		  = COMPILE_STACK_TOP.fixup_alt_jump
-		    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
-		    : 0;
-		laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
-		this_group_regnum = COMPILE_STACK_TOP.regnum;
-		/* If we've reached MAX_REGNUM groups, then this open
-		   won't actually generate any code, so we'll have to
-		   clear pending_exact explicitly.  */
-		pending_exact = 0;
-
-		/* We're at the end of the group, so now we know how many
-		   groups were inside this one.  */
-		if (this_group_regnum <= MAX_REGNUM)
-		  {
-		    unsigned char *inner_group_loc
-		      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
-
-		    *inner_group_loc = regnum - this_group_regnum;
-		    BUF_PUSH_3 (stop_memory, this_group_regnum,
-				regnum - this_group_regnum);
-		  }
-	      }
-	      break;
-
-
-	    case '|':					/* `\|'.  */
-	      if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
-		goto normal_backslash;
-	    handle_alt:
-	      if (syntax & RE_LIMITED_OPS)
-		goto normal_char;
-
-	      /* Insert before the previous alternative a jump which
-		 jumps to this alternative if the former fails.  */
-	      GET_BUFFER_SPACE (3);
-	      INSERT_JUMP (on_failure_jump, begalt, b + 6);
-	      pending_exact = 0;
-	      b += 3;
-
-	      /* The alternative before this one has a jump after it
-		 which gets executed if it gets matched.  Adjust that
-		 jump so it will jump to this alternative's analogous
-		 jump (put in below, which in turn will jump to the next
-		 (if any) alternative's such jump, etc.).  The last such
-		 jump jumps to the correct final destination.  A picture:
-			  _____ _____
-			  |   | |   |
-			  |   v |   v
-			 a | b   | c
-
-		 If we are at `b', then fixup_alt_jump right now points to a
-		 three-byte space after `a'.  We'll put in the jump, set
-		 fixup_alt_jump to right after `b', and leave behind three
-		 bytes which we'll fill in when we get to after `c'.  */
-
-	      if (fixup_alt_jump)
-		STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
-
-	      /* Mark and leave space for a jump after this alternative,
-		 to be filled in later either by next alternative or
-		 when know we're at the end of a series of alternatives.  */
-	      fixup_alt_jump = b;
-	      GET_BUFFER_SPACE (3);
-	      b += 3;
-
-	      laststart = 0;
-	      begalt = b;
-	      break;
-
-
-	    case '{':
-	      /* If \{ is a literal.  */
-	      if (!(syntax & RE_INTERVALS)
-		     /* If we're at `\{' and it's not the open-interval
-			operator.  */
-		  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
-		  || (p - 2 == pattern  &&  p == pend))
-		goto normal_backslash;
-
-	    handle_interval:
-	      {
-		/* If got here, then the syntax allows intervals.  */
-
-		/* At least (most) this many matches must be made.  */
-		int lower_bound = -1, upper_bound = -1;
-
-		beg_interval = p - 1;
-
-		if (p == pend)
-		  {
-		    if (syntax & RE_NO_BK_BRACES)
-		      goto unfetch_interval;
-		    else
-		      return REG_EBRACE;
-		  }
-
-		GET_UNSIGNED_NUMBER (lower_bound);
-
-		if (c == ',')
-		  {
-		    GET_UNSIGNED_NUMBER (upper_bound);
-		    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
-		  }
-		else
-		  /* Interval such as `{1}' => match exactly once. */
-		  upper_bound = lower_bound;
-
-		if (lower_bound < 0 || upper_bound > RE_DUP_MAX
-		    || lower_bound > upper_bound)
-		  {
-		    if (syntax & RE_NO_BK_BRACES)
-		      goto unfetch_interval;
-		    else
-		      return REG_BADBR;
-		  }
-
-		if (!(syntax & RE_NO_BK_BRACES))
-		  {
-		    if (c != '\\') return REG_EBRACE;
-
-		    PATFETCH (c);
-		  }
-
-		if (c != '}')
-		  {
-		    if (syntax & RE_NO_BK_BRACES)
-		      goto unfetch_interval;
-		    else
-		      return REG_BADBR;
-		  }
-
-		/* We just parsed a valid interval.  */
-
-		/* If it's invalid to have no preceding re.  */
-		if (!laststart)
-		  {
-		    if (syntax & RE_CONTEXT_INVALID_OPS)
-		      return REG_BADRPT;
-		    else if (syntax & RE_CONTEXT_INDEP_OPS)
-		      laststart = b;
-		    else
-		      goto unfetch_interval;
-		  }
-
-		/* If the upper bound is zero, don't want to succeed at
-		   all; jump from `laststart' to `b + 3', which will be
-		   the end of the buffer after we insert the jump.  */
-		 if (upper_bound == 0)
-		   {
-		     GET_BUFFER_SPACE (3);
-		     INSERT_JUMP (jump, laststart, b + 3);
-		     b += 3;
-		   }
-
-		 /* Otherwise, we have a nontrivial interval.  When
-		    we're all done, the pattern will look like:
-		      set_number_at <jump count> <upper bound>
-		      set_number_at <succeed_n count> <lower bound>
-		      succeed_n <after jump addr> <succeed_n count>
-		      <body of loop>
-		      jump_n <succeed_n addr> <jump count>
-		    (The upper bound and `jump_n' are omitted if
-		    `upper_bound' is 1, though.)  */
-		 else
-		   { /* If the upper bound is > 1, we need to insert
-			more at the end of the loop.  */
-		     unsigned nbytes = 10 + (upper_bound > 1) * 10;
-
-		     GET_BUFFER_SPACE (nbytes);
-
-		     /* Initialize lower bound of the `succeed_n', even
-			though it will be set during matching by its
-			attendant `set_number_at' (inserted next),
-			because `re_compile_fastmap' needs to know.
-			Jump to the `jump_n' we might insert below.  */
-		     INSERT_JUMP2 (succeed_n, laststart,
-				   b + 5 + (upper_bound > 1) * 5,
-				   lower_bound);
-		     b += 5;
-
-		     /* Code to initialize the lower bound.  Insert
-			before the `succeed_n'.  The `5' is the last two
-			bytes of this `set_number_at', plus 3 bytes of
-			the following `succeed_n'.  */
-		     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
-		     b += 5;
-
-		     if (upper_bound > 1)
-		       { /* More than one repetition is allowed, so
-			    append a backward jump to the `succeed_n'
-			    that starts this interval.
-
-			    When we've reached this during matching,
-			    we'll have matched the interval once, so
-			    jump back only `upper_bound - 1' times.  */
-			 STORE_JUMP2 (jump_n, b, laststart + 5,
-				      upper_bound - 1);
-			 b += 5;
-
-			 /* The location we want to set is the second
-			    parameter of the `jump_n'; that is `b-2' as
-			    an absolute address.  `laststart' will be
-			    the `set_number_at' we're about to insert;
-			    `laststart+3' the number to set, the source
-			    for the relative address.  But we are
-			    inserting into the middle of the pattern --
-			    so everything is getting moved up by 5.
-			    Conclusion: (b - 2) - (laststart + 3) + 5,
-			    i.e., b - laststart.
-
-			    We insert this at the beginning of the loop
-			    so that if we fail during matching, we'll
-			    reinitialize the bounds.  */
-			 insert_op2 (set_number_at, laststart, b - laststart,
-				     upper_bound - 1, b);
-			 b += 5;
-		       }
-		   }
-		pending_exact = 0;
-		beg_interval = NULL;
-	      }
-	      break;
-
-	    unfetch_interval:
-	      /* If an invalid interval, match the characters as literals.  */
-	       assert (beg_interval);
-	       p = beg_interval;
-	       beg_interval = NULL;
-
-	       /* normal_char and normal_backslash need `c'.  */
-	       PATFETCH (c);
-
-	       if (!(syntax & RE_NO_BK_BRACES))
-		 {
-		   if (p > pattern  &&  p[-1] == '\\')
-		     goto normal_backslash;
-		 }
-	       goto normal_char;
-
-#ifdef emacs
-	    /* There is no way to specify the before_dot and after_dot
-	       operators.  rms says this is ok.  --karl  */
-	    case '=':
-	      BUF_PUSH (at_dot);
-	      break;
-
-	    case 's':
-	      laststart = b;
-	      PATFETCH (c);
-	      BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
-	      break;
-
-	    case 'S':
-	      laststart = b;
-	      PATFETCH (c);
-	      BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
-	      break;
-#endif /* emacs */
-
-
-	    case 'w':
-	      laststart = b;
-	      BUF_PUSH (wordchar);
-	      break;
-
-
-	    case 'W':
-	      laststart = b;
-	      BUF_PUSH (notwordchar);
-	      break;
-
-
-	    case '<':
-	      BUF_PUSH (wordbeg);
-	      break;
-
-	    case '>':
-	      BUF_PUSH (wordend);
-	      break;
-
-	    case 'b':
-	      BUF_PUSH (wordbound);
-	      break;
-
-	    case 'B':
-	      BUF_PUSH (notwordbound);
-	      break;
-
-	    case '`':
-	      BUF_PUSH (begbuf);
-	      break;
-
-	    case '\'':
-	      BUF_PUSH (endbuf);
-	      break;
-
-	    case '1': case '2': case '3': case '4': case '5':
-	    case '6': case '7': case '8': case '9':
-	      if (syntax & RE_NO_BK_REFS)
-		goto normal_char;
-
-	      c1 = c - '0';
-
-	      if (c1 > regnum)
-		return REG_ESUBREG;
-
-	      /* Can't back reference to a subexpression if inside of it.  */
-	      if (group_in_compile_stack (compile_stack, c1))
-		goto normal_char;
-
-	      laststart = b;
-	      BUF_PUSH_2 (duplicate, c1);
-	      break;
-
-
-	    case '+':
-	    case '?':
-	      if (syntax & RE_BK_PLUS_QM)
-		goto handle_plus;
-	      else
-		goto normal_backslash;
-
-	    default:
-	    normal_backslash:
-	      /* You might think it would be useful for \ to mean
-		 not to translate; but if we don't translate it
-		 it will never match anything.  */
-	      c = TRANSLATE (c);
-	      goto normal_char;
-	    }
-	  break;
-
-
-	default:
-	/* Expects the character in `c'.  */
-	normal_char:
-	      /* If no exactn currently being built.  */
-	  if (!pending_exact
-
-	      /* If last exactn not at current position.  */
-	      || pending_exact + *pending_exact + 1 != b
-
-	      /* We have only one byte following the exactn for the count.  */
-	      || *pending_exact == (1 << BYTEWIDTH) - 1
-
-	      /* If followed by a repetition operator.  */
-	      || *p == '*' || *p == '^'
-	      || ((syntax & RE_BK_PLUS_QM)
-		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
-		  : (*p == '+' || *p == '?'))
-	      || ((syntax & RE_INTERVALS)
-		  && ((syntax & RE_NO_BK_BRACES)
-		      ? *p == '{'
-		      : (p[0] == '\\' && p[1] == '{'))))
-	    {
-	      /* Start building a new exactn.  */
-
-	      laststart = b;
-
-	      BUF_PUSH_2 (exactn, 0);
-	      pending_exact = b - 1;
-	    }
-
-	  BUF_PUSH (c);
-	  (*pending_exact)++;
-	  break;
-	} /* switch (c) */
-    } /* while p != pend */
-
-
-  /* Through the pattern now.  */
-
-  if (fixup_alt_jump)
-    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
-
-  if (!COMPILE_STACK_EMPTY)
-    return REG_EPAREN;
-
-  free (compile_stack.stack);
-
-  /* We have succeeded; set the length of the buffer.  */
-  bufp->used = b - bufp->buffer;
-
-#ifdef DEBUG
-  if (debug)
-    {
-      DEBUG_PRINT1 ("\nCompiled pattern: ");
-      print_compiled_pattern (bufp);
-    }
-#endif /* DEBUG */
-
-  return REG_NOERROR;
-} /* regex_compile */
-
-/* Subroutines for `regex_compile'.  */
-
-/* Store OP at LOC followed by two-byte integer parameter ARG.  */
-
-static void
-store_op1 (op, loc, arg)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg;
-{
-  *loc = (unsigned char) op;
-  STORE_NUMBER (loc + 1, arg);
-}
-
-
-/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
-
-static void
-store_op2 (op, loc, arg1, arg2)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg1, arg2;
-{
-  *loc = (unsigned char) op;
-  STORE_NUMBER (loc + 1, arg1);
-  STORE_NUMBER (loc + 3, arg2);
-}
-
-
-/* Copy the bytes from LOC to END to open up three bytes of space at LOC
-   for OP followed by two-byte integer parameter ARG.  */
-
-static void
-insert_op1 (op, loc, arg, end)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg;
-    unsigned char *end;
-{
-  register unsigned char *pfrom = end;
-  register unsigned char *pto = end + 3;
-
-  while (pfrom != loc)
-    *--pto = *--pfrom;
-
-  store_op1 (op, loc, arg);
-}
-
-
-/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
-
-static void
-insert_op2 (op, loc, arg1, arg2, end)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg1, arg2;
-    unsigned char *end;
-{
-  register unsigned char *pfrom = end;
-  register unsigned char *pto = end + 5;
-
-  while (pfrom != loc)
-    *--pto = *--pfrom;
-
-  store_op2 (op, loc, arg1, arg2);
-}
-
-
-/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
-   after an alternative or a begin-subexpression.  We assume there is at
-   least one character before the ^.  */
-
-static boolean
-at_begline_loc_p (pattern, p, syntax)
-    const char *pattern, *p;
-    reg_syntax_t syntax;
-{
-  const char *prev = p - 2;
-  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
-
-  return
-       /* After a subexpression?  */
-       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
-       /* After an alternative?  */
-    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
-}
-
-
-/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
-   at least one character after the $, i.e., `P < PEND'.  */
-
-static boolean
-at_endline_loc_p (p, pend, syntax)
-    const char *p, *pend;
-    int syntax;
-{
-  const char *next = p;
-  boolean next_backslash = *next == '\\';
-  const char *next_next = p + 1 < pend ? p + 1 : NULL;
-
-  return
-       /* Before a subexpression?  */
-       (syntax & RE_NO_BK_PARENS ? *next == ')'
-	: next_backslash && next_next && *next_next == ')')
-       /* Before an alternative?  */
-    || (syntax & RE_NO_BK_VBAR ? *next == '|'
-	: next_backslash && next_next && *next_next == '|');
-}
-
-
-/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
-   false if it's not.  */
-
-static boolean
-group_in_compile_stack (compile_stack, regnum)
-    compile_stack_type compile_stack;
-    regnum_t regnum;
-{
-  int this_element;
-
-  for (this_element = compile_stack.avail - 1;
-       this_element >= 0;
-       this_element--)
-    if (compile_stack.stack[this_element].regnum == regnum)
-      return true;
-
-  return false;
-}
-
-
-/* Read the ending character of a range (in a bracket expression) from the
-   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
-   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
-   Then we set the translation of all bits between the starting and
-   ending characters (inclusive) in the compiled pattern B.
-
-   Return an error code.
-
-   We use these short variable names so we can use the same macros as
-   `regex_compile' itself.  */
-
-static reg_errcode_t
-compile_range (p_ptr, pend, translate, syntax, b)
-    const char **p_ptr, *pend;
-    char *translate;
-    reg_syntax_t syntax;
-    unsigned char *b;
-{
-  unsigned this_char;
-
-  const char *p = *p_ptr;
-  int range_start, range_end;
-
-  if (p == pend)
-    return REG_ERANGE;
-
-  /* Even though the pattern is a signed `char *', we need to fetch
-     with unsigned char *'s; if the high bit of the pattern character
-     is set, the range endpoints will be negative if we fetch using a
-     signed char *.
-
-     We also want to fetch the endpoints without translating them; the
-     appropriate translation is done in the bit-setting loop below.  */
-  range_start = ((unsigned char *) p)[-2];
-  range_end   = ((unsigned char *) p)[0];
-
-  /* Have to increment the pointer into the pattern string, so the
-     caller isn't still at the ending character.  */
-  (*p_ptr)++;
-
-  /* If the start is after the end, the range is empty.  */
-  if (range_start > range_end)
-    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
-
-  /* Here we see why `this_char' has to be larger than an `unsigned
-     char' -- the range is inclusive, so if `range_end' == 0xff
-     (assuming 8-bit characters), we would otherwise go into an infinite
-     loop, since all characters <= 0xff.  */
-  for (this_char = range_start; this_char <= range_end; this_char++)
-    {
-      SET_LIST_BIT (TRANSLATE (this_char));
-    }
-
-  return REG_NOERROR;
-}
-
-/* Failure stack declarations and macros; both re_compile_fastmap and
-   re_match_2 use a failure stack.  These have to be macros because of
-   REGEX_ALLOCATE.  */
-
-
-/* Number of failure points for which to initially allocate space
-   when matching.  If this number is exceeded, we allocate more
-   space, so it is not a hard limit.  */
-#ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
-#endif
-
-/* Roughly the maximum number of failure points on the stack.  Would be
-   exactly that if always used MAX_FAILURE_SPACE each time we failed.
-   This is a variable only so users of regex can assign to it; we never
-   change it ourselves.  */
+/* Binary backward compatibility.  */
+#if _LIBC
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
+link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
 int re_max_failures = 2000;
-
-typedef const unsigned char *fail_stack_elt_t;
-
-typedef struct
-{
-  fail_stack_elt_t *stack;
-  unsigned size;
-  unsigned avail;			/* Offset of next open position.  */
-} fail_stack_type;
-
-#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
-#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
-#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
-#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
-
-
-/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
-
-#define INIT_FAIL_STACK()						\
-  do {									\
-    fail_stack.stack = (fail_stack_elt_t *)				\
-      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
-									\
-    if (fail_stack.stack == NULL)					\
-      return -2;							\
-									\
-    fail_stack.size = INIT_FAILURE_ALLOC;				\
-    fail_stack.avail = 0;						\
-  } while (0)
-
-
-/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
-
-   Return 1 if succeeds, and 0 if either ran out of memory
-   allocating space for it or it was already too large.
-
-   REGEX_REALLOCATE requires `destination' be declared.   */
-
-#define DOUBLE_FAIL_STACK(fail_stack)					\
-  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
-   ? 0									\
-   : ((fail_stack).stack = (fail_stack_elt_t *)				\
-	REGEX_REALLOCATE ((fail_stack).stack, 				\
-	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
-	  ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
-									\
-      (fail_stack).stack == NULL					\
-      ? 0								\
-      : ((fail_stack).size <<= 1, 					\
-	 1)))
-
-
-/* Push PATTERN_OP on FAIL_STACK.
-
-   Return 1 if was able to do so and 0 if ran out of memory allocating
-   space to do so.  */
-#define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
-  ((FAIL_STACK_FULL ()							\
-    && !DOUBLE_FAIL_STACK (fail_stack))					\
-    ? 0									\
-    : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
-       1))
-
-/* This pushes an item onto the failure stack.  Must be a four-byte
-   value.  Assumes the variable `fail_stack'.  Probably should only
-   be called from within `PUSH_FAILURE_POINT'.  */
-#define PUSH_FAILURE_ITEM(item)						\
-  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
-
-/* The complement operation.  Assumes `fail_stack' is nonempty.  */
-#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
-
-/* Used to omit pushing failure point id's when we're not debugging.  */
-#ifdef DEBUG
-#define DEBUG_PUSH PUSH_FAILURE_ITEM
-#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
-#else
-#define DEBUG_PUSH(item)
-#define DEBUG_POP(item_addr)
+# endif
 #endif
-
-
-/* Push the information about the state we will need
-   if we ever fail back to it.
-
-   Requires variables fail_stack, regstart, regend, reg_info, and
-   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
-   declared.
-
-   Does `return FAILURE_CODE' if runs out of memory.  */
-
-#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
-  do {									\
-    char *destination;							\
-    /* Must be int, so when we don't save any registers, the arithmetic	\
-       of 0 + -1 isn't done as unsigned.  */				\
-    int this_reg;							\
-									\
-    DEBUG_STATEMENT (failure_id++);					\
-    DEBUG_STATEMENT (nfailure_points_pushed++);				\
-    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
-    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
-    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
-									\
-    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
-    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
-									\
-    /* Ensure we have enough space allocated for what we will push.  */	\
-    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
-      {									\
-	if (!DOUBLE_FAIL_STACK (fail_stack))			\
-	  return failure_code;						\
-									\
-	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
-		       (fail_stack).size);				\
-	DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
-      }									\
-									\
-    /* Push the info, starting with the registers.  */			\
-    DEBUG_PRINT1 ("\n");						\
-									\
-    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
-	 this_reg++)							\
-      {									\
-	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
-	DEBUG_STATEMENT (num_regs_pushed++);				\
-									\
-	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
-	PUSH_FAILURE_ITEM (regstart[this_reg]);				\
-									\
-	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
-	PUSH_FAILURE_ITEM (regend[this_reg]);				\
-									\
-	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
-	DEBUG_PRINT2 (" match_null=%d",					\
-		      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
-	DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
-	DEBUG_PRINT2 (" matched_something=%d",				\
-		      MATCHED_SOMETHING (reg_info[this_reg]));		\
-	DEBUG_PRINT2 (" ever_matched=%d",				\
-		      EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
-	DEBUG_PRINT1 ("\n");						\
-	PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
-      }									\
-									\
-    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
-    PUSH_FAILURE_ITEM (lowest_active_reg);				\
-									\
-    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
-    PUSH_FAILURE_ITEM (highest_active_reg);				\
-									\
-    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
-    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
-    PUSH_FAILURE_ITEM (pattern_place);					\
-									\
-    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
-    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
-				 size2);				\
-    DEBUG_PRINT1 ("'\n");						\
-    PUSH_FAILURE_ITEM (string_place);					\
-									\
-    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
-    DEBUG_PUSH (failure_id);						\
-  } while (0)
-
-/* This is the number of items that are pushed and popped on the stack
-   for each register.  */
-#define NUM_REG_ITEMS  3
-
-/* Individual items aside from the registers.  */
-#ifdef DEBUG
-#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
-#else
-#define NUM_NONREG_ITEMS 4
-#endif
-
-/* We push at most this many items on the stack.  */
-#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
-
-/* We actually push this many items.  */
-#define NUM_FAILURE_ITEMS						\
-  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
-    + NUM_NONREG_ITEMS)
-
-/* How many items can still be added to the stack without overflowing it.  */
-#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
-
-
-/* Pops what PUSH_FAIL_STACK pushes.
-
-   We restore into the parameters, all of which should be lvalues:
-     STR -- the saved data position.
-     PAT -- the saved pattern position.
-     LOW_REG, HIGH_REG -- the highest and lowest active registers.
-     REGSTART, REGEND -- arrays of string positions.
-     REG_INFO -- array of information about each subexpression.
-
-   Also assumes the variables `fail_stack' and (if debugging), `bufp',
-   `pend', `string1', `size1', `string2', and `size2'.  */
-
-#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
-{									\
-  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
-  int this_reg;								\
-  const unsigned char *string_temp;					\
-									\
-  assert (!FAIL_STACK_EMPTY ());					\
-									\
-  /* Remove failure points and point to how many regs pushed.  */	\
-  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
-  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
-  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
-									\
-  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
-									\
-  DEBUG_POP (&failure_id);						\
-  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
-									\
-  /* If the saved string location is NULL, it came from an		\
-     on_failure_keep_string_jump opcode, and we want to throw away the	\
-     saved NULL, thus retaining our current position in the string.  */	\
-  string_temp = POP_FAILURE_ITEM ();					\
-  if (string_temp != NULL)						\
-    str = (const char *) string_temp;					\
-									\
-  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
-  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
-  DEBUG_PRINT1 ("'\n");							\
-									\
-  pat = (unsigned char *) POP_FAILURE_ITEM ();				\
-  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
-  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
-									\
-  /* Restore register info.  */						\
-  high_reg = (unsigned) POP_FAILURE_ITEM ();				\
-  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
-									\
-  low_reg = (unsigned) POP_FAILURE_ITEM ();				\
-  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
-									\
-  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
-    {									\
-      DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
-									\
-      reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
-      DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
-									\
-      regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
-      DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
-									\
-      regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
-      DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
-    }									\
-									\
-  DEBUG_STATEMENT (nfailure_points_popped++);				\
-} /* POP_FAILURE_POINT */
-
-/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
-   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
-   characters can start a string that matches the pattern.  This fastmap
-   is used by re_search to skip quickly over impossible starting points.
-
-   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
-   area as BUFP->fastmap.
-
-   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
-   the pattern buffer.
-
-   Returns 0 if we succeed, -2 if an internal error.   */
-
-int
-re_compile_fastmap (bufp)
-     struct re_pattern_buffer *bufp;
-{
-  int j, k;
-  fail_stack_type fail_stack;
-#ifndef REGEX_MALLOC
-  char *destination;
-#endif
-  /* We don't push any register information onto the failure stack.  */
-  unsigned num_regs = 0;
-
-  register char *fastmap = bufp->fastmap;
-  unsigned char *pattern = bufp->buffer;
-  unsigned long size = bufp->used;
-  const unsigned char *p = pattern;
-  register unsigned char *pend = pattern + size;
-
-  /* Assume that each path through the pattern can be null until
-     proven otherwise.  We set this false at the bottom of switch
-     statement, to which we get only if a particular path doesn't
-     match the empty string.  */
-  boolean path_can_be_null = true;
-
-  /* We aren't doing a `succeed_n' to begin with.  */
-  boolean succeed_n_p = false;
-
-  assert (fastmap != NULL && p != NULL);
-
-  INIT_FAIL_STACK ();
-  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
-  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
-  bufp->can_be_null = 0;
-
-  while (p != pend || !FAIL_STACK_EMPTY ())
-    {
-      if (p == pend)
-	{
-	  bufp->can_be_null |= path_can_be_null;
-
-	  /* Reset for next path.  */
-	  path_can_be_null = true;
-
-	  p = fail_stack.stack[--fail_stack.avail];
-	}
-
-      /* We should never be about to go beyond the end of the pattern.  */
-      assert (p < pend);
-
-#ifdef SWITCH_ENUM_BUG
-      switch ((int) ((re_opcode_t) *p++))
-#else
-      switch ((re_opcode_t) *p++)
-#endif
-	{
-
-	/* I guess the idea here is to simply not bother with a fastmap
-	   if a backreference is used, since it's too hard to figure out
-	   the fastmap for the corresponding group.  Setting
-	   `can_be_null' stops `re_search_2' from using the fastmap, so
-	   that is all we do.  */
-	case duplicate:
-	  bufp->can_be_null = 1;
-	  return 0;
-
-
-      /* Following are the cases which match a character.  These end
-	 with `break'.  */
-
-	case exactn:
-	  fastmap[p[1]] = 1;
-	  break;
-
-
-	case charset:
-	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
-	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
-	      fastmap[j] = 1;
-	  break;
-
-
-	case charset_not:
-	  /* Chars beyond end of map must be allowed.  */
-	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
-	    fastmap[j] = 1;
-
-	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
-	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
-	      fastmap[j] = 1;
-	  break;
-
-
-	case wordchar:
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
-	    if (SYNTAX (j) == Sword)
-	      fastmap[j] = 1;
-	  break;
-
-
-	case notwordchar:
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
-	    if (SYNTAX (j) != Sword)
-	      fastmap[j] = 1;
-	  break;
-
-
-	case anychar:
-	  /* `.' matches anything ...  */
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
-	    fastmap[j] = 1;
-
-	  /* ... except perhaps newline.  */
-	  if (!(bufp->syntax & RE_DOT_NEWLINE))
-	    fastmap['\n'] = 0;
-
-	  /* Return if we have already set `can_be_null'; if we have,
-	     then the fastmap is irrelevant.  Something's wrong here.  */
-	  else if (bufp->can_be_null)
-	    return 0;
-
-	  /* Otherwise, have to check alternative paths.  */
-	  break;
-
-
-#ifdef emacs
-	case syntaxspec:
-	  k = *p++;
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
-	    if (SYNTAX (j) == (enum syntaxcode) k)
-	      fastmap[j] = 1;
-	  break;
-
-
-	case notsyntaxspec:
-	  k = *p++;
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
-	    if (SYNTAX (j) != (enum syntaxcode) k)
-	      fastmap[j] = 1;
-	  break;
-
-
-      /* All cases after this match the empty string.  These end with
-	 `continue'.  */
-
-
-	case before_dot:
-	case at_dot:
-	case after_dot:
-	  continue;
-#endif /* not emacs */
-
-
-	case no_op:
-	case begline:
-	case endline:
-	case begbuf:
-	case endbuf:
-	case wordbound:
-	case notwordbound:
-	case wordbeg:
-	case wordend:
-	case push_dummy_failure:
-	  continue;
-
-
-	case jump_n:
-	case pop_failure_jump:
-	case maybe_pop_jump:
-	case jump:
-	case jump_past_alt:
-	case dummy_failure_jump:
-	  EXTRACT_NUMBER_AND_INCR (j, p);
-	  p += j;
-	  if (j > 0)
-	    continue;
-
-	  /* Jump backward implies we just went through the body of a
-	     loop and matched nothing.  Opcode jumped to should be
-	     `on_failure_jump' or `succeed_n'.  Just treat it like an
-	     ordinary jump.  For a * loop, it has pushed its failure
-	     point already; if so, discard that as redundant.  */
-	  if ((re_opcode_t) *p != on_failure_jump
-	      && (re_opcode_t) *p != succeed_n)
-	    continue;
-
-	  p++;
-	  EXTRACT_NUMBER_AND_INCR (j, p);
-	  p += j;
-
-	  /* If what's on the stack is where we are now, pop it.  */
-	  if (!FAIL_STACK_EMPTY ()
-	      && fail_stack.stack[fail_stack.avail - 1] == p)
-	    fail_stack.avail--;
-
-	  continue;
-
-
-	case on_failure_jump:
-	case on_failure_keep_string_jump:
-	handle_on_failure_jump:
-	  EXTRACT_NUMBER_AND_INCR (j, p);
-
-	  /* For some patterns, e.g., `(a?)?', `p+j' here points to the
-	     end of the pattern.  We don't want to push such a point,
-	     since when we restore it above, entering the switch will
-	     increment `p' past the end of the pattern.  We don't need
-	     to push such a point since we obviously won't find any more
-	     fastmap entries beyond `pend'.  Such a pattern can match
-	     the null string, though.  */
-	  if (p + j < pend)
-	    {
-	      if (!PUSH_PATTERN_OP (p + j, fail_stack))
-		return -2;
-	    }
-	  else
-	    bufp->can_be_null = 1;
-
-	  if (succeed_n_p)
-	    {
-	      EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
-	      succeed_n_p = false;
-	    }
-
-	  continue;
-
-
-	case succeed_n:
-	  /* Get to the number of times to succeed.  */
-	  p += 2;
-
-	  /* Increment p past the n for when k != 0.  */
-	  EXTRACT_NUMBER_AND_INCR (k, p);
-	  if (k == 0)
-	    {
-	      p -= 4;
-	      succeed_n_p = true;  /* Spaghetti code alert.  */
-	      goto handle_on_failure_jump;
-	    }
-	  continue;
-
-
-	case set_number_at:
-	  p += 4;
-	  continue;
-
-
-	case start_memory:
-	case stop_memory:
-	  p += 2;
-	  continue;
-
-
-	default:
-	  abort (); /* We have listed all the cases.  */
-	} /* switch *p++ */
-
-      /* Getting here means we have found the possible starting
-	 characters for one path of the pattern -- and that the empty
-	 string does not match.  We need not follow this path further.
-	 Instead, look at the next alternative (remembered on the
-	 stack), or quit if no more.  The test at the top of the loop
-	 does these things.  */
-      path_can_be_null = false;
-      p = pend;
-    } /* while p */
-
-  /* Set `can_be_null' for the last path (also the first path, if the
-     pattern is empty).  */
-  bufp->can_be_null |= path_can_be_null;
-  return 0;
-} /* re_compile_fastmap */
-
-/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
-   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
-   this memory for recording register information.  STARTS and ENDS
-   must be allocated using the malloc library routine, and must each
-   be at least NUM_REGS * sizeof (regoff_t) bytes long.
-
-   If NUM_REGS == 0, then subsequent matches should allocate their own
-   register data.
-
-   Unless this function is called, the first search or match using
-   PATTERN_BUFFER will allocate its own register data, without
-   freeing the old data.  */
-
-void
-re_set_registers (bufp, regs, num_regs, starts, ends)
-    struct re_pattern_buffer *bufp;
-    struct re_registers *regs;
-    unsigned num_regs;
-    regoff_t *starts, *ends;
-{
-  if (num_regs)
-    {
-      bufp->regs_allocated = REGS_REALLOCATE;
-      regs->num_regs = num_regs;
-      regs->start = starts;
-      regs->end = ends;
-    }
-  else
-    {
-      bufp->regs_allocated = REGS_UNALLOCATED;
-      regs->num_regs = 0;
-      regs->start = regs->end = (regoff_t *) 0;
-    }
-}
-
-/* Searching routines.  */
-
-/* Like re_search_2, below, but only one string is specified, and
-   doesn't let you say where to stop matching. */
-
-int
-re_search (bufp, string, size, startpos, range, regs)
-     struct re_pattern_buffer *bufp;
-     const char *string;
-     int size, startpos, range;
-     struct re_registers *regs;
-{
-  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
-		      regs, size);
-}
-
-
-/* Using the compiled pattern in BUFP->buffer, first tries to match the
-   virtual concatenation of STRING1 and STRING2, starting first at index
-   STARTPOS, then at STARTPOS + 1, and so on.
-
-   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
-
-   RANGE is how far to scan while trying to match.  RANGE = 0 means try
-   only at STARTPOS; in general, the last start tried is STARTPOS +
-   RANGE.
-
-   In REGS, return the indices of the virtual concatenation of STRING1
-   and STRING2 that matched the entire BUFP->buffer and its contained
-   subexpressions.
-
-   Do not consider matching one past the index STOP in the virtual
-   concatenation of STRING1 and STRING2.
-
-   We return either the position in the strings at which the match was
-   found, -1 if no match, or -2 if error (such as failure
-   stack overflow).  */
-
-int
-re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
-     struct re_pattern_buffer *bufp;
-     const char *string1, *string2;
-     int size1, size2;
-     int startpos;
-     int range;
-     struct re_registers *regs;
-     int stop;
-{
-  int val;
-  register char *fastmap = bufp->fastmap;
-  register char *translate = bufp->translate;
-  int total_size = size1 + size2;
-  int endpos = startpos + range;
-
-  /* Check for out-of-range STARTPOS.  */
-  if (startpos < 0 || startpos > total_size)
-    return -1;
-
-  /* Fix up RANGE if it might eventually take us outside
-     the virtual concatenation of STRING1 and STRING2.  */
-  if (endpos < -1)
-    range = -1 - startpos;
-  else if (endpos > total_size)
-    range = total_size - startpos;
-
-  /* If the search isn't to be a backwards one, don't waste time in a
-     search for a pattern that must be anchored.  */
-  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
-    {
-      if (startpos > 0)
-	return -1;
-      else
-	range = 1;
-    }
-
-  /* Update the fastmap now if not correct already.  */
-  if (fastmap && !bufp->fastmap_accurate)
-    if (re_compile_fastmap (bufp) == -2)
-      return -2;
-
-  /* Loop through the string, looking for a place to start matching.  */
-  for (;;)
-    {
-      /* If a fastmap is supplied, skip quickly over characters that
-	 cannot be the start of a match.  If the pattern can match the
-	 null string, however, we don't need to skip characters; we want
-	 the first null string.  */
-      if (fastmap && startpos < total_size && !bufp->can_be_null)
-	{
-	  if (range > 0)	/* Searching forwards.  */
-	    {
-	      register const char *d;
-	      register int lim = 0;
-	      int irange = range;
-
-	      if (startpos < size1 && startpos + range >= size1)
-		lim = range - (size1 - startpos);
-
-	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
-
-	      /* Written out as an if-else to avoid testing `translate'
-		 inside the loop.  */
-	      if (translate)
-		while (range > lim
-		       && !fastmap[(unsigned char)
-				   translate[(unsigned char) *d++]])
-		  range--;
-	      else
-		while (range > lim && !fastmap[(unsigned char) *d++])
-		  range--;
-
-	      startpos += irange - range;
-	    }
-	  else				/* Searching backwards.  */
-	    {
-	      register char c = (size1 == 0 || startpos >= size1
-				 ? string2[startpos - size1]
-				 : string1[startpos]);
-
-	      if (!fastmap[(unsigned char) TRANSLATE (c)])
-		goto advance;
-	    }
-	}
-
-      /* If can't match the null string, and that's all we have left, fail.  */
-      if (range >= 0 && startpos == total_size && fastmap
-	  && !bufp->can_be_null)
-	return -1;
-
-      val = re_match_2 (bufp, string1, size1, string2, size2,
-			startpos, regs, stop);
-      if (val >= 0)
-	return startpos;
-
-      if (val == -2)
-	return -2;
-
-    advance:
-      if (!range)
-	break;
-      else if (range > 0)
-	{
-	  range--;
-	  startpos++;
-	}
-      else
-	{
-	  range++;
-	  startpos--;
-	}
-    }
-  return -1;
-} /* re_search_2 */
-
-/* Declarations and macros for re_match_2.  */
-
-static int bcmp_translate ();
-static boolean alt_match_null_string_p (),
-	       common_op_match_null_string_p (),
-	       group_match_null_string_p ();
-
-/* Structure for per-register (a.k.a. per-group) information.
-   This must not be longer than one word, because we push this value
-   onto the failure stack.  Other register information, such as the
-   starting and ending positions (which are addresses), and the list of
-   inner groups (which is a bits list) are maintained in separate
-   variables.
-
-   We are making a (strictly speaking) nonportable assumption here: that
-   the compiler will pack our bit fields into something that fits into
-   the type of `word', i.e., is something that fits into one item on the
-   failure stack.  */
-typedef union
-{
-  fail_stack_elt_t word;
-  struct
-  {
-      /* This field is one if this group can match the empty string,
-	 zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
-#define MATCH_NULL_UNSET_VALUE 3
-    unsigned match_null_string_p : 2;
-    unsigned is_active : 1;
-    unsigned matched_something : 1;
-    unsigned ever_matched_something : 1;
-  } bits;
-} register_info_type;
-
-#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
-#define IS_ACTIVE(R)  ((R).bits.is_active)
-#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
-#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
-
-
-/* Call this when have matched a real character; it sets `matched' flags
-   for the subexpressions which we are currently inside.  Also records
-   that those subexprs have matched.  */
-#define SET_REGS_MATCHED()						\
-  do									\
-    {									\
-      unsigned r;							\
-      for (r = lowest_active_reg; r <= highest_active_reg; r++)		\
-	{								\
-	  MATCHED_SOMETHING (reg_info[r])				\
-	    = EVER_MATCHED_SOMETHING (reg_info[r])			\
-	    = 1;							\
-	}								\
-    }									\
-  while (0)
-
-
-/* This converts PTR, a pointer into one of the search strings `string1'
-   and `string2' into an offset from the beginning of that string.  */
-#define POINTER_TO_OFFSET(ptr)						\
-  (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
-
-/* Registers are set to a sentinel when they haven't yet matched.  */
-#define REG_UNSET_VALUE ((char *) -1)
-#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
-
-
-/* Macros for dealing with the split strings in re_match_2.  */
-
-#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
-
-/* Call before fetching a character with *d.  This switches over to
-   string2 if necessary.  */
-#define PREFETCH()							\
-  while (d == dend)						    	\
-    {									\
-      /* End of string2 => fail.  */					\
-      if (dend == end_match_2) 						\
-	goto fail;							\
-      /* End of string1 => advance to string2.  */ 			\
-      d = string2;						        \
-      dend = end_match_2;						\
-    }
-
-
-/* Test if at very beginning or at very end of the virtual concatenation
-   of `string1' and `string2'.  If only one string, it's `string2'.  */
-#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
-#define AT_STRINGS_END(d) ((d) == end2)
-
-
-/* Test if D points to a character which is word-constituent.  We have
-   two special cases to check for: if past the end of string1, look at
-   the first character in string2; and if before the beginning of
-   string2, look at the last character in string1.  */
-#define WORDCHAR_P(d)							\
-  (SYNTAX ((d) == end1 ? *string2					\
-	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
-   == Sword)
-
-/* Test if the character before D and the one at D differ with respect
-   to being word-constituent.  */
-#define AT_WORD_BOUNDARY(d)						\
-  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
-   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
-
-
-/* Free everything we malloc.  */
-#ifdef REGEX_MALLOC
-#define FREE_VAR(var) if (var) free (var); var = NULL
-#define FREE_VARIABLES()						\
-  do {									\
-    FREE_VAR (fail_stack.stack);					\
-    FREE_VAR (regstart);						\
-    FREE_VAR (regend);							\
-    FREE_VAR (old_regstart);						\
-    FREE_VAR (old_regend);						\
-    FREE_VAR (best_regstart);						\
-    FREE_VAR (best_regend);						\
-    FREE_VAR (reg_info);						\
-    FREE_VAR (reg_dummy);						\
-    FREE_VAR (reg_info_dummy);						\
-  } while (0)
-#else /* not REGEX_MALLOC */
-/* Some MIPS systems (at least) want this to free alloca'd storage.  */
-#define FREE_VARIABLES() alloca (0)
-#endif /* not REGEX_MALLOC */
-
-
-/* These values must meet several constraints.  They must not be valid
-   register values; since we have a limit of 255 registers (because
-   we use only one byte in the pattern for the register number), we can
-   use numbers larger than 255.  They must differ by 1, because of
-   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
-   be larger than the value for the highest register, so we do not try
-   to actually save any registers when none are active.  */
-#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
-#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
-
-/* Matching routines.  */
-
-#ifndef emacs   /* Emacs never uses this.  */
-/* re_match is like re_match_2 except it takes only a single string.  */
-
-int
-re_match (bufp, string, size, pos, regs)
-     struct re_pattern_buffer *bufp;
-     const char *string;
-     int size, pos;
-     struct re_registers *regs;
- {
-  return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
-}
-#endif /* not emacs */
-
-
-/* re_match_2 matches the compiled pattern in BUFP against the
-   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
-   and SIZE2, respectively).  We start matching at POS, and stop
-   matching at STOP.
-
-   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
-   store offsets for the substring each group matched in REGS.  See the
-   documentation for exactly how many groups we fill.
-
-   We return -1 if no match, -2 if an internal error (such as the
-   failure stack overflowing).  Otherwise, we return the length of the
-   matched substring.  */
-
-int
-re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
-     struct re_pattern_buffer *bufp;
-     const char *string1, *string2;
-     int size1, size2;
-     int pos;
-     struct re_registers *regs;
-     int stop;
-{
-  /* General temporaries.  */
-  int mcnt;
-  unsigned char *p1;
-
-  /* Just past the end of the corresponding string.  */
-  const char *end1, *end2;
-
-  /* Pointers into string1 and string2, just past the last characters in
-     each to consider matching.  */
-  const char *end_match_1, *end_match_2;
-
-  /* Where we are in the data, and the end of the current string.  */
-  const char *d, *dend;
-
-  /* Where we are in the pattern, and the end of the pattern.  */
-  unsigned char *p = bufp->buffer;
-  register unsigned char *pend = p + bufp->used;
-
-  /* We use this to map every character in the string.  */
-  char *translate = bufp->translate;
-
-  /* Failure point stack.  Each place that can handle a failure further
-     down the line pushes a failure point on this stack.  It consists of
-     restart, regend, and reg_info for all registers corresponding to
-     the subexpressions we're currently inside, plus the number of such
-     registers, and, finally, two char *'s.  The first char * is where
-     to resume scanning the pattern; the second one is where to resume
-     scanning the strings.  If the latter is zero, the failure point is
-     a ``dummy''; if a failure happens and the failure point is a dummy,
-     it gets discarded and the next next one is tried.  */
-  fail_stack_type fail_stack;
-#ifdef DEBUG
-  static unsigned failure_id = 0;
-  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
-#endif
-
-  /* We fill all the registers internally, independent of what we
-     return, for use in backreferences.  The number here includes
-     an element for register zero.  */
-  unsigned num_regs = bufp->re_nsub + 1;
-
-  /* The currently active registers.  */
-  unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
-  unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
-
-  /* Information on the contents of registers. These are pointers into
-     the input strings; they record just what was matched (on this
-     attempt) by a subexpression part of the pattern, that is, the
-     regnum-th regstart pointer points to where in the pattern we began
-     matching and the regnum-th regend points to right after where we
-     stopped matching the regnum-th subexpression.  (The zeroth register
-     keeps track of what the whole pattern matches.)  */
-  const char **regstart = NULL, **regend = NULL;
-
-  /* If a group that's operated upon by a repetition operator fails to
-     match anything, then the register for its start will need to be
-     restored because it will have been set to wherever in the string we
-     are when we last see its open-group operator.  Similarly for a
-     register's end.  */
-  const char **old_regstart = NULL, **old_regend = NULL;
-
-  /* The is_active field of reg_info helps us keep track of which (possibly
-     nested) subexpressions we are currently in. The matched_something
-     field of reg_info[reg_num] helps us tell whether or not we have
-     matched any of the pattern so far this time through the reg_num-th
-     subexpression.  These two fields get reset each time through any
-     loop their register is in.  */
-  register_info_type *reg_info = NULL;
-
-  /* The following record the register info as found in the above
-     variables when we find a match better than any we've seen before.
-     This happens as we backtrack through the failure points, which in
-     turn happens only if we have not yet matched the entire string. */
-  unsigned best_regs_set = false;
-  const char **best_regstart = NULL, **best_regend = NULL;
-
-  /* Logically, this is `best_regend[0]'.  But we don't want to have to
-     allocate space for that if we're not allocating space for anything
-     else (see below).  Also, we never need info about register 0 for
-     any of the other register vectors, and it seems rather a kludge to
-     treat `best_regend' differently than the rest.  So we keep track of
-     the end of the best match so far in a separate variable.  We
-     initialize this to NULL so that when we backtrack the first time
-     and need to test it, it's not garbage.  */
-  const char *match_end = NULL;
-
-  /* Used when we pop values we don't care about.  */
-  const char **reg_dummy = NULL;
-  register_info_type *reg_info_dummy = NULL;
-
-#ifdef DEBUG
-  /* Counts the total number of registers pushed.  */
-  unsigned num_regs_pushed = 0;
-#endif
-
-  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
-
-  INIT_FAIL_STACK ();
-
-  /* Do not bother to initialize all the register variables if there are
-     no groups in the pattern, as it takes a fair amount of time.  If
-     there are groups, we include space for register 0 (the whole
-     pattern), even though we never use it, since it simplifies the
-     array indexing.  We should fix this.  */
-  if (bufp->re_nsub)
-    {
-      regstart = REGEX_TALLOC (num_regs, const char *);
-      regend = REGEX_TALLOC (num_regs, const char *);
-      old_regstart = REGEX_TALLOC (num_regs, const char *);
-      old_regend = REGEX_TALLOC (num_regs, const char *);
-      best_regstart = REGEX_TALLOC (num_regs, const char *);
-      best_regend = REGEX_TALLOC (num_regs, const char *);
-      reg_info = REGEX_TALLOC (num_regs, register_info_type);
-      reg_dummy = REGEX_TALLOC (num_regs, const char *);
-      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
-
-      if (!(regstart && regend && old_regstart && old_regend && reg_info
-	    && best_regstart && best_regend && reg_dummy && reg_info_dummy))
-	{
-	  FREE_VARIABLES ();
-	  return -2;
-	}
-    }
-#ifdef REGEX_MALLOC
-  else
-    {
-      /* We must initialize all our variables to NULL, so that
-	 `FREE_VARIABLES' doesn't try to free them.  */
-      regstart = regend = old_regstart = old_regend = best_regstart
-	= best_regend = reg_dummy = NULL;
-      reg_info = reg_info_dummy = (register_info_type *) NULL;
-    }
-#endif /* REGEX_MALLOC */
-
-  /* The starting position is bogus.  */
-  if (pos < 0 || pos > size1 + size2)
-    {
-      FREE_VARIABLES ();
-      return -1;
-    }
-
-  /* Initialize subexpression text positions to -1 to mark ones that no
-     start_memory/stop_memory has been seen for. Also initialize the
-     register information struct.  */
-  for (mcnt = 1; mcnt < num_regs; mcnt++)
-    {
-      regstart[mcnt] = regend[mcnt]
-	= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
-
-      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
-      IS_ACTIVE (reg_info[mcnt]) = 0;
-      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
-      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
-    }
-
-  /* We move `string1' into `string2' if the latter's empty -- but not if
-     `string1' is null.  */
-  if (size2 == 0 && string1 != NULL)
-    {
-      string2 = string1;
-      size2 = size1;
-      string1 = 0;
-      size1 = 0;
-    }
-  end1 = string1 + size1;
-  end2 = string2 + size2;
-
-  /* Compute where to stop matching, within the two strings.  */
-  if (stop <= size1)
-    {
-      end_match_1 = string1 + stop;
-      end_match_2 = string2;
-    }
-  else
-    {
-      end_match_1 = end1;
-      end_match_2 = string2 + stop - size1;
-    }
-
-  /* `p' scans through the pattern as `d' scans through the data.
-     `dend' is the end of the input string that `d' points within.  `d'
-     is advanced into the following input string whenever necessary, but
-     this happens before fetching; therefore, at the beginning of the
-     loop, `d' can be pointing at the end of a string, but it cannot
-     equal `string2'.  */
-  if (size1 > 0 && pos <= size1)
-    {
-      d = string1 + pos;
-      dend = end_match_1;
-    }
-  else
-    {
-      d = string2 + pos - size1;
-      dend = end_match_2;
-    }
-
-  DEBUG_PRINT1 ("The compiled pattern is: ");
-  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
-  DEBUG_PRINT1 ("The string to match is: `");
-  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
-  DEBUG_PRINT1 ("'\n");
-
-  /* This loops over pattern commands.  It exits by returning from the
-     function if the match is complete, or it drops through if the match
-     fails at this starting point in the input data.  */
-  for (;;)
-    {
-      DEBUG_PRINT2 ("\n0x%x: ", p);
-
-      if (p == pend)
-	{ /* End of pattern means we might have succeeded.  */
-	  DEBUG_PRINT1 ("end of pattern ... ");
-
-	  /* If we haven't matched the entire string, and we want the
-	     longest match, try backtracking.  */
-	  if (d != end_match_2)
-	    {
-	      DEBUG_PRINT1 ("backtracking.\n");
-
-	      if (!FAIL_STACK_EMPTY ())
-		{ /* More failure points to try.  */
-		  boolean same_str_p = (FIRST_STRING_P (match_end)
-					== MATCHING_IN_FIRST_STRING);
-
-		  /* If exceeds best match so far, save it.  */
-		  if (!best_regs_set
-		      || (same_str_p && d > match_end)
-		      || (!same_str_p && !MATCHING_IN_FIRST_STRING))
-		    {
-		      best_regs_set = true;
-		      match_end = d;
-
-		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
-
-		      for (mcnt = 1; mcnt < num_regs; mcnt++)
-			{
-			  best_regstart[mcnt] = regstart[mcnt];
-			  best_regend[mcnt] = regend[mcnt];
-			}
-		    }
-		  goto fail;
-		}
-
-	      /* If no failure points, don't restore garbage.  */
-	      else if (best_regs_set)
-		{
-		restore_best_regs:
-		  /* Restore best match.  It may happen that `dend ==
-		     end_match_1' while the restored d is in string2.
-		     For example, the pattern `x.*y.*z' against the
-		     strings `x-' and `y-z-', if the two strings are
-		     not consecutive in memory.  */
-		  DEBUG_PRINT1 ("Restoring best registers.\n");
-
-		  d = match_end;
-		  dend = ((d >= string1 && d <= end1)
-			   ? end_match_1 : end_match_2);
-
-		  for (mcnt = 1; mcnt < num_regs; mcnt++)
-		    {
-		      regstart[mcnt] = best_regstart[mcnt];
-		      regend[mcnt] = best_regend[mcnt];
-		    }
-		}
-	    } /* d != end_match_2 */
-
-	  DEBUG_PRINT1 ("Accepting match.\n");
-
-	  /* If caller wants register contents data back, do it.  */
-	  if (regs && !bufp->no_sub)
-	    {
-	      /* Have the register data arrays been allocated?  */
-	      if (bufp->regs_allocated == REGS_UNALLOCATED)
-		{ /* No.  So allocate them with malloc.  We need one
-		     extra element beyond `num_regs' for the `-1' marker
-		     GNU code uses.  */
-		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
-		  regs->start = TALLOC (regs->num_regs, regoff_t);
-		  regs->end = TALLOC (regs->num_regs, regoff_t);
-		  if (regs->start == NULL || regs->end == NULL)
-		    return -2;
-		  bufp->regs_allocated = REGS_REALLOCATE;
-		}
-	      else if (bufp->regs_allocated == REGS_REALLOCATE)
-		{ /* Yes.  If we need more elements than were already
-		     allocated, reallocate them.  If we need fewer, just
-		     leave it alone.  */
-		  if (regs->num_regs < num_regs + 1)
-		    {
-		      regs->num_regs = num_regs + 1;
-		      RETALLOC (regs->start, regs->num_regs, regoff_t);
-		      RETALLOC (regs->end, regs->num_regs, regoff_t);
-		      if (regs->start == NULL || regs->end == NULL)
-			return -2;
-		    }
-		}
-	      else
-		assert (bufp->regs_allocated == REGS_FIXED);
-
-	      /* Convert the pointer data in `regstart' and `regend' to
-		 indices.  Register zero has to be set differently,
-		 since we haven't kept track of any info for it.  */
-	      if (regs->num_regs > 0)
-		{
-		  regs->start[0] = pos;
-		  regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
-				  : d - string2 + size1);
-		}
-
-	      /* Go through the first `min (num_regs, regs->num_regs)'
-		 registers, since that is all we initialized.  */
-	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
-		{
-		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
-		    regs->start[mcnt] = regs->end[mcnt] = -1;
-		  else
-		    {
-		      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
-		      regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
-		    }
-		}
-
-	      /* If the regs structure we return has more elements than
-		 were in the pattern, set the extra elements to -1.  If
-		 we (re)allocated the registers, this is the case,
-		 because we always allocate enough to have at least one
-		 -1 at the end.  */
-	      for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
-		regs->start[mcnt] = regs->end[mcnt] = -1;
-	    } /* regs && !bufp->no_sub */
-
-	  FREE_VARIABLES ();
-	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
-			nfailure_points_pushed, nfailure_points_popped,
-			nfailure_points_pushed - nfailure_points_popped);
-	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
-
-	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
-			    ? string1
-			    : string2 - size1);
-
-	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
-
-	  return mcnt;
-	}
-
-      /* Otherwise match next pattern command.  */
-#ifdef SWITCH_ENUM_BUG
-      switch ((int) ((re_opcode_t) *p++))
-#else
-      switch ((re_opcode_t) *p++)
-#endif
-	{
-	/* Ignore these.  Used to ignore the n of succeed_n's which
-	   currently have n == 0.  */
-	case no_op:
-	  DEBUG_PRINT1 ("EXECUTING no_op.\n");
-	  break;
-
-
-	/* Match the next n pattern characters exactly.  The following
-	   byte in the pattern defines n, and the n bytes after that
-	   are the characters to match.  */
-	case exactn:
-	  mcnt = *p++;
-	  DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
-
-	  /* This is written out as an if-else so we don't waste time
-	     testing `translate' inside the loop.  */
-	  if (translate)
-	    {
-	      do
-		{
-		  PREFETCH ();
-		  if (translate[(unsigned char) *d++] != (char) *p++)
-		    goto fail;
-		}
-	      while (--mcnt);
-	    }
-	  else
-	    {
-	      do
-		{
-		  PREFETCH ();
-		  if (*d++ != (char) *p++) goto fail;
-		}
-	      while (--mcnt);
-	    }
-	  SET_REGS_MATCHED ();
-	  break;
-
-
-	/* Match any character except possibly a newline or a null.  */
-	case anychar:
-	  DEBUG_PRINT1 ("EXECUTING anychar.\n");
-
-	  PREFETCH ();
-
-	  if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
-	      || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
-	    goto fail;
-
-	  SET_REGS_MATCHED ();
-	  DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
-	  d++;
-	  break;
-
-
-	case charset:
-	case charset_not:
-	  {
-	    register unsigned char c;
-	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
-
-	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
-
-	    PREFETCH ();
-	    c = TRANSLATE (*d); /* The character to match.  */
-
-	    /* Cast to `unsigned' instead of `unsigned char' in case the
-	       bit list is a full 32 bytes long.  */
-	    if (c < (unsigned) (*p * BYTEWIDTH)
-		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
-	      not = !not;
-
-	    p += 1 + *p;
-
-	    if (!not) goto fail;
-
-	    SET_REGS_MATCHED ();
-	    d++;
-	    break;
-	  }
-
-
-	/* The beginning of a group is represented by start_memory.
-	   The arguments are the register number in the next byte, and the
-	   number of groups inner to this one in the next.  The text
-	   matched within the group is recorded (in the internal
-	   registers data structure) under the register number.  */
-	case start_memory:
-	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
-
-	  /* Find out if this group can match the empty string.  */
-	  p1 = p;		/* To send to group_match_null_string_p.  */
-
-	  if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
-	    REG_MATCH_NULL_STRING_P (reg_info[*p])
-	      = group_match_null_string_p (&p1, pend, reg_info);
-
-	  /* Save the position in the string where we were the last time
-	     we were at this open-group operator in case the group is
-	     operated upon by a repetition operator, e.g., with `(a*)*b'
-	     against `ab'; then we want to ignore where we are now in
-	     the string in case this attempt to match fails.  */
-	  old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
-			     ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
-			     : regstart[*p];
-	  DEBUG_PRINT2 ("  old_regstart: %d\n",
-			 POINTER_TO_OFFSET (old_regstart[*p]));
-
-	  regstart[*p] = d;
-	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
-
-	  IS_ACTIVE (reg_info[*p]) = 1;
-	  MATCHED_SOMETHING (reg_info[*p]) = 0;
-
-	  /* This is the new highest active register.  */
-	  highest_active_reg = *p;
-
-	  /* If nothing was active before, this is the new lowest active
-	     register.  */
-	  if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
-	    lowest_active_reg = *p;
-
-	  /* Move past the register number and inner group count.  */
-	  p += 2;
-	  break;
-
-
-	/* The stop_memory opcode represents the end of a group.  Its
-	   arguments are the same as start_memory's: the register
-	   number, and the number of inner groups.  */
-	case stop_memory:
-	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
-
-	  /* We need to save the string position the last time we were at
-	     this close-group operator in case the group is operated
-	     upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
-	     against `aba'; then we want to ignore where we are now in
-	     the string in case this attempt to match fails.  */
-	  old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
-			   ? REG_UNSET (regend[*p]) ? d : regend[*p]
-			   : regend[*p];
-	  DEBUG_PRINT2 ("      old_regend: %d\n",
-			 POINTER_TO_OFFSET (old_regend[*p]));
-
-	  regend[*p] = d;
-	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
-
-	  /* This register isn't active anymore.  */
-	  IS_ACTIVE (reg_info[*p]) = 0;
-
-	  /* If this was the only register active, nothing is active
-	     anymore.  */
-	  if (lowest_active_reg == highest_active_reg)
-	    {
-	      lowest_active_reg = NO_LOWEST_ACTIVE_REG;
-	      highest_active_reg = NO_HIGHEST_ACTIVE_REG;
-	    }
-	  else
-	    { /* We must scan for the new highest active register, since
-		 it isn't necessarily one less than now: consider
-		 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
-		 new highest active register is 1.  */
-	      unsigned char r = *p - 1;
-	      while (r > 0 && !IS_ACTIVE (reg_info[r]))
-		r--;
-
-	      /* If we end up at register zero, that means that we saved
-		 the registers as the result of an `on_failure_jump', not
-		 a `start_memory', and we jumped to past the innermost
-		 `stop_memory'.  For example, in ((.)*) we save
-		 registers 1 and 2 as a result of the *, but when we pop
-		 back to the second ), we are at the stop_memory 1.
-		 Thus, nothing is active.  */
-	      if (r == 0)
-		{
-		  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
-		  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
-		}
-	      else
-		highest_active_reg = r;
-	    }
-
-	  /* If just failed to match something this time around with a
-	     group that's operated on by a repetition operator, try to
-	     force exit from the ``loop'', and restore the register
-	     information for this group that we had before trying this
-	     last match.  */
-	  if ((!MATCHED_SOMETHING (reg_info[*p])
-	       || (re_opcode_t) p[-3] == start_memory)
-	      && (p + 2) < pend)
-	    {
-	      boolean is_a_jump_n = false;
-
-	      p1 = p + 2;
-	      mcnt = 0;
-	      switch ((re_opcode_t) *p1++)
-		{
-		  case jump_n:
-		    is_a_jump_n = true;
-		  case pop_failure_jump:
-		  case maybe_pop_jump:
-		  case jump:
-		  case dummy_failure_jump:
-		    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-		    if (is_a_jump_n)
-		      p1 += 2;
-		    break;
-
-		  default:
-		    /* do nothing */ ;
-		}
-	      p1 += mcnt;
-
-	      /* If the next operation is a jump backwards in the pattern
-		 to an on_failure_jump right before the start_memory
-		 corresponding to this stop_memory, exit from the loop
-		 by forcing a failure after pushing on the stack the
-		 on_failure_jump's jump in the pattern, and d.  */
-	      if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
-		  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
-		{
-		  /* If this group ever matched anything, then restore
-		     what its registers were before trying this last
-		     failed match, e.g., with `(a*)*b' against `ab' for
-		     regstart[1], and, e.g., with `((a*)*(b*)*)*'
-		     against `aba' for regend[3].
-
-		     Also restore the registers for inner groups for,
-		     e.g., `((a*)(b*))*' against `aba' (register 3 would
-		     otherwise get trashed).  */
-
-		  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
-		    {
-		      unsigned r;
-
-		      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
-
-		      /* Restore this and inner groups' (if any) registers.  */
-		      for (r = *p; r < *p + *(p + 1); r++)
-			{
-			  regstart[r] = old_regstart[r];
-
-			  /* xx why this test?  */
-			  if ((int) old_regend[r] >= (int) regstart[r])
-			    regend[r] = old_regend[r];
-			}
-		    }
-		  p1++;
-		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-		  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
-
-		  goto fail;
-		}
-	    }
-
-	  /* Move past the register number and the inner group count.  */
-	  p += 2;
-	  break;
-
-
-	/* \<digit> has been turned into a `duplicate' command which is
-	   followed by the numeric value of <digit> as the register number.  */
-	case duplicate:
-	  {
-	    register const char *d2, *dend2;
-	    int regno = *p++;   /* Get which register to match against.  */
-	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
-
-	    /* Can't back reference a group which we've never matched.  */
-	    if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
-	      goto fail;
-
-	    /* Where in input to try to start matching.  */
-	    d2 = regstart[regno];
-
-	    /* Where to stop matching; if both the place to start and
-	       the place to stop matching are in the same string, then
-	       set to the place to stop, otherwise, for now have to use
-	       the end of the first string.  */
-
-	    dend2 = ((FIRST_STRING_P (regstart[regno])
-		      == FIRST_STRING_P (regend[regno]))
-		     ? regend[regno] : end_match_1);
-	    for (;;)
-	      {
-		/* If necessary, advance to next segment in register
-		   contents.  */
-		while (d2 == dend2)
-		  {
-		    if (dend2 == end_match_2) break;
-		    if (dend2 == regend[regno]) break;
-
-		    /* End of string1 => advance to string2. */
-		    d2 = string2;
-		    dend2 = regend[regno];
-		  }
-		/* At end of register contents => success */
-		if (d2 == dend2) break;
-
-		/* If necessary, advance to next segment in data.  */
-		PREFETCH ();
-
-		/* How many characters left in this segment to match.  */
-		mcnt = dend - d;
-
-		/* Want how many consecutive characters we can match in
-		   one shot, so, if necessary, adjust the count.  */
-		if (mcnt > dend2 - d2)
-		  mcnt = dend2 - d2;
-
-		/* Compare that many; failure if mismatch, else move
-		   past them.  */
-		if (translate
-		    ? bcmp_translate (d, d2, mcnt, translate)
-		    : bcmp (d, d2, mcnt))
-		  goto fail;
-		d += mcnt, d2 += mcnt;
-	      }
-	  }
-	  break;
-
-
-	/* begline matches the empty string at the beginning of the string
-	   (unless `not_bol' is set in `bufp'), and, if
-	   `newline_anchor' is set, after newlines.  */
-	case begline:
-	  DEBUG_PRINT1 ("EXECUTING begline.\n");
-
-	  if (AT_STRINGS_BEG (d))
-	    {
-	      if (!bufp->not_bol) break;
-	    }
-	  else if (d[-1] == '\n' && bufp->newline_anchor)
-	    {
-	      break;
-	    }
-	  /* In all other cases, we fail.  */
-	  goto fail;
-
-
-	/* endline is the dual of begline.  */
-	case endline:
-	  DEBUG_PRINT1 ("EXECUTING endline.\n");
-
-	  if (AT_STRINGS_END (d))
-	    {
-	      if (!bufp->not_eol) break;
-	    }
-
-	  /* We have to ``prefetch'' the next character.  */
-	  else if ((d == end1 ? *string2 : *d) == '\n'
-		   && bufp->newline_anchor)
-	    {
-	      break;
-	    }
-	  goto fail;
-
-
-	/* Match at the very beginning of the data.  */
-	case begbuf:
-	  DEBUG_PRINT1 ("EXECUTING begbuf.\n");
-	  if (AT_STRINGS_BEG (d))
-	    break;
-	  goto fail;
-
-
-	/* Match at the very end of the data.  */
-	case endbuf:
-	  DEBUG_PRINT1 ("EXECUTING endbuf.\n");
-	  if (AT_STRINGS_END (d))
-	    break;
-	  goto fail;
-
-
-	/* on_failure_keep_string_jump is used to optimize `.*\n'.  It
-	   pushes NULL as the value for the string on the stack.  Then
-	   `pop_failure_point' will keep the current value for the
-	   string, instead of restoring it.  To see why, consider
-	   matching `foo\nbar' against `.*\n'.  The .* matches the foo;
-	   then the . fails against the \n.  But the next thing we want
-	   to do is match the \n against the \n; if we restored the
-	   string value, we would be back at the foo.
-
-	   Because this is used only in specific cases, we don't need to
-	   check all the things that `on_failure_jump' does, to make
-	   sure the right things get saved on the stack.  Hence we don't
-	   share its code.  The only reason to push anything on the
-	   stack at all is that otherwise we would have to change
-	   `anychar's code to do something besides goto fail in this
-	   case; that seems worse than this.  */
-	case on_failure_keep_string_jump:
-	  DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
-
-	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
-	  DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
-
-	  PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
-	  break;
-
-
-	/* Uses of on_failure_jump:
-
-	   Each alternative starts with an on_failure_jump that points
-	   to the beginning of the next alternative.  Each alternative
-	   except the last ends with a jump that in effect jumps past
-	   the rest of the alternatives.  (They really jump to the
-	   ending jump of the following alternative, because tensioning
-	   these jumps is a hassle.)
-
-	   Repeats start with an on_failure_jump that points past both
-	   the repetition text and either the following jump or
-	   pop_failure_jump back to this on_failure_jump.  */
-	case on_failure_jump:
-	on_failure:
-	  DEBUG_PRINT1 ("EXECUTING on_failure_jump");
-
-	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
-	  DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
-
-	  /* If this on_failure_jump comes right before a group (i.e.,
-	     the original * applied to a group), save the information
-	     for that group and all inner ones, so that if we fail back
-	     to this point, the group's information will be correct.
-	     For example, in \(a*\)*\1, we need the preceding group,
-	     and in \(\(a*\)b*\)\2, we need the inner group.  */
-
-	  /* We can't use `p' to check ahead because we push
-	     a failure point to `p + mcnt' after we do this.  */
-	  p1 = p;
-
-	  /* We need to skip no_op's before we look for the
-	     start_memory in case this on_failure_jump is happening as
-	     the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
-	     against aba.  */
-	  while (p1 < pend && (re_opcode_t) *p1 == no_op)
-	    p1++;
-
-	  if (p1 < pend && (re_opcode_t) *p1 == start_memory)
-	    {
-	      /* We have a new highest active register now.  This will
-		 get reset at the start_memory we are about to get to,
-		 but we will have saved all the registers relevant to
-		 this repetition op, as described above.  */
-	      highest_active_reg = *(p1 + 1) + *(p1 + 2);
-	      if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
-		lowest_active_reg = *(p1 + 1);
-	    }
-
-	  DEBUG_PRINT1 (":\n");
-	  PUSH_FAILURE_POINT (p + mcnt, d, -2);
-	  break;
-
-
-	/* A smart repeat ends with `maybe_pop_jump'.
-	   We change it to either `pop_failure_jump' or `jump'.  */
-	case maybe_pop_jump:
-	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
-	  DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
-	  {
-	    register unsigned char *p2 = p;
-
-	    /* Compare the beginning of the repeat with what in the
-	       pattern follows its end. If we can establish that there
-	       is nothing that they would both match, i.e., that we
-	       would have to backtrack because of (as in, e.g., `a*a')
-	       then we can change to pop_failure_jump, because we'll
-	       never have to backtrack.
-
-	       This is not true in the case of alternatives: in
-	       `(a|ab)*' we do need to backtrack to the `ab' alternative
-	       (e.g., if the string was `ab').  But instead of trying to
-	       detect that here, the alternative has put on a dummy
-	       failure point which is what we will end up popping.  */
-
-	    /* Skip over open/close-group commands.  */
-	    while (p2 + 2 < pend
-		   && ((re_opcode_t) *p2 == stop_memory
-		       || (re_opcode_t) *p2 == start_memory))
-	      p2 += 3;			/* Skip over args, too.  */
-
-	    /* If we're at the end of the pattern, we can change.  */
-	    if (p2 == pend)
-	      {
-		/* Consider what happens when matching ":\(.*\)"
-		   against ":/".  I don't really understand this code
-		   yet.  */
-		p[-3] = (unsigned char) pop_failure_jump;
-		DEBUG_PRINT1
-		  ("  End of pattern: change to `pop_failure_jump'.\n");
-	      }
-
-	    else if ((re_opcode_t) *p2 == exactn
-		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
-	      {
-		register unsigned char c
-		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
-		p1 = p + mcnt;
-
-		/* p1[0] ... p1[2] are the `on_failure_jump' corresponding
-		   to the `maybe_finalize_jump' of this case.  Examine what
-		   follows.  */
-		if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
-		  {
-		    p[-3] = (unsigned char) pop_failure_jump;
-		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
-				  c, p1[5]);
-		  }
-
-		else if ((re_opcode_t) p1[3] == charset
-			 || (re_opcode_t) p1[3] == charset_not)
-		  {
-		    int not = (re_opcode_t) p1[3] == charset_not;
-
-		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
-			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
-		      not = !not;
-
-		    /* `not' is equal to 1 if c would match, which means
-			that we can't change to pop_failure_jump.  */
-		    if (!not)
-		      {
-			p[-3] = (unsigned char) pop_failure_jump;
-			DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
-		      }
-		  }
-	      }
-	  }
-	  p -= 2;		/* Point at relative address again.  */
-	  if ((re_opcode_t) p[-1] != pop_failure_jump)
-	    {
-	      p[-1] = (unsigned char) jump;
-	      DEBUG_PRINT1 ("  Match => jump.\n");
-	      goto unconditional_jump;
-	    }
-	/* Note fall through.  */
-
-
-	/* The end of a simple repeat has a pop_failure_jump back to
-	   its matching on_failure_jump, where the latter will push a
-	   failure point.  The pop_failure_jump takes off failure
-	   points put on by this pop_failure_jump's matching
-	   on_failure_jump; we got through the pattern to here from the
-	   matching on_failure_jump, so didn't fail.  */
-	case pop_failure_jump:
-	  {
-	    /* We need to pass separate storage for the lowest and
-	       highest registers, even though we don't care about the
-	       actual values.  Otherwise, we will restore only one
-	       register from the stack, since lowest will == highest in
-	       `pop_failure_point'.  */
-	    unsigned dummy_low_reg, dummy_high_reg;
-	    unsigned char *pdummy;
-	    const char *sdummy;
-
-	    DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
-	    POP_FAILURE_POINT (sdummy, pdummy,
-			       dummy_low_reg, dummy_high_reg,
-			       reg_dummy, reg_dummy, reg_info_dummy);
-	  }
-	  /* Note fall through.  */
-
-
-	/* Unconditionally jump (without popping any failure points).  */
-	case jump:
-	unconditional_jump:
-	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
-	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
-	  p += mcnt;				/* Do the jump.  */
-	  DEBUG_PRINT2 ("(to 0x%x).\n", p);
-	  break;
-
-
-	/* We need this opcode so we can detect where alternatives end
-	   in `group_match_null_string_p' et al.  */
-	case jump_past_alt:
-	  DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
-	  goto unconditional_jump;
-
-
-	/* Normally, the on_failure_jump pushes a failure point, which
-	   then gets popped at pop_failure_jump.  We will end up at
-	   pop_failure_jump, also, and with a pattern of, say, `a+', we
-	   are skipping over the on_failure_jump, so we have to push
-	   something meaningless for pop_failure_jump to pop.  */
-	case dummy_failure_jump:
-	  DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
-	  /* It doesn't matter what we push for the string here.  What
-	     the code at `fail' tests is the value for the pattern.  */
-	  PUSH_FAILURE_POINT (0, 0, -2);
-	  goto unconditional_jump;
-
-
-	/* At the end of an alternative, we need to push a dummy failure
-	   point in case we are followed by a `pop_failure_jump', because
-	   we don't want the failure point for the alternative to be
-	   popped.  For example, matching `(a|ab)*' against `aab'
-	   requires that we match the `ab' alternative.  */
-	case push_dummy_failure:
-	  DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
-	  /* See comments just above at `dummy_failure_jump' about the
-	     two zeroes.  */
-	  PUSH_FAILURE_POINT (0, 0, -2);
-	  break;
-
-	/* Have to succeed matching what follows at least n times.
-	   After that, handle like `on_failure_jump'.  */
-	case succeed_n:
-	  EXTRACT_NUMBER (mcnt, p + 2);
-	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
-
-	  assert (mcnt >= 0);
-	  /* Originally, this is how many times we HAVE to succeed.  */
-	  if (mcnt > 0)
-	    {
-	       mcnt--;
-	       p += 2;
-	       STORE_NUMBER_AND_INCR (p, mcnt);
-	       DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
-	    }
-	  else if (mcnt == 0)
-	    {
-	      DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
-	      p[2] = (unsigned char) no_op;
-	      p[3] = (unsigned char) no_op;
-	      goto on_failure;
-	    }
-	  break;
-
-	case jump_n:
-	  EXTRACT_NUMBER (mcnt, p + 2);
-	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
-
-	  /* Originally, this is how many times we CAN jump.  */
-	  if (mcnt)
-	    {
-	       mcnt--;
-	       STORE_NUMBER (p + 2, mcnt);
-	       goto unconditional_jump;
-	    }
-	  /* If don't have to jump any more, skip over the rest of command.  */
-	  else
-	    p += 4;
-	  break;
-
-	case set_number_at:
-	  {
-	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
-
-	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
-	    p1 = p + mcnt;
-	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
-	    DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
-	    STORE_NUMBER (p1, mcnt);
-	    break;
-	  }
-
-	case wordbound:
-	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
-	  if (AT_WORD_BOUNDARY (d))
-	    break;
-	  goto fail;
-
-	case notwordbound:
-	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
-	  if (AT_WORD_BOUNDARY (d))
-	    goto fail;
-	  break;
-
-	case wordbeg:
-	  DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
-	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
-	    break;
-	  goto fail;
-
-	case wordend:
-	  DEBUG_PRINT1 ("EXECUTING wordend.\n");
-	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
-	      && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
-	    break;
-	  goto fail;
-
-#ifdef emacs
-#ifdef emacs19
-	case before_dot:
-	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
-	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
-	    goto fail;
-	  break;
-
-	case at_dot:
-	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
-	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
-	    goto fail;
-	  break;
-
-	case after_dot:
-	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
-	  if (PTR_CHAR_POS ((unsigned char *) d) <= point)
-	    goto fail;
-	  break;
-#else /* not emacs19 */
-	case at_dot:
-	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
-	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
-	    goto fail;
-	  break;
-#endif /* not emacs19 */
-
-	case syntaxspec:
-	  DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
-	  mcnt = *p++;
-	  goto matchsyntax;
-
-	case wordchar:
-	  DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
-	  mcnt = (int) Sword;
-	matchsyntax:
-	  PREFETCH ();
-	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
-	    goto fail;
-	  SET_REGS_MATCHED ();
-	  break;
-
-	case notsyntaxspec:
-	  DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
-	  mcnt = *p++;
-	  goto matchnotsyntax;
-
-	case notwordchar:
-	  DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
-	  mcnt = (int) Sword;
-	matchnotsyntax:
-	  PREFETCH ();
-	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
-	    goto fail;
-	  SET_REGS_MATCHED ();
-	  break;
-
-#else /* not emacs */
-	case wordchar:
-	  DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
-	  PREFETCH ();
-	  if (!WORDCHAR_P (d))
-	    goto fail;
-	  SET_REGS_MATCHED ();
-	  d++;
-	  break;
-
-	case notwordchar:
-	  DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
-	  PREFETCH ();
-	  if (WORDCHAR_P (d))
-	    goto fail;
-	  SET_REGS_MATCHED ();
-	  d++;
-	  break;
-#endif /* not emacs */
-
-	default:
-	  abort ();
-	}
-      continue;  /* Successfully executed one pattern command; keep going.  */
-
-
-    /* We goto here if a matching operation fails. */
-    fail:
-      if (!FAIL_STACK_EMPTY ())
-	{ /* A restart point is known.  Restore to that state.  */
-	  DEBUG_PRINT1 ("\nFAIL:\n");
-	  POP_FAILURE_POINT (d, p,
-			     lowest_active_reg, highest_active_reg,
-			     regstart, regend, reg_info);
-
-	  /* If this failure point is a dummy, try the next one.  */
-	  if (!p)
-	    goto fail;
-
-	  /* If we failed to the end of the pattern, don't examine *p.  */
-	  assert (p <= pend);
-	  if (p < pend)
-	    {
-	      boolean is_a_jump_n = false;
-
-	      /* If failed to a backwards jump that's part of a repetition
-		 loop, need to pop this failure point and use the next one.  */
-	      switch ((re_opcode_t) *p)
-		{
-		case jump_n:
-		  is_a_jump_n = true;
-		case maybe_pop_jump:
-		case pop_failure_jump:
-		case jump:
-		  p1 = p + 1;
-		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-		  p1 += mcnt;
-
-		  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
-		      || (!is_a_jump_n
-			  && (re_opcode_t) *p1 == on_failure_jump))
-		    goto fail;
-		  break;
-		default:
-		  /* do nothing */ ;
-		}
-	    }
-
-	  if (d >= string1 && d <= end1)
-	    dend = end_match_1;
-	}
-      else
-	break;   /* Matching at this starting point really fails.  */
-    } /* for (;;) */
-
-  if (best_regs_set)
-    goto restore_best_regs;
-
-  FREE_VARIABLES ();
-
-  return -1;         			/* Failure to match.  */
-} /* re_match_2 */
-
-/* Subroutine definitions for re_match_2.  */
-
-
-/* We are passed P pointing to a register number after a start_memory.
-
-   Return true if the pattern up to the corresponding stop_memory can
-   match the empty string, and false otherwise.
-
-   If we find the matching stop_memory, sets P to point to one past its number.
-   Otherwise, sets P to an undefined byte less than or equal to END.
-
-   We don't handle duplicates properly (yet).  */
-
-static boolean
-group_match_null_string_p (p, end, reg_info)
-    unsigned char **p, *end;
-    register_info_type *reg_info;
-{
-  int mcnt;
-  /* Point to after the args to the start_memory.  */
-  unsigned char *p1 = *p + 2;
-
-  while (p1 < end)
-    {
-      /* Skip over opcodes that can match nothing, and return true or
-	 false, as appropriate, when we get to one that can't, or to the
-	 matching stop_memory.  */
-
-      switch ((re_opcode_t) *p1)
-	{
-	/* Could be either a loop or a series of alternatives.  */
-	case on_failure_jump:
-	  p1++;
-	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-
-	  /* If the next operation is not a jump backwards in the
-	     pattern.  */
-
-	  if (mcnt >= 0)
-	    {
-	      /* Go through the on_failure_jumps of the alternatives,
-		 seeing if any of the alternatives cannot match nothing.
-		 The last alternative starts with only a jump,
-		 whereas the rest start with on_failure_jump and end
-		 with a jump, e.g., here is the pattern for `a|b|c':
-
-		 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
-		 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
-		 /exactn/1/c
-
-		 So, we have to first go through the first (n-1)
-		 alternatives and then deal with the last one separately.  */
-
-
-	      /* Deal with the first (n-1) alternatives, which start
-		 with an on_failure_jump (see above) that jumps to right
-		 past a jump_past_alt.  */
-
-	      while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
-		{
-		  /* `mcnt' holds how many bytes long the alternative
-		     is, including the ending `jump_past_alt' and
-		     its number.  */
-
-		  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
-						      reg_info))
-		    return false;
-
-		  /* Move to right after this alternative, including the
-		     jump_past_alt.  */
-		  p1 += mcnt;
-
-		  /* Break if it's the beginning of an n-th alternative
-		     that doesn't begin with an on_failure_jump.  */
-		  if ((re_opcode_t) *p1 != on_failure_jump)
-		    break;
-
-		  /* Still have to check that it's not an n-th
-		     alternative that starts with an on_failure_jump.  */
-		  p1++;
-		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-		  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
-		    {
-		      /* Get to the beginning of the n-th alternative.  */
-		      p1 -= 3;
-		      break;
-		    }
-		}
-
-	      /* Deal with the last alternative: go back and get number
-		 of the `jump_past_alt' just before it.  `mcnt' contains
-		 the length of the alternative.  */
-	      EXTRACT_NUMBER (mcnt, p1 - 2);
-
-	      if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
-		return false;
-
-	      p1 += mcnt;	/* Get past the n-th alternative.  */
-	    } /* if mcnt > 0 */
-	  break;
-
-
-	case stop_memory:
-	  assert (p1[1] == **p);
-	  *p = p1 + 2;
-	  return true;
-
-
-	default:
-	  if (!common_op_match_null_string_p (&p1, end, reg_info))
-	    return false;
-	}
-    } /* while p1 < end */
-
-  return false;
-} /* group_match_null_string_p */
-
-
-/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
-   It expects P to be the first byte of a single alternative and END one
-   byte past the last. The alternative can contain groups.  */
-
-static boolean
-alt_match_null_string_p (p, end, reg_info)
-    unsigned char *p, *end;
-    register_info_type *reg_info;
-{
-  int mcnt;
-  unsigned char *p1 = p;
-
-  while (p1 < end)
-    {
-      /* Skip over opcodes that can match nothing, and break when we get
-	 to one that can't.  */
-
-      switch ((re_opcode_t) *p1)
-	{
-	/* It's a loop.  */
-	case on_failure_jump:
-	  p1++;
-	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-	  p1 += mcnt;
-	  break;
-
-	default:
-	  if (!common_op_match_null_string_p (&p1, end, reg_info))
-	    return false;
-	}
-    }  /* while p1 < end */
-
-  return true;
-} /* alt_match_null_string_p */
-
-
-/* Deals with the ops common to group_match_null_string_p and
-   alt_match_null_string_p.
-
-   Sets P to one after the op and its arguments, if any.  */
-
-static boolean
-common_op_match_null_string_p (p, end, reg_info)
-    unsigned char **p, *end;
-    register_info_type *reg_info;
-{
-  int mcnt;
-  boolean ret;
-  int reg_no;
-  unsigned char *p1 = *p;
-
-  switch ((re_opcode_t) *p1++)
-    {
-    case no_op:
-    case begline:
-    case endline:
-    case begbuf:
-    case endbuf:
-    case wordbeg:
-    case wordend:
-    case wordbound:
-    case notwordbound:
-#ifdef emacs
-    case before_dot:
-    case at_dot:
-    case after_dot:
-#endif
-      break;
-
-    case start_memory:
-      reg_no = *p1;
-      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
-      ret = group_match_null_string_p (&p1, end, reg_info);
-
-      /* Have to set this here in case we're checking a group which
-	 contains a group and a back reference to it.  */
-
-      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
-	REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
-
-      if (!ret)
-	return false;
-      break;
-
-    /* If this is an optimized succeed_n for zero times, make the jump.  */
-    case jump:
-      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-      if (mcnt >= 0)
-	p1 += mcnt;
-      else
-	return false;
-      break;
-
-    case succeed_n:
-      /* Get to the number of times to succeed.  */
-      p1 += 2;
-      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-
-      if (mcnt == 0)
-	{
-	  p1 -= 4;
-	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-	  p1 += mcnt;
-	}
-      else
-	return false;
-      break;
-
-    case duplicate:
-      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
-	return false;
-      break;
-
-    case set_number_at:
-      p1 += 4;
-
-    default:
-      /* All other opcodes mean we cannot match the empty string.  */
-      return false;
-  }
-
-  *p = p1;
-  return true;
-} /* common_op_match_null_string_p */
-
-
-/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
-   bytes; nonzero otherwise.  */
-
-static int
-bcmp_translate(
-     unsigned char *s1,
-     unsigned char *s2,
-     int len,
-     char *translate
-)
-{
-  register unsigned char *p1 = s1, *p2 = s2;
-  while (len)
-    {
-      if (translate[*p1++] != translate[*p2++]) return 1;
-      len--;
-    }
-  return 0;
-}
-
-/* Entry points for GNU code.  */
-
-/* re_compile_pattern is the GNU regular expression compiler: it
-   compiles PATTERN (of length SIZE) and puts the result in BUFP.
-   Returns 0 if the pattern was valid, otherwise an error string.
-
-   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
-   are set in BUFP on entry.
-
-   We call regex_compile to do the actual compilation.  */
-
-const char *
-re_compile_pattern (pattern, length, bufp)
-     const char *pattern;
-     int length;
-     struct re_pattern_buffer *bufp;
-{
-  reg_errcode_t ret;
-
-  /* GNU code is written to assume at least RE_NREGS registers will be set
-     (and at least one extra will be -1).  */
-  bufp->regs_allocated = REGS_UNALLOCATED;
-
-  /* And GNU code determines whether or not to get register information
-     by passing null for the REGS argument to re_match, etc., not by
-     setting no_sub.  */
-  bufp->no_sub = 0;
-
-  /* Match anchors at newline.  */
-  bufp->newline_anchor = 1;
-
-  ret = regex_compile (pattern, length, re_syntax_options, bufp);
-
-  return re_error_msg[(int) ret];
-}
-
-/* Entry points compatible with 4.2 BSD regex library.  We don't define
-   them if this is an Emacs or POSIX compilation.  */
-
-#if !defined (emacs) && !defined (_POSIX_SOURCE)
-
-/* BSD has one and only one pattern buffer.  */
-static struct re_pattern_buffer re_comp_buf;
-
-char *
-re_comp (s)
-    const char *s;
-{
-  reg_errcode_t ret;
-
-  if (!s)
-    {
-      if (!re_comp_buf.buffer)
-	return "No previous regular expression";
-      return 0;
-    }
-
-  if (!re_comp_buf.buffer)
-    {
-      re_comp_buf.buffer = (unsigned char *) malloc (200);
-      if (re_comp_buf.buffer == NULL)
-	return "Memory exhausted";
-      re_comp_buf.allocated = 200;
-
-      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
-      if (re_comp_buf.fastmap == NULL)
-	return "Memory exhausted";
-    }
-
-  /* Since `re_exec' always passes NULL for the `regs' argument, we
-     don't need to initialize the pattern buffer fields which affect it.  */
-
-  /* Match anchors at newlines.  */
-  re_comp_buf.newline_anchor = 1;
-
-  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
-
-  /* Yes, we're discarding `const' here.  */
-  return (char *) re_error_msg[(int) ret];
-}
-
-
-int
-re_exec (s)
-    const char *s;
-{
-  const int len = strlen (s);
-  return
-    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
-}
-#endif /* not emacs and not _POSIX_SOURCE */
-
-/* POSIX.2 functions.  Don't define these for Emacs.  */
-
-#ifndef emacs
-
-/* regcomp takes a regular expression as a string and compiles it.
-
-   PREG is a regex_t *.  We do not expect any fields to be initialized,
-   since POSIX says we shouldn't.  Thus, we set
-
-     `buffer' to the compiled pattern;
-     `used' to the length of the compiled pattern;
-     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
-       REG_EXTENDED bit in CFLAGS is set; otherwise, to
-       RE_SYNTAX_POSIX_BASIC;
-     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
-     `fastmap' and `fastmap_accurate' to zero;
-     `re_nsub' to the number of subexpressions in PATTERN.
-
-   PATTERN is the address of the pattern string.
-
-   CFLAGS is a series of bits which affect compilation.
-
-     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
-     use POSIX basic syntax.
-
-     If REG_NEWLINE is set, then . and [^...] don't match newline.
-     Also, regexec will try a match beginning after every newline.
-
-     If REG_ICASE is set, then we considers upper- and lowercase
-     versions of letters to be equivalent when matching.
-
-     If REG_NOSUB is set, then when PREG is passed to regexec, that
-     routine will report only success or failure, and nothing about the
-     registers.
-
-   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
-   the return codes and their meanings.)  */
-
-int
-regcomp (preg, pattern, cflags)
-    regex_t *preg;
-    const char *pattern;
-    int cflags;
-{
-  reg_errcode_t ret;
-  unsigned syntax
-    = (cflags & REG_EXTENDED) ?
-      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
-
-  /* regex_compile will allocate the space for the compiled pattern.  */
-  preg->buffer = 0;
-  preg->allocated = 0;
-
-  /* Don't bother to use a fastmap when searching.  This simplifies the
-     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
-     characters after newlines into the fastmap.  This way, we just try
-     every character.  */
-  preg->fastmap = 0;
-
-  if (cflags & REG_ICASE)
-    {
-      unsigned i;
-
-      preg->translate = (char *) malloc (CHAR_SET_SIZE);
-      if (preg->translate == NULL)
-	return (int) REG_ESPACE;
-
-      /* Map uppercase characters to corresponding lowercase ones.  */
-      for (i = 0; i < CHAR_SET_SIZE; i++)
-	preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
-    }
-  else
-    preg->translate = NULL;
-
-  /* If REG_NEWLINE is set, newlines are treated differently.  */
-  if (cflags & REG_NEWLINE)
-    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
-      syntax &= ~RE_DOT_NEWLINE;
-      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
-      /* It also changes the matching behavior.  */
-      preg->newline_anchor = 1;
-    }
-  else
-    preg->newline_anchor = 0;
-
-  preg->no_sub = !!(cflags & REG_NOSUB);
-
-  /* POSIX says a null character in the pattern terminates it, so we
-     can use strlen here in compiling the pattern.  */
-  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
-
-  /* POSIX doesn't distinguish between an unmatched open-group and an
-     unmatched close-group: both are REG_EPAREN.  */
-  if (ret == REG_ERPAREN) ret = REG_EPAREN;
-
-  return (int) ret;
-}
-
-
-/* regexec searches for a given pattern, specified by PREG, in the
-   string STRING.
-
-   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
-   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
-   least NMATCH elements, and we set them to the offsets of the
-   corresponding matched substrings.
-
-   EFLAGS specifies `execution flags' which affect matching: if
-   REG_NOTBOL is set, then ^ does not match at the beginning of the
-   string; if REG_NOTEOL is set, then $ does not match at the end.
-
-   We return 0 if we find a match and REG_NOMATCH if not.  */
-
-int
-regexec (preg, string, nmatch, pmatch, eflags)
-    const regex_t *preg;
-    const char *string;
-    size_t nmatch;
-    regmatch_t pmatch[];
-    int eflags;
-{
-  int ret;
-  struct re_registers regs;
-  regex_t private_preg;
-  int len = strlen (string);
-  boolean want_reg_info = !preg->no_sub && nmatch > 0;
-
-  private_preg = *preg;
-
-  private_preg.not_bol = !!(eflags & REG_NOTBOL);
-  private_preg.not_eol = !!(eflags & REG_NOTEOL);
-
-  /* The user has told us exactly how many registers to return
-     information about, via `nmatch'.  We have to pass that on to the
-     matching routines.  */
-  private_preg.regs_allocated = REGS_FIXED;
-
-  if (want_reg_info)
-    {
-      regs.num_regs = nmatch;
-      regs.start = TALLOC (nmatch, regoff_t);
-      regs.end = TALLOC (nmatch, regoff_t);
-      if (regs.start == NULL || regs.end == NULL)
-	return (int) REG_NOMATCH;
-    }
-
-  /* Perform the searching operation.  */
-  ret = re_search (&private_preg, string, len,
-		   /* start: */ 0, /* range: */ len,
-		   want_reg_info ? &regs : (struct re_registers *) 0);
-
-  /* Copy the register information to the POSIX structure.  */
-  if (want_reg_info)
-    {
-      if (ret >= 0)
-	{
-	  unsigned r;
-
-	  for (r = 0; r < nmatch; r++)
-	    {
-	      pmatch[r].rm_so = regs.start[r];
-	      pmatch[r].rm_eo = regs.end[r];
-	    }
-	}
-
-      /* If we needed the temporary register info, free the space now.  */
-      free (regs.start);
-      free (regs.end);
-    }
-
-  /* We want zero return to mean success, unlike `re_search'.  */
-  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
-}
-
-
-/* Returns a message corresponding to an error code, ERRCODE, returned
-   from either regcomp or regexec.   We don't use PREG here.  */
-
-size_t
-regerror(int errcode, const regex_t *preg,
-	 char *errbuf, size_t errbuf_size)
-{
-  const char *msg;
-  size_t msg_size;
-
-  if (errcode < 0
-      || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
-    /* Only error codes returned by the rest of the code should be passed
-       to this routine.  If we are given anything else, or if other regex
-       code generates an invalid error code, then the program has a bug.
-       Dump core so we can fix it.  */
-    abort ();
-
-  msg = re_error_msg[errcode];
-
-  /* POSIX doesn't require that we do anything in this case, but why
-     not be nice.  */
-  if (! msg)
-    msg = "Success";
-
-  msg_size = strlen (msg) + 1; /* Includes the null.  */
-
-  if (errbuf_size != 0)
-    {
-      if (msg_size > errbuf_size)
-	{
-	  strncpy (errbuf, msg, errbuf_size - 1);
-	  errbuf[errbuf_size - 1] = 0;
-	}
-      else
-	strcpy (errbuf, msg);
-    }
-
-  return msg_size;
-}
-
-
-/* Free dynamically allocated space used by PREG.  */
-
-void
-regfree (preg)
-    regex_t *preg;
-{
-  if (preg->buffer != NULL)
-    free (preg->buffer);
-  preg->buffer = NULL;
-
-  preg->allocated = 0;
-  preg->used = 0;
-
-  if (preg->fastmap != NULL)
-    free (preg->fastmap);
-  preg->fastmap = NULL;
-  preg->fastmap_accurate = 0;
-
-  if (preg->translate != NULL)
-    free (preg->translate);
-  preg->translate = NULL;
-}
-
-#endif /* not emacs  */
-
-/*
-Local variables:
-make-backup-files: t
-version-control: t
-trim-versions-without-asking: nil
-End:
-*/
diff --git a/compat/regex/regex.h b/compat/regex/regex.h
index 6eb64f1..61c9683 100644
--- a/compat/regex/regex.h
+++ b/compat/regex/regex.h
@@ -1,70 +1,90 @@
+#include <stdio.h>
+#include <stddef.h>
+
 /* Definitions for data structures and routines for the regular
-   expression library, version 0.12.
+   expression library.
+   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-   Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-
-   This program is distributed in the hope that it will be useful,
+   The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301 USA.  */
 
-#ifndef __REGEXP_LIBRARY_H__
-#define __REGEXP_LIBRARY_H__
+#ifndef _REGEX_H
+#define _REGEX_H 1
 
-/* POSIX says that <sys/types.h> must be included (by the caller) before
-   <regex.h>.  */
-
-#ifdef VMS
-/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
-   should be there.  */
+#ifdef HAVE_STDDEF_H
 #include <stddef.h>
 #endif
 
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifndef _LIBC
+#define __USE_GNU	1
+#endif
+
+/* Allow the use in C++ code.  */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+   wide enough to hold a value of a pointer.  For most ANSI compilers
+   ptrdiff_t and size_t should be likely OK.  Still size of these two
+   types is 2 for Microsoft C.  Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
 
 /* The following bits are used to determine the regexp syntax we
    recognize.  The set/not-set meanings are chosen so that Emacs syntax
    remains the value 0.  The bits are given in alphabetical order, and
    the definitions shifted by one from the previous bit; thus, when we
    add or remove a bit, only one other definition need change.  */
-typedef unsigned reg_syntax_t;
+typedef unsigned long int reg_syntax_t;
 
+#ifdef __USE_GNU
 /* If this bit is not set, then \ inside a bracket expression is literal.
    If set, then such a \ quotes the following character.  */
-#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
 
 /* If this bit is not set, then + and ? are operators, and \+ and \? are
      literals.
    If set, then \+ and \? are operators and + and ? are literals.  */
-#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
 
 /* If this bit is set, then character classes are supported.  They are:
      [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
      [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
    If not set, then character classes are not supported.  */
-#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
 
 /* If this bit is set, then ^ and $ are always anchors (outside bracket
      expressions, of course).
    If this bit is not set, then it depends:
-	^  is an anchor if it is at the beginning of a regular
-	   expression or after an open-group or an alternation operator;
-	$  is an anchor if it is at the end of a regular expression, or
-	   before a close-group or an alternation operator.
+        ^  is an anchor if it is at the beginning of a regular
+           expression or after an open-group or an alternation operator;
+        $  is an anchor if it is at the end of a regular expression, or
+           before a close-group or an alternation operator.
 
    This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
    POSIX draft 11.2 says that * etc. in leading positions is undefined.
    We already implemented a previous draft which made those constructs
    invalid, though, so we haven't changed the code back.  */
-#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
 
 /* If this bit is set, then special characters are always special
      regardless of where they are in the pattern.
@@ -72,63 +92,94 @@
      some contexts; otherwise they are ordinary.  Specifically,
      * + ? and intervals are only special when not after the beginning,
      open-group, or alternation operator.  */
-#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
 
 /* If this bit is set, then *, +, ?, and { cannot be first in an re or
      immediately after an alternation or begin-group operator.  */
-#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
 
 /* If this bit is set, then . matches newline.
    If not set, then it doesn't.  */
-#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
 
 /* If this bit is set, then . doesn't match NUL.
    If not set, then it does.  */
-#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
 
 /* If this bit is set, nonmatching lists [^...] do not match newline.
    If not set, they do.  */
-#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
 
 /* If this bit is set, either \{...\} or {...} defines an
      interval, depending on RE_NO_BK_BRACES.
    If not set, \{, \}, {, and } are literals.  */
-#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
 
 /* If this bit is set, +, ? and | aren't recognized as operators.
    If not set, they are.  */
-#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+# define RE_LIMITED_OPS (RE_INTERVALS << 1)
 
 /* If this bit is set, newline is an alternation operator.
    If not set, newline is literal.  */
-#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
 
 /* If this bit is set, then `{...}' defines an interval, and \{ and \}
      are literals.
   If not set, then `\{...\}' defines an interval.  */
-#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
 
 /* If this bit is set, (...) defines a group, and \( and \) are literals.
    If not set, \(...\) defines a group, and ( and ) are literals.  */
-#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
 
 /* If this bit is set, then \<digit> matches <digit>.
    If not set, then \<digit> is a back-reference.  */
-#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
 
 /* If this bit is set, then | is an alternation operator, and \| is literal.
    If not set, then \| is an alternation operator, and | is literal.  */
-#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
 
 /* If this bit is set, then an ending range point collating higher
      than the starting range point, as in [z-a], is invalid.
    If not set, then when ending range point collates higher than the
      starting range point, the range is ignored.  */
-#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
 
 /* If this bit is set, then an unmatched ) is ordinary.
    If not set, then an unmatched ) is invalid.  */
-#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+   without further backtracking.  */
+# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+   If not set, then the GNU regex operators are recognized. */
+# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+   a string of ordinary characters.  For example, the ERE 'a{1' is
+   treated as 'a\{1'.  */
+# define RE_INVALID_INTERVAL_ORD (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+   for ^, because it is difficult to scan the regex backwards to find
+   whether ^ should be special.  */
+# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
+/* If this bit is set, then \{ cannot be first in an bre or
+   immediately after an alternation or begin-group operator.  */
+# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
+
+/* If this bit is set, then no_sub will be set to 1 during
+   re_compile_pattern.  */
+#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+#endif
 
 /* This global variable defines the particular regexp syntax to use (for
    some interfaces).  When a regexp is compiled, the syntax used is
@@ -136,6 +187,7 @@
    already-compiled regexps.  */
 extern reg_syntax_t re_syntax_options;
 
+#ifdef __USE_GNU
 /* Define combinations of the above bits for the standard possibilities.
    (The [[[ comments delimit what gets put into the Texinfo file, so
    don't delete them!)  */
@@ -143,13 +195,22 @@
 #define RE_SYNTAX_EMACS 0
 
 #define RE_SYNTAX_AWK							\
-  (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL			\
-   | RE_NO_BK_PARENS            | RE_NO_BK_REFS				\
-   | RE_NO_BK_VBAR               | RE_NO_EMPTY_RANGES			\
-   | RE_UNMATCHED_RIGHT_PAREN_ORD)
+  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
+   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
+   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
+   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
 
-#define RE_SYNTAX_POSIX_AWK 						\
-  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+#define RE_SYNTAX_GNU_AWK						\
+  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
+   | RE_INVALID_INTERVAL_ORD)						\
+   & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS				\
+       | RE_CONTEXT_INVALID_OPS ))
+
+#define RE_SYNTAX_POSIX_AWK						\
+  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
+   | RE_INTERVALS	    | RE_NO_GNU_OPS				\
+   | RE_INVALID_INTERVAL_ORD)
 
 #define RE_SYNTAX_GREP							\
   (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
@@ -163,7 +224,8 @@
    | RE_NO_BK_VBAR)
 
 #define RE_SYNTAX_POSIX_EGREP						\
-  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
+   | RE_INVALID_INTERVAL_ORD)
 
 /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
 #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
@@ -176,7 +238,7 @@
    | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
 
 #define RE_SYNTAX_POSIX_BASIC						\
-  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
 
 /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
    RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
@@ -185,13 +247,13 @@
   (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
 
 #define RE_SYNTAX_POSIX_EXTENDED					\
-  (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS			\
-   | RE_CONTEXT_INDEP_OPS  | RE_NO_BK_BRACES				\
-   | RE_NO_BK_PARENS       | RE_NO_BK_VBAR				\
-   | RE_UNMATCHED_RIGHT_PAREN_ORD)
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
+   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
 
-/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
-   replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+   removed and RE_NO_BK_REFS is added.  */
 #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
   (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
    | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
@@ -202,10 +264,12 @@
 /* Maximum number of duplicates an interval can allow.  Some systems
    (erroneously) define this in other header files, but we want our
    value, so remove any previous define.  */
-#ifdef RE_DUP_MAX
-#undef RE_DUP_MAX
+# ifdef RE_DUP_MAX
+#  undef RE_DUP_MAX
+# endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
+# define RE_DUP_MAX (0x7fff)
 #endif
-#define RE_DUP_MAX ((1 << 15) - 1)
 
 
 /* POSIX `cflags' bits (i.e., information for `regcomp').  */
@@ -240,18 +304,26 @@
 /* Like REG_NOTBOL, except for the end-of-line.  */
 #define REG_NOTEOL (1 << 1)
 
+/* Use PMATCH[0] to delimit the start and end of the search in the
+   buffer.  */
+#define REG_STARTEND (1 << 2)
+
 
 /* If any error codes are removed, changed, or added, update the
    `re_error_msg' table in regex.c.  */
 typedef enum
 {
+#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K
+  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
+#endif
+
   REG_NOERROR = 0,	/* Success.  */
   REG_NOMATCH,		/* Didn't find a match (for regexec).  */
 
   /* POSIX regcomp return error codes.  (In the order listed in the
      standard.)  */
   REG_BADPAT,		/* Invalid pattern.  */
-  REG_ECOLLATE,		/* Not implemented.  */
+  REG_ECOLLATE,		/* Inalid collating element.  */
   REG_ECTYPE,		/* Invalid character class name.  */
   REG_EESCAPE,		/* Trailing backslash.  */
   REG_ESUBREG,		/* Invalid back reference.  */
@@ -275,85 +347,92 @@
    compiled, the `re_nsub' field is available.  All other fields are
    private to the regex routines.  */
 
+#ifndef RE_TRANSLATE_TYPE
+# define __RE_TRANSLATE_TYPE unsigned char *
+# ifdef __USE_GNU
+#  define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
+# endif
+#endif
+
+#ifdef __USE_GNU
+# define __REPB_PREFIX(name) name
+#else
+# define __REPB_PREFIX(name) __##name
+#endif
+
 struct re_pattern_buffer
 {
-/* [[[begin pattern_buffer]]] */
-	/* Space that holds the compiled pattern.  It is declared as
-	  `unsigned char *' because its elements are
-	   sometimes used as array indexes.  */
-  unsigned char *buffer;
+  /* Space that holds the compiled pattern.  It is declared as
+     `unsigned char *' because its elements are sometimes used as
+     array indexes.  */
+  unsigned char *__REPB_PREFIX(buffer);
 
-	/* Number of bytes to which `buffer' points.  */
-  unsigned long allocated;
+  /* Number of bytes to which `buffer' points.  */
+  unsigned long int __REPB_PREFIX(allocated);
 
-	/* Number of bytes actually used in `buffer'.  */
-  unsigned long used;
+  /* Number of bytes actually used in `buffer'.  */
+  unsigned long int __REPB_PREFIX(used);
 
-	/* Syntax setting with which the pattern was compiled.  */
-  reg_syntax_t syntax;
+  /* Syntax setting with which the pattern was compiled.  */
+  reg_syntax_t __REPB_PREFIX(syntax);
 
-	/* Pointer to a fastmap, if any, otherwise zero.  re_search uses
-	   the fastmap, if there is one, to skip over impossible
-	   starting points for matches.  */
-  char *fastmap;
+  /* Pointer to a fastmap, if any, otherwise zero.  re_search uses the
+     fastmap, if there is one, to skip over impossible starting points
+     for matches.  */
+  char *__REPB_PREFIX(fastmap);
 
-	/* Either a translate table to apply to all characters before
-	   comparing them, or zero for no translation.  The translation
-	   is applied to a pattern when it is compiled and to a string
-	   when it is matched.  */
-  char *translate;
+  /* Either a translate table to apply to all characters before
+     comparing them, or zero for no translation.  The translation is
+     applied to a pattern when it is compiled and to a string when it
+     is matched.  */
+  __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
 
-	/* Number of subexpressions found by the compiler.  */
+  /* Number of subexpressions found by the compiler.  */
   size_t re_nsub;
 
-	/* Zero if this pattern cannot match the empty string, one else.
-	   Well, in truth it's used only in `re_search_2', to see
-	   whether or not we should use the fastmap, so we don't set
-	   this absolutely perfectly; see `re_compile_fastmap' (the
-	   `duplicate' case).  */
-  unsigned can_be_null : 1;
+  /* Zero if this pattern cannot match the empty string, one else.
+     Well, in truth it's used only in `re_search_2', to see whether or
+     not we should use the fastmap, so we don't set this absolutely
+     perfectly; see `re_compile_fastmap' (the `duplicate' case).  */
+  unsigned __REPB_PREFIX(can_be_null) : 1;
 
-	/* If REGS_UNALLOCATED, allocate space in the `regs' structure
-	     for `max (RE_NREGS, re_nsub + 1)' groups.
-	   If REGS_REALLOCATE, reallocate space if necessary.
-	   If REGS_FIXED, use what's there.  */
-#define REGS_UNALLOCATED 0
-#define REGS_REALLOCATE 1
-#define REGS_FIXED 2
-  unsigned regs_allocated : 2;
+  /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+     for `max (RE_NREGS, re_nsub + 1)' groups.
+     If REGS_REALLOCATE, reallocate space if necessary.
+     If REGS_FIXED, use what's there.  */
+#ifdef __USE_GNU
+# define REGS_UNALLOCATED 0
+# define REGS_REALLOCATE 1
+# define REGS_FIXED 2
+#endif
+  unsigned __REPB_PREFIX(regs_allocated) : 2;
 
-	/* Set to zero when `regex_compile' compiles a pattern; set to one
-	   by `re_compile_fastmap' if it updates the fastmap.  */
-  unsigned fastmap_accurate : 1;
+  /* Set to zero when `regex_compile' compiles a pattern; set to one
+     by `re_compile_fastmap' if it updates the fastmap.  */
+  unsigned __REPB_PREFIX(fastmap_accurate) : 1;
 
-	/* If set, `re_match_2' does not return information about
-	   subexpressions.  */
-  unsigned no_sub : 1;
+  /* If set, `re_match_2' does not return information about
+     subexpressions.  */
+  unsigned __REPB_PREFIX(no_sub) : 1;
 
-	/* If set, a beginning-of-line anchor doesn't match at the
-	   beginning of the string.  */
-  unsigned not_bol : 1;
+  /* If set, a beginning-of-line anchor doesn't match at the beginning
+     of the string.  */
+  unsigned __REPB_PREFIX(not_bol) : 1;
 
-	/* Similarly for an end-of-line anchor.  */
-  unsigned not_eol : 1;
+  /* Similarly for an end-of-line anchor.  */
+  unsigned __REPB_PREFIX(not_eol) : 1;
 
-	/* If true, an anchor at a newline matches.  */
-  unsigned newline_anchor : 1;
-
-/* [[[end pattern_buffer]]] */
+  /* If true, an anchor at a newline matches.  */
+  unsigned __REPB_PREFIX(newline_anchor) : 1;
 };
 
 typedef struct re_pattern_buffer regex_t;
-
-
-/* search.c (search_buffer) in Emacs needs this one opcode value.  It is
-   defined both in `regex.c' and here.  */
-#define RE_EXACTN_VALUE 1
 
 /* Type for byte offsets within the string.  POSIX mandates this.  */
 typedef int regoff_t;
 
 
+#ifdef __USE_GNU
 /* This is the structure we store register match data in.  See
    regex.texinfo for a full description of what registers match.  */
 struct re_registers
@@ -367,8 +446,9 @@
 /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
    `re_match_2' returns information about at least this many registers
    the first time a `regs' structure is passed.  */
-#ifndef RE_NREGS
-#define RE_NREGS 30
+# ifndef RE_NREGS
+#  define RE_NREGS 30
+# endif
 #endif
 
 
@@ -383,38 +463,22 @@
 
 /* Declarations for routines.  */
 
-/* To avoid duplicating every routine declaration -- once with a
-   prototype (if we are ANSI), and once without (if we aren't) -- we
-   use the following macro to declare argument types.  This
-   unfortunately clutters up the declarations a bit, but I think it's
-   worth it.  */
-
-#if __STDC__
-
-#define _RE_ARGS(args) args
-
-#else /* not __STDC__ */
-
-#define _RE_ARGS(args) ()
-
-#endif /* not __STDC__ */
-
+#ifdef __USE_GNU
 /* Sets the current default syntax to SYNTAX, and return the old syntax.
    You can also simply assign to the `re_syntax_options' variable.  */
-extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
 
 /* Compile the regular expression PATTERN, with length LENGTH
    and syntax given by the global `re_syntax_options', into the buffer
    BUFFER.  Return NULL if successful, and an error string if not.  */
-extern const char *re_compile_pattern
-  _RE_ARGS ((const char *pattern, int length,
-	     struct re_pattern_buffer *buffer));
+extern const char *re_compile_pattern (const char *__pattern, size_t __length,
+				       struct re_pattern_buffer *__buffer);
 
 
 /* Compile a fastmap for the compiled pattern in BUFFER; used to
    accelerate searches.  Return 0 if successful and -2 if was an
    internal error.  */
-extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
 
 
 /* Search in the string STRING (with length LENGTH) for the pattern
@@ -422,31 +486,30 @@
    characters.  Return the starting position of the match, -1 for no
    match, or -2 for an internal error.  Also return register
    information in REGS (if REGS and BUFFER->no_sub are nonzero).  */
-extern int re_search
-  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
-	    int length, int start, int range, struct re_registers *regs));
+extern int re_search (struct re_pattern_buffer *__buffer, const char *__cstring,
+		      int __length, int __start, int __range,
+		      struct re_registers *__regs);
 
 
 /* Like `re_search', but search in the concatenation of STRING1 and
    STRING2.  Also, stop searching at index START + STOP.  */
-extern int re_search_2
-  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
-	     int length1, const char *string2, int length2,
-	     int start, int range, struct re_registers *regs, int stop));
+extern int re_search_2 (struct re_pattern_buffer *__buffer,
+			const char *__string1, int __length1,
+			const char *__string2, int __length2, int __start,
+			int __range, struct re_registers *__regs, int __stop);
 
 
 /* Like `re_search', but return how many characters in STRING the regexp
    in BUFFER matched, starting at position START.  */
-extern int re_match
-  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
-	     int length, int start, struct re_registers *regs));
+extern int re_match (struct re_pattern_buffer *__buffer, const char *__cstring,
+		     int __length, int __start, struct re_registers *__regs);
 
 
 /* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
-extern int re_match_2
-  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
-	     int length1, const char *string2, int length2,
-	     int start, struct re_registers *regs, int stop));
+extern int re_match_2 (struct re_pattern_buffer *__buffer,
+		       const char *__string1, int __length1,
+		       const char *__string2, int __length2, int __start,
+		       struct re_registers *__regs, int __stop);
 
 
 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
@@ -461,30 +524,59 @@
    Unless this function is called, the first search or match using
    PATTERN_BUFFER will allocate its own register data, without
    freeing the old data.  */
-extern void re_set_registers
-  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
-	     unsigned num_regs, regoff_t *starts, regoff_t *ends));
+extern void re_set_registers (struct re_pattern_buffer *__buffer,
+			      struct re_registers *__regs,
+			      unsigned int __num_regs,
+			      regoff_t *__starts, regoff_t *__ends);
+#endif	/* Use GNU */
 
+#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD)
+# ifndef _CRAY
 /* 4.2 bsd compatibility.  */
-extern char *re_comp _RE_ARGS ((const char *));
-extern int re_exec _RE_ARGS ((const char *));
+extern char *re_comp (const char *);
+extern int re_exec (const char *);
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+   "restrict", and "configure" may have defined "restrict".  */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+#  if defined restrict || 199901L <= __STDC_VERSION__
+#   define __restrict restrict
+#  else
+#   define __restrict
+#  endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax.  */
+#ifndef __restrict_arr
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
+     && !defined __GNUG__
+#  define __restrict_arr __restrict
+# else
+#  define __restrict_arr
+# endif
+#endif
 
 /* POSIX compatibility.  */
-extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
-extern int regexec
-  _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
-	     regmatch_t pmatch[], int eflags));
-extern size_t regerror
-  _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
-	     size_t errbuf_size));
-extern void regfree _RE_ARGS ((regex_t *preg));
+extern int regcomp (regex_t *__restrict __preg,
+		    const char *__restrict __pattern,
+		    int __cflags);
 
-#endif /* not __REGEXP_LIBRARY_H__ */
-
-/*
-Local variables:
-make-backup-files: t
-version-control: t
-trim-versions-without-asking: nil
-End:
-*/
+extern int regexec (const regex_t *__restrict __preg,
+		    const char *__restrict __cstring, size_t __nmatch,
+		    regmatch_t __pmatch[__restrict_arr],
+		    int __eflags);
+
+extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
+			char *__restrict __errbuf, size_t __errbuf_size);
+
+extern void regfree (regex_t *__preg);
+
+
+#ifdef __cplusplus
+}
+#endif	/* C++ */
+
+#endif /* regex.h */
diff --git a/compat/regex/regex_internal.c b/compat/regex/regex_internal.c
new file mode 100644
index 0000000..193854c
--- /dev/null
+++ b/compat/regex/regex_internal.c
@@ -0,0 +1,1744 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301 USA.  */
+
+static void re_string_construct_common (const char *str, int len,
+					re_string_t *pstr,
+					RE_TRANSLATE_TYPE trans, int icase,
+					const re_dfa_t *dfa) internal_function;
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
+					  const re_node_set *nodes,
+					  unsigned int hash) internal_function;
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
+					  const re_node_set *nodes,
+					  unsigned int context,
+					  unsigned int hash) internal_function;
+
+#ifdef GAWK
+#undef MAX	/* safety */
+static int
+MAX(size_t a, size_t b)
+{
+	return (a > b ? a : b);
+}
+#endif
+
+/* Functions for string operation.  */
+
+/* This function allocate the buffers.  It is necessary to call
+   re_string_reconstruct before using the object.  */
+
+static reg_errcode_t
+internal_function
+re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
+		    RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t ret;
+  int init_buf_len;
+
+  /* Ensure at least one character fits into the buffers.  */
+  if (init_len < dfa->mb_cur_max)
+    init_len = dfa->mb_cur_max;
+  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  ret = re_string_realloc_buffers (pstr, init_buf_len);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+
+  pstr->word_char = dfa->word_char;
+  pstr->word_ops_used = dfa->word_ops_used;
+  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
+  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
+  pstr->valid_raw_len = pstr->valid_len;
+  return REG_NOERROR;
+}
+
+/* This function allocate the buffers, and initialize them.  */
+
+static reg_errcode_t
+internal_function
+re_string_construct (re_string_t *pstr, const char *str, int len,
+		     RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t ret;
+  memset (pstr, '\0', sizeof (re_string_t));
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  if (len > 0)
+    {
+      ret = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (ret != REG_NOERROR, 0))
+	return ret;
+    }
+  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
+
+  if (icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	{
+	  while (1)
+	    {
+	      ret = build_wcs_upper_buffer (pstr);
+	      if (BE (ret != REG_NOERROR, 0))
+		return ret;
+	      if (pstr->valid_raw_len >= len)
+		break;
+	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
+		break;
+	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+	      if (BE (ret != REG_NOERROR, 0))
+		return ret;
+	    }
+	}
+      else
+#endif /* RE_ENABLE_I18N  */
+	build_upper_buffer (pstr);
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
+	{
+	  if (trans != NULL)
+	    re_string_translate_buffer (pstr);
+	  else
+	    {
+	      pstr->valid_len = pstr->bufs_len;
+	      pstr->valid_raw_len = pstr->bufs_len;
+	    }
+	}
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper functions for re_string_allocate, and re_string_construct.  */
+
+static reg_errcode_t
+internal_function
+re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
+{
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      wint_t *new_wcs;
+
+      /* Avoid overflow in realloc.  */
+      const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int));
+      if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
+	return REG_ESPACE;
+
+      new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
+      if (BE (new_wcs == NULL, 0))
+	return REG_ESPACE;
+      pstr->wcs = new_wcs;
+      if (pstr->offsets != NULL)
+	{
+	  int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
+	  if (BE (new_offsets == NULL, 0))
+	    return REG_ESPACE;
+	  pstr->offsets = new_offsets;
+	}
+    }
+#endif /* RE_ENABLE_I18N  */
+  if (pstr->mbs_allocated)
+    {
+      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
+					   new_buf_len);
+      if (BE (new_mbs == NULL, 0))
+	return REG_ESPACE;
+      pstr->mbs = new_mbs;
+    }
+  pstr->bufs_len = new_buf_len;
+  return REG_NOERROR;
+}
+
+
+static void
+internal_function
+re_string_construct_common (const char *str, int len, re_string_t *pstr,
+			    RE_TRANSLATE_TYPE trans, int icase,
+			    const re_dfa_t *dfa)
+{
+  pstr->raw_mbs = (const unsigned char *) str;
+  pstr->len = len;
+  pstr->raw_len = len;
+  pstr->trans = trans;
+  pstr->icase = icase ? 1 : 0;
+  pstr->mbs_allocated = (trans != NULL || icase);
+  pstr->mb_cur_max = dfa->mb_cur_max;
+  pstr->is_utf8 = dfa->is_utf8;
+  pstr->map_notascii = dfa->map_notascii;
+  pstr->stop = pstr->len;
+  pstr->raw_stop = pstr->stop;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+   If the byte sequence of the string are:
+     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+   Then wide character buffer will be:
+     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
+   We use WEOF for padding, they indicate that the position isn't
+   a first byte of a multibyte character.
+
+   Note that this function assumes PSTR->VALID_LEN elements are already
+   built and starts from PSTR->VALID_LEN.  */
+
+static void
+internal_function
+build_wcs_buffer (re_string_t *pstr)
+{
+#ifdef _LIBC
+  unsigned char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  unsigned char buf[64];
+#endif
+  mbstate_t prev_st;
+  int byte_idx, end_idx, remain_len;
+  size_t mbclen;
+
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len.  */
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+    {
+      wchar_t wc;
+      const char *p;
+
+      remain_len = end_idx - byte_idx;
+      prev_st = pstr->cur_state;
+      /* Apply the translation if we need.  */
+      if (BE (pstr->trans != NULL, 0))
+	{
+	  int i, ch;
+
+	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+	    {
+	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
+	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
+	    }
+	  p = (const char *) buf;
+	}
+      else
+	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
+      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2, 0))
+	{
+	  /* The buffer doesn't have enough space, finish to build.  */
+	  pstr->cur_state = prev_st;
+	  break;
+	}
+      else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+	{
+	  /* We treat these cases as a singlebyte character.  */
+	  mbclen = 1;
+	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+	  if (BE (pstr->trans != NULL, 0))
+	    wc = pstr->trans[wc];
+	  pstr->cur_state = prev_st;
+	}
+
+      /* Write wide character and padding.  */
+      pstr->wcs[byte_idx++] = wc;
+      /* Write paddings.  */
+      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+	pstr->wcs[byte_idx++] = WEOF;
+    }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = byte_idx;
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+   but for REG_ICASE.  */
+
+static reg_errcode_t
+internal_function
+build_wcs_upper_buffer (re_string_t *pstr)
+{
+  mbstate_t prev_st;
+  int src_idx, byte_idx, end_idx, remain_len;
+  size_t mbclen;
+#ifdef _LIBC
+  char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  char buf[64];
+#endif
+
+  byte_idx = pstr->valid_len;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  /* The following optimization assumes that ASCII characters can be
+     mapped to wide characters with a simple cast.  */
+  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
+    {
+      while (byte_idx < end_idx)
+	{
+	  wchar_t wc;
+
+	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
+	      && mbsinit (&pstr->cur_state))
+	    {
+	      /* In case of a singlebyte character.  */
+	      pstr->mbs[byte_idx]
+		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
+	      /* The next step uses the assumption that wchar_t is encoded
+		 ASCII-safe: all ASCII values can be converted like this.  */
+	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
+	      ++byte_idx;
+	      continue;
+	    }
+
+	  remain_len = end_idx - byte_idx;
+	  prev_st = pstr->cur_state;
+	  mbclen = __mbrtowc (&wc,
+			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+			       + byte_idx), remain_len, &pstr->cur_state);
+	  if (BE (mbclen + 2 > 2, 1))
+	    {
+	      wchar_t wcu = wc;
+	      if (iswlower (wc))
+		{
+		  size_t mbcdlen;
+
+		  wcu = towupper (wc);
+		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
+		  if (BE (mbclen == mbcdlen, 1))
+		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
+		  else
+		    {
+		      src_idx = byte_idx;
+		      goto offsets_needed;
+		    }
+		}
+	      else
+		memcpy (pstr->mbs + byte_idx,
+			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+	      pstr->wcs[byte_idx++] = wcu;
+	      /* Write paddings.  */
+	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+		pstr->wcs[byte_idx++] = WEOF;
+	    }
+	  else if (mbclen == (size_t) -1 || mbclen == 0)
+	    {
+	      /* It is an invalid character or '\0'.  Just use the byte.  */
+	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+	      pstr->mbs[byte_idx] = ch;
+	      /* And also cast it to wide char.  */
+	      pstr->wcs[byte_idx++] = (wchar_t) ch;
+	      if (BE (mbclen == (size_t) -1, 0))
+		pstr->cur_state = prev_st;
+	    }
+	  else
+	    {
+	      /* The buffer doesn't have enough space, finish to build.  */
+	      pstr->cur_state = prev_st;
+	      break;
+	    }
+	}
+      pstr->valid_len = byte_idx;
+      pstr->valid_raw_len = byte_idx;
+      return REG_NOERROR;
+    }
+  else
+    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
+      {
+	wchar_t wc;
+	const char *p;
+      offsets_needed:
+	remain_len = end_idx - byte_idx;
+	prev_st = pstr->cur_state;
+	if (BE (pstr->trans != NULL, 0))
+	  {
+	    int i, ch;
+
+	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+	      {
+		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
+		buf[i] = pstr->trans[ch];
+	      }
+	    p = (const char *) buf;
+	  }
+	else
+	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+	if (BE (mbclen + 2 > 2, 1))
+	  {
+	    wchar_t wcu = wc;
+	    if (iswlower (wc))
+	      {
+		size_t mbcdlen;
+
+		wcu = towupper (wc);
+		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
+		if (BE (mbclen == mbcdlen, 1))
+		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
+		else if (mbcdlen != (size_t) -1)
+		  {
+		    size_t i;
+
+		    if (byte_idx + mbcdlen > pstr->bufs_len)
+		      {
+			pstr->cur_state = prev_st;
+			break;
+		      }
+
+		    if (pstr->offsets == NULL)
+		      {
+			pstr->offsets = re_malloc (int, pstr->bufs_len);
+
+			if (pstr->offsets == NULL)
+			  return REG_ESPACE;
+		      }
+		    if (!pstr->offsets_needed)
+		      {
+			for (i = 0; i < (size_t) byte_idx; ++i)
+			  pstr->offsets[i] = i;
+			pstr->offsets_needed = 1;
+		      }
+
+		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
+		    pstr->wcs[byte_idx] = wcu;
+		    pstr->offsets[byte_idx] = src_idx;
+		    for (i = 1; i < mbcdlen; ++i)
+		      {
+			pstr->offsets[byte_idx + i]
+			  = src_idx + (i < mbclen ? i : mbclen - 1);
+			pstr->wcs[byte_idx + i] = WEOF;
+		      }
+		    pstr->len += mbcdlen - mbclen;
+		    if (pstr->raw_stop > src_idx)
+		      pstr->stop += mbcdlen - mbclen;
+		    end_idx = (pstr->bufs_len > pstr->len)
+			      ? pstr->len : pstr->bufs_len;
+		    byte_idx += mbcdlen;
+		    src_idx += mbclen;
+		    continue;
+		  }
+		else
+		  memcpy (pstr->mbs + byte_idx, p, mbclen);
+	      }
+	    else
+	      memcpy (pstr->mbs + byte_idx, p, mbclen);
+
+	    if (BE (pstr->offsets_needed != 0, 0))
+	      {
+		size_t i;
+		for (i = 0; i < mbclen; ++i)
+		  pstr->offsets[byte_idx + i] = src_idx + i;
+	      }
+	    src_idx += mbclen;
+
+	    pstr->wcs[byte_idx++] = wcu;
+	    /* Write paddings.  */
+	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+	      pstr->wcs[byte_idx++] = WEOF;
+	  }
+	else if (mbclen == (size_t) -1 || mbclen == 0)
+	  {
+	    /* It is an invalid character or '\0'.  Just use the byte.  */
+	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
+
+	    if (BE (pstr->trans != NULL, 0))
+	      ch = pstr->trans [ch];
+	    pstr->mbs[byte_idx] = ch;
+
+	    if (BE (pstr->offsets_needed != 0, 0))
+	      pstr->offsets[byte_idx] = src_idx;
+	    ++src_idx;
+
+	    /* And also cast it to wide char.  */
+	    pstr->wcs[byte_idx++] = (wchar_t) ch;
+	    if (BE (mbclen == (size_t) -1, 0))
+	      pstr->cur_state = prev_st;
+	  }
+	else
+	  {
+	    /* The buffer doesn't have enough space, finish to build.  */
+	    pstr->cur_state = prev_st;
+	    break;
+	  }
+      }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = src_idx;
+  return REG_NOERROR;
+}
+
+/* Skip characters until the index becomes greater than NEW_RAW_IDX.
+   Return the index.  */
+
+static int
+internal_function
+re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
+{
+  mbstate_t prev_st;
+  int rawbuf_idx;
+  size_t mbclen;
+  wint_t wc = WEOF;
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      wchar_t wc2;
+      int remain_len = pstr->len - rawbuf_idx;
+      prev_st = pstr->cur_state;
+      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+			  remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+	{
+	  /* We treat these cases as a single byte character.  */
+	  if (mbclen == 0 || remain_len == 0)
+	    wc = L'\0';
+	  else
+	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
+	  mbclen = 1;
+	  pstr->cur_state = prev_st;
+	}
+      else
+	wc = (wint_t) wc2;
+      /* Then proceed the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  *last_wc = (wint_t) wc;
+  return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N  */
+
+/* Build the buffer PSTR->MBS, and apply the translation if we need.
+   This function is used in case of REG_ICASE.  */
+
+static void
+internal_function
+build_upper_buffer (re_string_t *pstr)
+{
+  int char_idx, end_idx;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
+      if (BE (pstr->trans != NULL, 0))
+	ch = pstr->trans[ch];
+      if (islower (ch))
+	pstr->mbs[char_idx] = toupper (ch);
+      else
+	pstr->mbs[char_idx] = ch;
+    }
+  pstr->valid_len = char_idx;
+  pstr->valid_raw_len = char_idx;
+}
+
+/* Apply TRANS to the buffer in PSTR.  */
+
+static void
+internal_function
+re_string_translate_buffer (re_string_t *pstr)
+{
+  int buf_idx, end_idx;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
+      pstr->mbs[buf_idx] = pstr->trans[ch];
+    }
+
+  pstr->valid_len = buf_idx;
+  pstr->valid_raw_len = buf_idx;
+}
+
+/* This function re-construct the buffers.
+   Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
+   convert to upper case in case of REG_ICASE, apply translation.  */
+
+static reg_errcode_t
+internal_function
+re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
+{
+  int offset = idx - pstr->raw_mbs_idx;
+  if (BE (offset < 0, 0))
+    {
+      /* Reset buffer.  */
+#ifdef RE_ENABLE_I18N
+      if (pstr->mb_cur_max > 1)
+	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+#endif /* RE_ENABLE_I18N */
+      pstr->len = pstr->raw_len;
+      pstr->stop = pstr->raw_stop;
+      pstr->valid_len = 0;
+      pstr->raw_mbs_idx = 0;
+      pstr->valid_raw_len = 0;
+      pstr->offsets_needed = 0;
+      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+      if (!pstr->mbs_allocated)
+	pstr->mbs = (unsigned char *) pstr->raw_mbs;
+      offset = idx;
+    }
+
+  if (BE (offset != 0, 1))
+    {
+      /* Should the already checked characters be kept?  */
+      if (BE (offset < pstr->valid_raw_len, 1))
+	{
+	  /* Yes, move them to the front of the buffer.  */
+#ifdef RE_ENABLE_I18N
+	  if (BE (pstr->offsets_needed, 0))
+	    {
+	      int low = 0, high = pstr->valid_len, mid;
+	      do
+		{
+		  mid = (high + low) / 2;
+		  if (pstr->offsets[mid] > offset)
+		    high = mid;
+		  else if (pstr->offsets[mid] < offset)
+		    low = mid + 1;
+		  else
+		    break;
+		}
+	      while (low < high);
+	      if (pstr->offsets[mid] < offset)
+		++mid;
+	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
+							eflags);
+	      /* This can be quite complicated, so handle specially
+		 only the common and easy case where the character with
+		 different length representation of lower and upper
+		 case is present at or after offset.  */
+	      if (pstr->valid_len > offset
+		  && mid == offset && pstr->offsets[mid] == offset)
+		{
+		  memmove (pstr->wcs, pstr->wcs + offset,
+			   (pstr->valid_len - offset) * sizeof (wint_t));
+		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
+		  pstr->valid_len -= offset;
+		  pstr->valid_raw_len -= offset;
+		  for (low = 0; low < pstr->valid_len; low++)
+		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
+		}
+	      else
+		{
+		  /* Otherwise, just find out how long the partial multibyte
+		     character at offset is and fill it with WEOF/255.  */
+		  pstr->len = pstr->raw_len - idx + offset;
+		  pstr->stop = pstr->raw_stop - idx + offset;
+		  pstr->offsets_needed = 0;
+		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
+		    --mid;
+		  while (mid < pstr->valid_len)
+		    if (pstr->wcs[mid] != WEOF)
+		      break;
+		    else
+		      ++mid;
+		  if (mid == pstr->valid_len)
+		    pstr->valid_len = 0;
+		  else
+		    {
+		      pstr->valid_len = pstr->offsets[mid] - offset;
+		      if (pstr->valid_len)
+			{
+			  for (low = 0; low < pstr->valid_len; ++low)
+			    pstr->wcs[low] = WEOF;
+			  memset (pstr->mbs, 255, pstr->valid_len);
+			}
+		    }
+		  pstr->valid_raw_len = pstr->valid_len;
+		}
+	    }
+	  else
+#endif
+	    {
+	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
+							eflags);
+#ifdef RE_ENABLE_I18N
+	      if (pstr->mb_cur_max > 1)
+		memmove (pstr->wcs, pstr->wcs + offset,
+			 (pstr->valid_len - offset) * sizeof (wint_t));
+#endif /* RE_ENABLE_I18N */
+	      if (BE (pstr->mbs_allocated, 0))
+		memmove (pstr->mbs, pstr->mbs + offset,
+			 pstr->valid_len - offset);
+	      pstr->valid_len -= offset;
+	      pstr->valid_raw_len -= offset;
+#if DEBUG
+	      assert (pstr->valid_len > 0);
+#endif
+	    }
+	}
+      else
+	{
+#ifdef RE_ENABLE_I18N
+	  /* No, skip all characters until IDX.  */
+	  int prev_valid_len = pstr->valid_len;
+
+	  if (BE (pstr->offsets_needed, 0))
+	    {
+	      pstr->len = pstr->raw_len - idx + offset;
+	      pstr->stop = pstr->raw_stop - idx + offset;
+	      pstr->offsets_needed = 0;
+	    }
+#endif
+	  pstr->valid_len = 0;
+#ifdef RE_ENABLE_I18N
+	  if (pstr->mb_cur_max > 1)
+	    {
+	      int wcs_idx;
+	      wint_t wc = WEOF;
+
+	      if (pstr->is_utf8)
+		{
+		  const unsigned char *raw, *p, *end;
+
+		  /* Special case UTF-8.  Multi-byte chars start with any
+		     byte other than 0x80 - 0xbf.  */
+		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
+		  end = raw + (offset - pstr->mb_cur_max);
+		  if (end < pstr->raw_mbs)
+		    end = pstr->raw_mbs;
+		  p = raw + offset - 1;
+#ifdef _LIBC
+		  /* We know the wchar_t encoding is UCS4, so for the simple
+		     case, ASCII characters, skip the conversion step.  */
+		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
+		    {
+		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+		      /* pstr->valid_len = 0; */
+		      wc = (wchar_t) *p;
+		    }
+		  else
+#endif
+		    for (; p >= end; --p)
+		      if ((*p & 0xc0) != 0x80)
+			{
+			  mbstate_t cur_state;
+			  wchar_t wc2;
+			  int mlen = raw + pstr->len - p;
+			  unsigned char buf[6];
+			  size_t mbclen;
+
+			  if (BE (pstr->trans != NULL, 0))
+			    {
+			      int i = mlen < 6 ? mlen : 6;
+			      while (--i >= 0)
+				buf[i] = pstr->trans[p[i]];
+			    }
+			  /* XXX Don't use mbrtowc, we know which conversion
+			     to use (UTF-8 -> UCS4).  */
+			  memset (&cur_state, 0, sizeof (cur_state));
+			  mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
+					      &cur_state);
+			  if (raw + offset - p <= mbclen
+			      && mbclen < (size_t) -2)
+			    {
+			      memset (&pstr->cur_state, '\0',
+				      sizeof (mbstate_t));
+			      pstr->valid_len = mbclen - (raw + offset - p);
+			      wc = wc2;
+			    }
+			  break;
+			}
+		}
+
+	      if (wc == WEOF)
+		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+	      if (wc == WEOF)
+		pstr->tip_context
+		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
+	      else
+		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
+				      && IS_WIDE_WORD_CHAR (wc))
+				     ? CONTEXT_WORD
+				     : ((IS_WIDE_NEWLINE (wc)
+					 && pstr->newline_anchor)
+					? CONTEXT_NEWLINE : 0));
+	      if (BE (pstr->valid_len, 0))
+		{
+		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+		    pstr->wcs[wcs_idx] = WEOF;
+		  if (pstr->mbs_allocated)
+		    memset (pstr->mbs, 255, pstr->valid_len);
+		}
+	      pstr->valid_raw_len = pstr->valid_len;
+	    }
+	  else
+#endif /* RE_ENABLE_I18N */
+	    {
+	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
+	      pstr->valid_raw_len = 0;
+	      if (pstr->trans)
+		c = pstr->trans[c];
+	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
+				   ? CONTEXT_WORD
+				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
+				      ? CONTEXT_NEWLINE : 0));
+	    }
+	}
+      if (!BE (pstr->mbs_allocated, 0))
+	pstr->mbs += offset;
+    }
+  pstr->raw_mbs_idx = idx;
+  pstr->len -= offset;
+  pstr->stop -= offset;
+
+  /* Then build the buffers.  */
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      if (pstr->icase)
+	{
+	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
+	  if (BE (ret != REG_NOERROR, 0))
+	    return ret;
+	}
+      else
+	build_wcs_buffer (pstr);
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+    if (BE (pstr->mbs_allocated, 0))
+      {
+	if (pstr->icase)
+	  build_upper_buffer (pstr);
+	else if (pstr->trans != NULL)
+	  re_string_translate_buffer (pstr);
+      }
+    else
+      pstr->valid_len = pstr->len;
+
+  pstr->cur_idx = 0;
+  return REG_NOERROR;
+}
+
+static unsigned char
+internal_function __attribute ((pure))
+re_string_peek_byte_case (const re_string_t *pstr, int idx)
+{
+  int ch, off;
+
+  /* Handle the common (easiest) cases first.  */
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_peek_byte (pstr, idx);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1
+      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
+    return re_string_peek_byte (pstr, idx);
+#endif
+
+  off = pstr->cur_idx + idx;
+#ifdef RE_ENABLE_I18N
+  if (pstr->offsets_needed)
+    off = pstr->offsets[off];
+#endif
+
+  ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
+     this function returns CAPITAL LETTER I instead of first byte of
+     DOTLESS SMALL LETTER I.  The latter would confuse the parser,
+     since peek_byte_case doesn't advance cur_idx in any way.  */
+  if (pstr->offsets_needed && !isascii (ch))
+    return re_string_peek_byte (pstr, idx);
+#endif
+
+  return ch;
+}
+
+static unsigned char
+internal_function __attribute ((pure))
+re_string_fetch_byte_case (re_string_t *pstr)
+{
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_fetch_byte (pstr);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->offsets_needed)
+    {
+      int off, ch;
+
+      /* For tr_TR.UTF-8 [[:islower:]] there is
+	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
+	 in that case the whole multi-byte character and return
+	 the original letter.  On the other side, with
+	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
+	 anything else would complicate things too much.  */
+
+      if (!re_string_first_byte (pstr, pstr->cur_idx))
+	return re_string_fetch_byte (pstr);
+
+      off = pstr->offsets[pstr->cur_idx];
+      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
+
+      if (! isascii (ch))
+	return re_string_fetch_byte (pstr);
+
+      re_string_skip_bytes (pstr,
+			    re_string_char_size_at (pstr, pstr->cur_idx));
+      return ch;
+    }
+#endif
+
+  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
+}
+
+static void
+internal_function
+re_string_destruct (re_string_t *pstr)
+{
+#ifdef RE_ENABLE_I18N
+  re_free (pstr->wcs);
+  re_free (pstr->offsets);
+#endif /* RE_ENABLE_I18N  */
+  if (pstr->mbs_allocated)
+    re_free (pstr->mbs);
+}
+
+/* Return the context at IDX in INPUT.  */
+
+static unsigned int
+internal_function
+re_string_context_at (const re_string_t *input, int idx, int eflags)
+{
+  int c;
+  if (BE (idx < 0, 0))
+    /* In this case, we use the value stored in input->tip_context,
+       since we can't know the character in input->mbs[-1] here.  */
+    return input->tip_context;
+  if (BE (idx == input->len, 0))
+    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
+	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc;
+      int wc_idx = idx;
+      while(input->wcs[wc_idx] == WEOF)
+	{
+#ifdef DEBUG
+	  /* It must not happen.  */
+	  assert (wc_idx >= 0);
+#endif
+	  --wc_idx;
+	  if (wc_idx < 0)
+	    return input->tip_context;
+	}
+      wc = input->wcs[wc_idx];
+      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
+	return CONTEXT_WORD;
+      return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
+	      ? CONTEXT_NEWLINE : 0);
+    }
+  else
+#endif
+    {
+      c = re_string_byte_at (input, idx);
+      if (bitset_contain (input->word_char, c))
+	return CONTEXT_WORD;
+      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
+    }
+}
+
+/* Functions for set operation.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_alloc (re_node_set *set, int size)
+{
+  /*
+   * ADR: valgrind says size can be 0, which then doesn't
+   * free the block of size 0.  Harumph. This seems
+   * to work ok, though.
+   */
+  if (size == 0)
+    {
+       memset(set, 0, sizeof(*set));
+       return REG_NOERROR;
+    }
+  set->alloc = size;
+  set->nelem = 0;
+  set->elems = re_malloc (int, size);
+  if (BE (set->elems == NULL, 0))
+    return REG_ESPACE;
+  return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_1 (re_node_set *set, int elem)
+{
+  set->alloc = 1;
+  set->nelem = 1;
+  set->elems = re_malloc (int, 1);
+  if (BE (set->elems == NULL, 0))
+    {
+      set->alloc = set->nelem = 0;
+      return REG_ESPACE;
+    }
+  set->elems[0] = elem;
+  return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
+{
+  set->alloc = 2;
+  set->elems = re_malloc (int, 2);
+  if (BE (set->elems == NULL, 0))
+    return REG_ESPACE;
+  if (elem1 == elem2)
+    {
+      set->nelem = 1;
+      set->elems[0] = elem1;
+    }
+  else
+    {
+      set->nelem = 2;
+      if (elem1 < elem2)
+	{
+	  set->elems[0] = elem1;
+	  set->elems[1] = elem2;
+	}
+      else
+	{
+	  set->elems[0] = elem2;
+	  set->elems[1] = elem1;
+	}
+    }
+  return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
+{
+  dest->nelem = src->nelem;
+  if (src->nelem > 0)
+    {
+      dest->alloc = dest->nelem;
+      dest->elems = re_malloc (int, dest->alloc);
+      if (BE (dest->elems == NULL, 0))
+	{
+	  dest->alloc = dest->nelem = 0;
+	  return REG_ESPACE;
+	}
+      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+    }
+  else
+    re_node_set_init_empty (dest);
+  return REG_NOERROR;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
+   DEST. Return value indicate the error code or REG_NOERROR if succeeded.
+   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
+			   const re_node_set *src2)
+{
+  int i1, i2, is, id, delta, sbase;
+  if (src1->nelem == 0 || src2->nelem == 0)
+    return REG_NOERROR;
+
+  /* We need dest->nelem + 2 * elems_in_intersection; this is a
+     conservative estimate.  */
+  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
+    {
+      int new_alloc = src1->nelem + src2->nelem + dest->alloc;
+      int *new_elems = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+	return REG_ESPACE;
+      dest->elems = new_elems;
+      dest->alloc = new_alloc;
+    }
+
+  /* Find the items in the intersection of SRC1 and SRC2, and copy
+     into the top of DEST those that are not already in DEST itself.  */
+  sbase = dest->nelem + src1->nelem + src2->nelem;
+  i1 = src1->nelem - 1;
+  i2 = src2->nelem - 1;
+  id = dest->nelem - 1;
+  for (;;)
+    {
+      if (src1->elems[i1] == src2->elems[i2])
+	{
+	  /* Try to find the item in DEST.  Maybe we could binary search?  */
+	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
+	    --id;
+
+	  if (id < 0 || dest->elems[id] != src1->elems[i1])
+	    dest->elems[--sbase] = src1->elems[i1];
+
+	  if (--i1 < 0 || --i2 < 0)
+	    break;
+	}
+
+      /* Lower the highest of the two items.  */
+      else if (src1->elems[i1] < src2->elems[i2])
+	{
+	  if (--i2 < 0)
+	    break;
+	}
+      else
+	{
+	  if (--i1 < 0)
+	    break;
+	}
+    }
+
+  id = dest->nelem - 1;
+  is = dest->nelem + src1->nelem + src2->nelem - 1;
+  delta = is - sbase + 1;
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place; this is more or
+     less the same loop that is in re_node_set_merge.  */
+  dest->nelem += delta;
+  if (delta > 0 && id >= 0)
+    for (;;)
+      {
+	if (dest->elems[is] > dest->elems[id])
+	  {
+	    /* Copy from the top.  */
+	    dest->elems[id + delta--] = dest->elems[is--];
+	    if (delta == 0)
+	      break;
+	  }
+	else
+	  {
+	    /* Slide from the bottom.  */
+	    dest->elems[id + delta] = dest->elems[id];
+	    if (--id < 0)
+	      break;
+	  }
+      }
+
+  /* Copy remaining SRC elements.  */
+  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
+
+  return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets SRC1 and SRC2. And store it to
+   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+			const re_node_set *src2)
+{
+  int i1, i2, id;
+  if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
+    {
+      dest->alloc = src1->nelem + src2->nelem;
+      dest->elems = re_malloc (int, dest->alloc);
+      if (BE (dest->elems == NULL, 0))
+	return REG_ESPACE;
+    }
+  else
+    {
+      if (src1 != NULL && src1->nelem > 0)
+	return re_node_set_init_copy (dest, src1);
+      else if (src2 != NULL && src2->nelem > 0)
+	return re_node_set_init_copy (dest, src2);
+      else
+	re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
+    {
+      if (src1->elems[i1] > src2->elems[i2])
+	{
+	  dest->elems[id++] = src2->elems[i2++];
+	  continue;
+	}
+      if (src1->elems[i1] == src2->elems[i2])
+	++i2;
+      dest->elems[id++] = src1->elems[i1++];
+    }
+  if (i1 < src1->nelem)
+    {
+      memcpy (dest->elems + id, src1->elems + i1,
+	     (src1->nelem - i1) * sizeof (int));
+      id += src1->nelem - i1;
+    }
+  else if (i2 < src2->nelem)
+    {
+      memcpy (dest->elems + id, src2->elems + i2,
+	     (src2->nelem - i2) * sizeof (int));
+      id += src2->nelem - i2;
+    }
+  dest->nelem = id;
+  return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets DEST and SRC. And store it to
+   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_merge (re_node_set *dest, const re_node_set *src)
+{
+  int is, id, sbase, delta;
+  if (src == NULL || src->nelem == 0)
+    return REG_NOERROR;
+  if (dest->alloc < 2 * src->nelem + dest->nelem)
+    {
+      int new_alloc = 2 * (src->nelem + dest->alloc);
+      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_buffer == NULL, 0))
+	return REG_ESPACE;
+      dest->elems = new_buffer;
+      dest->alloc = new_alloc;
+    }
+
+  if (BE (dest->nelem == 0, 0))
+    {
+      dest->nelem = src->nelem;
+      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+      return REG_NOERROR;
+    }
+
+  /* Copy into the top of DEST the items of SRC that are not
+     found in DEST.  Maybe we could binary search in DEST?  */
+  for (sbase = dest->nelem + 2 * src->nelem,
+       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+    {
+      if (dest->elems[id] == src->elems[is])
+	is--, id--;
+      else if (dest->elems[id] < src->elems[is])
+	dest->elems[--sbase] = src->elems[is--];
+      else /* if (dest->elems[id] > src->elems[is]) */
+	--id;
+    }
+
+  if (is >= 0)
+    {
+      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
+      sbase -= is + 1;
+      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
+    }
+
+  id = dest->nelem - 1;
+  is = dest->nelem + 2 * src->nelem - 1;
+  delta = is - sbase + 1;
+  if (delta == 0)
+    return REG_NOERROR;
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place.  */
+  dest->nelem += delta;
+  for (;;)
+    {
+      if (dest->elems[is] > dest->elems[id])
+	{
+	  /* Copy from the top.  */
+	  dest->elems[id + delta--] = dest->elems[is--];
+	  if (delta == 0)
+	    break;
+	}
+      else
+	{
+	  /* Slide from the bottom.  */
+	  dest->elems[id + delta] = dest->elems[id];
+	  if (--id < 0)
+	    {
+	      /* Copy remaining SRC elements.  */
+	      memcpy (dest->elems, dest->elems + sbase,
+		      delta * sizeof (int));
+	      break;
+	    }
+	}
+    }
+
+  return REG_NOERROR;
+}
+
+/* Insert the new element ELEM to the re_node_set* SET.
+   SET should not already have ELEM.
+   return -1 if an error is occured, return 1 otherwise.  */
+
+static int
+internal_function
+re_node_set_insert (re_node_set *set, int elem)
+{
+  int idx;
+  /* In case the set is empty.  */
+  if (set->alloc == 0)
+    {
+      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
+	return 1;
+      else
+	return -1;
+    }
+
+  if (BE (set->nelem, 0) == 0)
+    {
+      /* We already guaranteed above that set->alloc != 0.  */
+      set->elems[0] = elem;
+      ++set->nelem;
+      return 1;
+    }
+
+  /* Realloc if we need.  */
+  if (set->alloc == set->nelem)
+    {
+      int *new_elems;
+      set->alloc = set->alloc * 2;
+      new_elems = re_realloc (set->elems, int, set->alloc);
+      if (BE (new_elems == NULL, 0))
+	return -1;
+      set->elems = new_elems;
+    }
+
+  /* Move the elements which follows the new element.  Test the
+     first element separately to skip a check in the inner loop.  */
+  if (elem < set->elems[0])
+    {
+      idx = 0;
+      for (idx = set->nelem; idx > 0; idx--)
+	set->elems[idx] = set->elems[idx - 1];
+    }
+  else
+    {
+      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
+	set->elems[idx] = set->elems[idx - 1];
+    }
+
+  /* Insert the new element.  */
+  set->elems[idx] = elem;
+  ++set->nelem;
+  return 1;
+}
+
+/* Insert the new element ELEM to the re_node_set* SET.
+   SET should not already have any element greater than or equal to ELEM.
+   Return -1 if an error is occured, return 1 otherwise.  */
+
+static int
+internal_function
+re_node_set_insert_last (re_node_set *set, int elem)
+{
+  /* Realloc if we need.  */
+  if (set->alloc == set->nelem)
+    {
+      int *new_elems;
+      set->alloc = (set->alloc + 1) * 2;
+      new_elems = re_realloc (set->elems, int, set->alloc);
+      if (BE (new_elems == NULL, 0))
+	return -1;
+      set->elems = new_elems;
+    }
+
+  /* Insert the new element.  */
+  set->elems[set->nelem++] = elem;
+  return 1;
+}
+
+/* Compare two node sets SET1 and SET2.
+   return 1 if SET1 and SET2 are equivalent, return 0 otherwise.  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
+{
+  int i;
+  if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
+    return 0;
+  for (i = set1->nelem ; --i >= 0 ; )
+    if (set1->elems[i] != set2->elems[i])
+      return 0;
+  return 1;
+}
+
+/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_contains (const re_node_set *set, int elem)
+{
+  unsigned int idx, right, mid;
+  if (set->nelem <= 0)
+    return 0;
+
+  /* Binary search the element.  */
+  idx = 0;
+  right = set->nelem - 1;
+  while (idx < right)
+    {
+      mid = (idx + right) / 2;
+      if (set->elems[mid] < elem)
+	idx = mid + 1;
+      else
+	right = mid;
+    }
+  return set->elems[idx] == elem ? idx + 1 : 0;
+}
+
+static void
+internal_function
+re_node_set_remove_at (re_node_set *set, int idx)
+{
+  if (idx < 0 || idx >= set->nelem)
+    return;
+  --set->nelem;
+  for (; idx < set->nelem; idx++)
+    set->elems[idx] = set->elems[idx + 1];
+}
+
+
+/* Add the token TOKEN to dfa->nodes, and return the index of the token.
+   Or return -1, if an error will be occured.  */
+
+static int
+internal_function
+re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
+{
+  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
+    {
+      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
+      int *new_nexts, *new_indices;
+      re_node_set *new_edests, *new_eclosures;
+      re_token_t *new_nodes;
+
+      /* Avoid overflows in realloc.  */
+      const size_t max_object_size = MAX (sizeof (re_token_t),
+					  MAX (sizeof (re_node_set),
+					       sizeof (int)));
+      if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0))
+	return -1;
+
+      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
+      if (BE (new_nodes == NULL, 0))
+	return -1;
+      dfa->nodes = new_nodes;
+      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
+      if (BE (new_nexts == NULL || new_indices == NULL
+	      || new_edests == NULL || new_eclosures == NULL, 0))
+	return -1;
+      dfa->nexts = new_nexts;
+      dfa->org_indices = new_indices;
+      dfa->edests = new_edests;
+      dfa->eclosures = new_eclosures;
+      dfa->nodes_alloc = new_nodes_alloc;
+    }
+  dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].constraint = 0;
+#ifdef RE_ENABLE_I18N
+  dfa->nodes[dfa->nodes_len].accept_mb =
+    (token.type == OP_PERIOD && dfa->mb_cur_max > 1) || token.type == COMPLEX_BRACKET;
+#endif
+  dfa->nexts[dfa->nodes_len] = -1;
+  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
+  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
+  return dfa->nodes_len++;
+}
+
+static inline unsigned int
+internal_function
+calc_state_hash (const re_node_set *nodes, unsigned int context)
+{
+  unsigned int hash = nodes->nelem + context;
+  int i;
+  for (i = 0 ; i < nodes->nelem ; i++)
+    hash += nodes->elems[i];
+  return hash;
+}
+
+/* Search for the state whose node_set is equivalent to NODES.
+   Return the pointer to the state, if we found it in the DFA.
+   Otherwise create the new one and return it.  In case of an error
+   return NULL and set the error code in ERR.
+   Note: - We assume NULL as the invalid state, then it is possible that
+	   return value is NULL and ERR is REG_NOERROR.
+	 - We never return non-NULL value in case of any errors, it is for
+	   optimization.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+		  const re_node_set *nodes)
+{
+  unsigned int hash;
+  re_dfastate_t *new_state;
+  struct re_state_table_entry *spot;
+  int i;
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+  hash = calc_state_hash (nodes, 0);
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  for (i = 0 ; i < spot->num ; i++)
+    {
+      re_dfastate_t *state = spot->array[i];
+      if (hash != state->hash)
+	continue;
+      if (re_node_set_compare (&state->nodes, nodes))
+	return state;
+    }
+
+  /* There are no appropriate state in the dfa, create the new one.  */
+  new_state = create_ci_newstate (dfa, nodes, hash);
+  if (BE (new_state == NULL, 0))
+    *err = REG_ESPACE;
+
+  return new_state;
+}
+
+/* Search for the state whose node_set is equivalent to NODES and
+   whose context is equivalent to CONTEXT.
+   Return the pointer to the state, if we found it in the DFA.
+   Otherwise create the new one and return it.  In case of an error
+   return NULL and set the error code in ERR.
+   Note: - We assume NULL as the invalid state, then it is possible that
+	   return value is NULL and ERR is REG_NOERROR.
+	 - We never return non-NULL value in case of any errors, it is for
+	   optimization.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+			  const re_node_set *nodes, unsigned int context)
+{
+  unsigned int hash;
+  re_dfastate_t *new_state;
+  struct re_state_table_entry *spot;
+  int i;
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+  hash = calc_state_hash (nodes, context);
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  for (i = 0 ; i < spot->num ; i++)
+    {
+      re_dfastate_t *state = spot->array[i];
+      if (state->hash == hash
+	  && state->context == context
+	  && re_node_set_compare (state->entrance_nodes, nodes))
+	return state;
+    }
+  /* There are no appropriate state in `dfa', create the new one.  */
+  new_state = create_cd_newstate (dfa, nodes, context, hash);
+  if (BE (new_state == NULL, 0))
+    *err = REG_ESPACE;
+
+  return new_state;
+}
+
+/* Finish initialization of the new state NEWSTATE, and using its hash value
+   HASH put in the appropriate bucket of DFA's state table.  Return value
+   indicates the error code if failed.  */
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+		unsigned int hash)
+{
+  struct re_state_table_entry *spot;
+  reg_errcode_t err;
+  int i;
+
+  newstate->hash = hash;
+  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (i = 0; i < newstate->nodes.nelem; i++)
+    {
+      int elem = newstate->nodes.elems[i];
+      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
+	if (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0)
+	  return REG_ESPACE;
+    }
+
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+  if (BE (spot->alloc <= spot->num, 0))
+    {
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+					      new_alloc);
+      if (BE (new_array == NULL, 0))
+	return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+static void
+free_state (re_dfastate_t *state)
+{
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+  if (state->entrance_nodes != &state->nodes)
+    {
+      re_node_set_free (state->entrance_nodes);
+      re_free (state->entrance_nodes);
+    }
+  re_node_set_free (&state->nodes);
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_free (state);
+}
+
+/* Create the new state which is independ of contexts.
+   Return the new state if succeeded, otherwise return NULL.  */
+
+static re_dfastate_t *
+internal_function
+create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+		    unsigned int hash)
+{
+  int i;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->entrance_nodes = &newstate->nodes;
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      if (type == CHARACTER && !node->constraint)
+	continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+	newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+	newstate->has_backref = 1;
+      else if (type == ANCHOR || node->constraint)
+	newstate->has_constraint = 1;
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return newstate;
+}
+
+/* Create the new state which is depend on the context CONTEXT.
+   Return the new state if succeeded, otherwise return NULL.  */
+
+static re_dfastate_t *
+internal_function
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+		    unsigned int context, unsigned int hash)
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      if (type == CHARACTER && !constraint)
+	continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+	newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+	newstate->has_backref = 1;
+
+      if (constraint)
+	{
+	  if (newstate->entrance_nodes == &newstate->nodes)
+	    {
+	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
+	      if (BE (newstate->entrance_nodes == NULL, 0))
+		{
+		  free_state (newstate);
+		  return NULL;
+		}
+	      if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
+		  != REG_NOERROR)
+		return NULL;
+	      nctx_nodes = 0;
+	      newstate->has_constraint = 1;
+	    }
+
+	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+	    {
+	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+	      ++nctx_nodes;
+	    }
+	}
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return  newstate;
+}
diff --git a/compat/regex/regex_internal.h b/compat/regex/regex_internal.h
new file mode 100644
index 0000000..4184d7f
--- /dev/null
+++ b/compat/regex/regex_internal.h
@@ -0,0 +1,810 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002-2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _REGEX_INTERNAL_H
+#define _REGEX_INTERNAL_H 1
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+# include <langinfo.h>
+#endif
+#if defined HAVE_LOCALE_H || defined _LIBC
+# include <locale.h>
+#endif
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
+#if defined HAVE_STDBOOL_H || defined _LIBC
+# include <stdbool.h>
+#endif /* HAVE_STDBOOL_H || _LIBC */
+#if !defined(ZOS_USS)
+#if defined HAVE_STDINT_H || defined _LIBC
+# include <stdint.h>
+#endif /* HAVE_STDINT_H || _LIBC */
+#endif /* !ZOS_USS */
+#if defined _LIBC
+# include <bits/libc-lock.h>
+#else
+# define __libc_lock_define(CLASS,NAME)
+# define __libc_lock_init(NAME) do { } while (0)
+# define __libc_lock_lock(NAME) do { } while (0)
+# define __libc_lock_unlock(NAME) do { } while (0)
+#endif
+
+#ifndef GAWK
+/* In case that the system doesn't have isblank().  */
+#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
+# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#endif
+#else /* GAWK */
+/*
+ * This is a freaking mess. On glibc systems you have to define
+ * a magic constant to get isblank() out of <ctype.h>, since it's
+ * a C99 function.  To heck with all that and borrow a page from
+ * dfa.c's book.
+ */
+
+static int
+is_blank (int c)
+{
+   return (c == ' ' || c == '\t');
+}
+#endif /* GAWK */
+
+#ifdef _LIBC
+# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+#  define _RE_DEFINE_LOCALE_FUNCTIONS 1
+#   include <locale/localeinfo.h>
+#   include <locale/elem-hash.h>
+#   include <locale/coll-lookup.h>
+# endif
+#endif
+
+/* This is for other GNU distributions with internationalized messages.  */
+#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+#  undef gettext
+#  define gettext(msgid) \
+  INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+   strings.  */
+# define gettext_noop(String) String
+#endif
+
+/* For loser systems without the definition.  */
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#ifndef NO_MBSUPPORT
+#include "mbsupport.h" /* gawk */
+#endif
+#ifndef MB_CUR_MAX
+#define MB_CUR_MAX 1
+#endif
+
+#if (defined MBS_SUPPORT) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)
+#else
+# define BE(expr, val) (expr)
+# ifdef inline
+# undef inline
+# endif
+# define inline
+#endif
+
+/* Number of single byte character.  */
+#define SBC_MAX 256
+
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline.  */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Rename to standard API for using out of glibc.  */
+#ifndef _LIBC
+# ifdef __wctype
+# undef __wctype
+# endif
+# define __wctype wctype
+# ifdef __iswctype
+# undef __iswctype
+# endif
+# define __iswctype iswctype
+# define __btowc btowc
+# define __mbrtowc mbrtowc
+#undef __mempcpy	/* GAWK */
+# define __mempcpy mempcpy
+# define __wcrtomb wcrtomb
+# define __regfree regfree
+# define attribute_hidden
+#endif /* not _LIBC */
+
+#ifdef __GNUC__
+# define __attribute(arg) __attribute__ (arg)
+#else
+# define __attribute(arg)
+#endif
+
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* An integer used to represent a set of bits.  It must be unsigned,
+   and must be at least as wide as unsigned int.  */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t.  */
+#define BITSET_WORD_MAX ULONG_MAX
+/* Number of bits in a bitset_word_t.  */
+#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
+/* Number of bitset_word_t in a bit_set.  */
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+#define bitset_set(set,i) \
+  (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS)
+#define bitset_clear(set,i) \
+  (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS))
+#define bitset_contain(set,i) \
+  (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS))
+#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
+#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
+#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
+
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define WORD_DELIM_CONSTRAINT 0x0100
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200
+
+typedef enum
+{
+  INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+  LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+  BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+  BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+  WORD_DELIM = WORD_DELIM_CONSTRAINT,
+  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+} re_context_type;
+
+typedef struct
+{
+  int alloc;
+  int nelem;
+  int *elems;
+} re_node_set;
+
+typedef enum
+{
+  NON_TYPE = 0,
+
+  /* Node type, These are used by token, node, tree.  */
+  CHARACTER = 1,
+  END_OF_RE = 2,
+  SIMPLE_BRACKET = 3,
+  OP_BACK_REF = 4,
+  OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+  COMPLEX_BRACKET = 6,
+  OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+  /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
+     when the debugger shows values of this enum type.  */
+#define EPSILON_BIT 8
+  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+  OP_ALT = EPSILON_BIT | 2,
+  OP_DUP_ASTERISK = EPSILON_BIT | 3,
+  ANCHOR = EPSILON_BIT | 4,
+
+  /* Tree type, these are used only by tree. */
+  CONCAT = 16,
+  SUBEXP = 17,
+
+  /* Token type, these are used only by token.  */
+  OP_DUP_PLUS = 18,
+  OP_DUP_QUESTION,
+  OP_OPEN_BRACKET,
+  OP_CLOSE_BRACKET,
+  OP_CHARSET_RANGE,
+  OP_OPEN_DUP_NUM,
+  OP_CLOSE_DUP_NUM,
+  OP_NON_MATCH_LIST,
+  OP_OPEN_COLL_ELEM,
+  OP_CLOSE_COLL_ELEM,
+  OP_OPEN_EQUIV_CLASS,
+  OP_CLOSE_EQUIV_CLASS,
+  OP_OPEN_CHAR_CLASS,
+  OP_CLOSE_CHAR_CLASS,
+  OP_WORD,
+  OP_NOTWORD,
+  OP_SPACE,
+  OP_NOTSPACE,
+  BACK_SLASH
+
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+  /* Multibyte characters.  */
+  wchar_t *mbchars;
+
+  /* Collating symbols.  */
+# ifdef _LIBC
+  int32_t *coll_syms;
+# endif
+
+  /* Equivalence classes. */
+# ifdef _LIBC
+  int32_t *equiv_classes;
+# endif
+
+  /* Range expressions. */
+# ifdef _LIBC
+  uint32_t *range_starts;
+  uint32_t *range_ends;
+# else /* not _LIBC */
+  wchar_t *range_starts;
+  wchar_t *range_ends;
+# endif /* not _LIBC */
+
+  /* Character classes. */
+  wctype_t *char_classes;
+
+  /* If this character set is the non-matching list.  */
+  unsigned int non_match : 1;
+
+  /* # of multibyte characters.  */
+  int nmbchars;
+
+  /* # of collating symbols.  */
+  int ncoll_syms;
+
+  /* # of equivalence classes. */
+  int nequiv_classes;
+
+  /* # of range expressions. */
+  int nranges;
+
+  /* # of character classes. */
+  int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+typedef struct
+{
+  union
+  {
+    unsigned char c;		/* for CHARACTER */
+    re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+    re_charset_t *mbcset;	/* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+    int idx;			/* for BACK_REF */
+    re_context_type ctx_type;	/* for ANCHOR */
+  } opr;
+#if __GNUC__ >= 2
+  re_token_type_t type : 8;
+#else
+  re_token_type_t type;
+#endif
+  unsigned int constraint : 10;	/* context constraint */
+  unsigned int duplicated : 1;
+  unsigned int opt_subexp : 1;
+#ifdef RE_ENABLE_I18N
+  unsigned int accept_mb : 1;
+  /* These 2 bits can be moved into the union if needed (e.g. if running out
+     of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
+  unsigned int mb_partial : 1;
+#endif
+  unsigned int word_char : 1;
+} re_token_t;
+
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+
+struct re_string_t
+{
+  /* Indicate the raw buffer which is the original string passed as an
+     argument of regexec(), re_search(), etc..  */
+  const unsigned char *raw_mbs;
+  /* Store the multibyte string.  In case of "case insensitive mode" like
+     REG_ICASE, upper cases of the string are stored, otherwise MBS