!2 openEuler-mercurial

Merge pull request !2 from Liquor/next
This commit is contained in:
openeuler-ci-bot 2020-04-26 16:14:10 +08:00 committed by Gitee
commit 3105070580
7 changed files with 445 additions and 2 deletions

View File

@ -0,0 +1,78 @@
# HG changeset patch
# User Augie Fackler <augie@google.com>
# Date 1570565895 14400
# Node ID 2a0774e9d2a8ea3b452c416307ed1fc006010bce
# Parent 843da18386d580779a30b7d103615181a309262c
dirs: fix trivial over-read of input data
This code, introduced in 8c0a7eeda06d, was intentionally over-reading
an input string to avoid getting a shared string object for a one-byte
input. Unfortunately with an empty input (like in the case of a fuzzer
getting started) this was a trivial over-read and triggered an
AddressSanitizer failure.
I went out of my way to make sure the code still does the
copy-avoidance tricks. I don't think this change will cost us much
performance since the one-character strings should be cached
aggressively anyway.
Differential Revision: https://phab.mercurial-scm.org/D7030
diff -r 843da18386d5 -r 2a0774e9d2a8 mercurial/cext/dirs.c
--- a/mercurial/cext/dirs.c Sun Oct 06 23:36:52 2019 -0400
+++ b/mercurial/cext/dirs.c Tue Oct 08 16:18:15 2019 -0400
@@ -68,26 +68,41 @@
while ((pos = _finddir(cpath, pos - 1)) != -1) {
PyObject *val;
- /* It's likely that every prefix already has an entry
- in our dict. Try to avoid allocating and
- deallocating a string for each prefix we check. */
- if (key != NULL)
- ((PyBytesObject *)key)->ob_shash = -1;
- else {
- /* Force Python to not reuse a small shared string. */
- key = PyBytes_FromStringAndSize(cpath,
- pos < 2 ? 2 : pos);
+ if (pos < 2) {
+ key = PyBytes_FromStringAndSize(cpath, pos);
if (key == NULL)
goto bail;
+ } else {
+ /* It's likely that every prefix already has an entry
+ in our dict. Try to avoid allocating and
+ deallocating a string for each prefix we check. */
+ if (key != NULL)
+ ((PyBytesObject *)key)->ob_shash = -1;
+ else {
+ /* We know pos >= 2, so we won't get a small
+ * shared string. */
+ key = PyBytes_FromStringAndSize(cpath, pos);
+ if (key == NULL)
+ goto bail;
+ }
+ /* Py_SIZE(o) refers to the ob_size member of
+ * the struct. Yes, assigning to what looks
+ * like a function seems wrong. */
+ Py_SIZE(key) = pos;
+ ((PyBytesObject *)key)->ob_sval[pos] = '\0';
}
- /* Py_SIZE(o) refers to the ob_size member of the struct. Yes,
- * assigning to what looks like a function seems wrong. */
- Py_SIZE(key) = pos;
- ((PyBytesObject *)key)->ob_sval[pos] = '\0';
val = PyDict_GetItem(dirs, key);
if (val != NULL) {
PYLONG_VALUE(val) += 1;
+ if (pos < 2) {
+ /* This was a short string, so we
+ * probably got a small shared string
+ * we can't mutate on the next loop
+ * iteration. Clear it.
+ */
+ Py_CLEAR(key);
+ }
break;
}

View File

@ -0,0 +1,98 @@
# HG changeset patch
# User Augie Fackler <augie@google.com>
# Date 1571147645 14400
# Node ID ea62d7b06c129be54aaf0cf389b6e14dfedf638b
# Parent be178b5d91c823cf91ed28f6f369b902d3e2cdec
dirs: give formatting oversight to clang-format
Differential Revision: https://phab.mercurial-scm.org/D7104
diff -r be178b5d91c8 -r ea62d7b06c12 contrib/clang-format-ignorelist
--- a/contrib/clang-format-ignorelist Tue Oct 15 09:52:33 2019 -0400
+++ b/contrib/clang-format-ignorelist Tue Oct 15 09:54:05 2019 -0400
@@ -1,6 +1,5 @@
# Files that just need to be migrated to the formatter.
# Do not add new files here!
-mercurial/cext/dirs.c
mercurial/cext/manifest.c
mercurial/cext/osutil.c
# Vendored code that we should never format:
diff -r be178b5d91c8 -r ea62d7b06c12 mercurial/cext/dirs.c
--- a/mercurial/cext/dirs.c Tue Oct 15 09:52:33 2019 -0400
+++ b/mercurial/cext/dirs.c Tue Oct 15 09:54:05 2019 -0400
@@ -42,7 +42,7 @@
pos -= 1;
}
if (pos == -1) {
- return 0;
+ return 0;
}
return pos;
@@ -56,13 +56,13 @@
int ret = -1;
/* This loop is super critical for performance. That's why we inline
- * access to Python structs instead of going through a supported API.
- * The implementation, therefore, is heavily dependent on CPython
- * implementation details. We also commit violations of the Python
- * "protocol" such as mutating immutable objects. But since we only
- * mutate objects created in this function or in other well-defined
- * locations, the references are known so these violations should go
- * unnoticed. */
+ * access to Python structs instead of going through a supported API.
+ * The implementation, therefore, is heavily dependent on CPython
+ * implementation details. We also commit violations of the Python
+ * "protocol" such as mutating immutable objects. But since we only
+ * mutate objects created in this function or in other well-defined
+ * locations, the references are known so these violations should go
+ * unnoticed. */
while ((pos = _finddir(cpath, pos - 1)) != -1) {
PyObject *val;
@@ -120,7 +120,7 @@
val = PyDict_GetItem(dirs, key);
if (val == NULL) {
PyErr_SetString(PyExc_ValueError,
- "expected a value, found none");
+ "expected a value, found none");
goto bail;
}
@@ -152,7 +152,7 @@
if (skipchar) {
if (!dirstate_tuple_check(value)) {
PyErr_SetString(PyExc_TypeError,
- "expected a dirstate tuple");
+ "expected a dirstate tuple");
return -1;
}
if (((dirstateTupleObject *)value)->state == skipchar)
@@ -218,8 +218,8 @@
ret = dirs_fromdict(dirs, source, skipchar);
else if (skipchar)
PyErr_SetString(PyExc_ValueError,
- "skip character is only supported "
- "with a dict source");
+ "skip character is only supported "
+ "with a dict source");
else
ret = dirs_fromiter(dirs, source);
@@ -276,12 +276,12 @@
static PySequenceMethods dirs_sequence_methods;
static PyMethodDef dirs_methods[] = {
- {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
- {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
- {NULL} /* Sentinel */
+ {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
+ {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
+ {NULL} /* Sentinel */
};
-static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) };
+static PyTypeObject dirsType = {PyVarObject_HEAD_INIT(NULL, 0)};
void dirs_module_init(PyObject *mod)
{

View File

@ -0,0 +1,84 @@
# HG changeset patch
# User Augie Fackler <augie@google.com>
# Date 1571354962 14400
# Node ID 5d40317d42b7083b49467502549e25f144888cb3
# Parent 3a463e5e470b40c275091a38b1a4464e36c0c5a4
dirs: reject consecutive slashes in paths
We shouldn't ever see those, and the fuzzer got really excited that if
it gives us a 65k string with 55k slashes in it we use a lot of RAM.
This is a better fix than what I tried in D7105. It was suggested by
Yuya, and I verified it does in fact cause the fuzzer to not OOM.
This is a revision of D7234, but with the missing set of an error
added. I added a unit test of the dirs behavior because I needed to
reason more carefully about the failure modes around consecutive
slashes.
Differential Revision: https://phab.mercurial-scm.org/D7252
diff -r 3a463e5e470b -r 5d40317d42b7 mercurial/cext/dirs.c
--- a/mercurial/cext/dirs.c Tue Nov 05 22:56:12 2019 -0500
+++ b/mercurial/cext/dirs.c Thu Oct 17 19:29:22 2019 -0400
@@ -66,6 +66,14 @@
while ((pos = _finddir(cpath, pos - 1)) != -1) {
PyObject *val;
+ /* Sniff for trailing slashes, a marker of an invalid input. */
+ if (pos > 0 && cpath[pos - 1] == '/') {
+ PyErr_SetString(
+ PyExc_ValueError,
+ "found invalid consecutive slashes in path");
+ goto bail;
+ }
+
key = PyBytes_FromStringAndSize(cpath, pos);
if (key == NULL)
goto bail;
diff -r 3a463e5e470b -r 5d40317d42b7 mercurial/util.py
--- a/mercurial/util.py Tue Nov 05 22:56:12 2019 -0500
+++ b/mercurial/util.py Thu Oct 17 19:29:22 2019 -0400
@@ -3515,6 +3515,10 @@
def addpath(self, path):
dirs = self._dirs
for base in finddirs(path):
+ if base.endswith(b'/'):
+ raise ValueError(
+ "found invalid consecutive slashes in path: %r" % base
+ )
if base in dirs:
dirs[base] += 1
return
diff -r 3a463e5e470b -r 5d40317d42b7 tests/test-dirs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-dirs.py Thu Oct 17 19:29:22 2019 -0400
@@ -0,0 +1,27 @@
+from __future__ import absolute_import
+
+import unittest
+
+import silenttestrunner
+
+from mercurial import util
+
+
+class dirstests(unittest.TestCase):
+ def testdirs(self):
+ for case, want in [
+ (b'a/a/a', [b'a', b'a/a', b'']),
+ (b'alpha/beta/gamma', [b'', b'alpha', b'alpha/beta']),
+ ]:
+ d = util.dirs({})
+ d.addpath(case)
+ self.assertEqual(sorted(d), sorted(want))
+
+ def testinvalid(self):
+ with self.assertRaises(ValueError):
+ d = util.dirs({})
+ d.addpath(b'a//b')
+
+
+if __name__ == '__main__':
+ silenttestrunner.main(__name__)

View File

@ -0,0 +1,76 @@
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1570964769 -32400
# Node ID 9fa941faef94a18e493cd571246f8c1a8730bf35
# Parent 0d609ed185ea3847bfd6a5ec89d1c8efa373fdbb
dirs: remove mutable string optimization at all
As far as I can see, the optimization trick has been dead since 42e89b87ca79
"dirs: speed up by storing number of direct children per dir". After
42e89b87ca79, the key variable is cleared to NULL at each iteration.
diff -r 0d609ed185ea -r 9fa941faef94 mercurial/cext/dirs.c
--- a/mercurial/cext/dirs.c Tue Oct 15 12:14:44 2019 +0200
+++ b/mercurial/cext/dirs.c Sun Oct 13 20:06:09 2019 +0900
@@ -26,9 +26,6 @@
*
* We modify Python integers for refcounting, but those integers are
* never visible to Python code.
- *
- * We mutate strings in-place, but leave them immutable once they can
- * be seen by Python code.
*/
typedef struct {
PyObject_HEAD
@@ -63,46 +60,18 @@
* "protocol" such as mutating immutable objects. But since we only
* mutate objects created in this function or in other well-defined
* locations, the references are known so these violations should go
- * unnoticed. The code for adjusting the length of a PyBytesObject is
- * essentially a minimal version of _PyBytes_Resize. */
+ * unnoticed. */
while ((pos = _finddir(cpath, pos - 1)) != -1) {
PyObject *val;
- if (pos < 2) {
- key = PyBytes_FromStringAndSize(cpath, pos);
- if (key == NULL)
- goto bail;
- } else {
- /* It's likely that every prefix already has an entry
- in our dict. Try to avoid allocating and
- deallocating a string for each prefix we check. */
- if (key != NULL)
- ((PyBytesObject *)key)->ob_shash = -1;
- else {
- /* We know pos >= 2, so we won't get a small
- * shared string. */
- key = PyBytes_FromStringAndSize(cpath, pos);
- if (key == NULL)
- goto bail;
- }
- /* Py_SIZE(o) refers to the ob_size member of
- * the struct. Yes, assigning to what looks
- * like a function seems wrong. */
- Py_SIZE(key) = pos;
- ((PyBytesObject *)key)->ob_sval[pos] = '\0';
- }
+ key = PyBytes_FromStringAndSize(cpath, pos);
+ if (key == NULL)
+ goto bail;
val = PyDict_GetItem(dirs, key);
if (val != NULL) {
PYLONG_VALUE(val) += 1;
- if (pos < 2) {
- /* This was a short string, so we
- * probably got a small shared string
- * we can't mutate on the next loop
- * iteration. Clear it.
- */
- Py_CLEAR(key);
- }
+ Py_CLEAR(key);
break;
}

View File

@ -0,0 +1,69 @@
# HG changeset patch
# User Augie Fackler <augie@google.com>
# Date 1573571879 18000
# Node ID 0796e266d26bdc4e116012bb1f8039ee76f2e9c3
# Parent 38387f9e4d22056b5b75cb9918152447f739dd7d
dirs: resolve fuzzer OOM situation by disallowing deep directory hierarchies
It seems like 2048 directories ought to be enough for any reasonable
use of Mercurial?
A previous version of this patch scanned for slashes before any allocations
occurred. That approach is slower than this in the happy path, but much faster
than this in the case that too many slashes are encountered. We may want to
revisit it in the future using memchr() so it'll be well-optimized by the libc
we're using.
.. bc:
Mercurial will now defend against OOMs by refusing to operate on
paths with 2048 or more components. This means that _extremely_
deep path hierarchies will be rejected, but we anticipate nobody
is using hierarchies this deep.
Differential Revision: https://phab.mercurial-scm.org/D7411
diff -r 38387f9e4d22 -r 0796e266d26b mercurial/cext/dirs.c
--- a/mercurial/cext/dirs.c Thu Nov 14 14:14:11 2019 -0800
+++ b/mercurial/cext/dirs.c Tue Nov 12 10:17:59 2019 -0500
@@ -9,6 +9,7 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
+#include <string.h>
#include "util.h"
@@ -48,12 +49,19 @@
return pos;
}
+/* Mercurial will fail to run on directory hierarchies deeper than
+ * this constant, so we should try and keep this constant as big as
+ * possible.
+ */
+#define MAX_DIRS_DEPTH 2048
+
static int _addpath(PyObject *dirs, PyObject *path)
{
const char *cpath = PyBytes_AS_STRING(path);
Py_ssize_t pos = PyBytes_GET_SIZE(path);
PyObject *key = NULL;
int ret = -1;
+ size_t num_slashes = 0;
/* This loop is super critical for performance. That's why we inline
* access to Python structs instead of going through a supported API.
@@ -65,6 +73,12 @@
* unnoticed. */
while ((pos = _finddir(cpath, pos - 1)) != -1) {
PyObject *val;
+ ++num_slashes;
+ if (num_slashes > MAX_DIRS_DEPTH) {
+ PyErr_SetString(PyExc_ValueError,
+ "Directory hierarchy too deep.");
+ goto bail;
+ }
/* Sniff for trailing slashes, a marker of an invalid input. */
if (pos > 0 && cpath[pos - 1] == '/') {

View File

@ -0,0 +1,25 @@
# HG changeset patch
# User Augie Fackler <augie@google.com>
# Date 1571147553 14400
# Node ID be178b5d91c823cf91ed28f6f369b902d3e2cdec
# Parent 30570a056fa8396e6008e0ebb611ff3d7c020d22
dirs: tag a struct as not being formattable
Differential Revision: https://phab.mercurial-scm.org/D7103
diff -r 30570a056fa8 -r be178b5d91c8 mercurial/cext/dirs.c
--- a/mercurial/cext/dirs.c Wed Oct 02 14:38:34 2019 -0400
+++ b/mercurial/cext/dirs.c Tue Oct 15 09:52:33 2019 -0400
@@ -27,10 +27,12 @@
* We modify Python integers for refcounting, but those integers are
* never visible to Python code.
*/
+/* clang-format off */
typedef struct {
PyObject_HEAD
PyObject *dict;
} dirsObject;
+/* clang-format on */
static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
{

View File

@ -2,12 +2,19 @@
Name: mercurial
Version: 5.1
Release: 2
Release: 3
Summary: Source control management tool
License: GPLv2+
URL: http://www.selenic.com/mercurial/
Source0: http://www.selenic.com/mercurial/release/%{name}-%{version}.tar.gz
Patch0: dirs-fix-trivial-over-read-of-input-data.patch
Patch1: dirs-remove-mutable-string-optimization-at-all.patch
Patch2: dirs-tag-a-struct-as-not-being-formattable.patch
Patch3: dirs-give-formatting-oversight-to-clang-format.patch
Patch4: dirs-reject-consecutive-slashes-in-paths.patch
Patch5: dirs-resolve-fuzzer-OOM-situation-by-disallowing-deep-directory-hierarchies.patch
BuildRequires: gcc python2 python2-devel bash-completion emacs-nox emacs-el pkgconfig gettext python2-docutils
Requires: python2 emacs-filesystem tk
Provides: hg = %{version}-%{release} emacs-mercurial <= 3.4.1 emacs-mercurial-el <= 3.4.1
@ -23,7 +30,7 @@ It efficiently handles projects of any size and offers an easy and intuitive int
#Build sections
%prep
%autosetup -n %{name}-%{version}
%autosetup -n %{name}-%{version} -p1
sed -i 's|python|python2|' %{_builddir}/%{name}-%{version}/Makefile %{_builddir}/%{name}-%{version}/doc/Makefile
@ -131,6 +138,12 @@ grep -v locale %{name}-base.files > %{name}-base-filtered.files
%{_mandir}/man?/chg.*.gz
%changelog
* Thu Apr 23 2020 openEuler Buildteam <buildteam@openeuler.org> - 5.1-3
- Type:bugfix
- ID:NA
- SUG:NA
- DESC:fix the problems detected by oss-fuzz
* Thu Jan 9 2020 JeanLeo <liujianliu.liu@huawei.com> - 5.1-2
- Type:bugfix
- Id:NA