tcl/Fix-behavior-of-Tcl_GetRange-and-string-range-regard.patch
2019-09-30 11:18:08 -04:00

69 lines
2.4 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 4199a703dccda415778cc3431696a6bed57ab15c Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 11 Jan 2018 15:44:05 +0000
Subject: [PATCH 0834/1800] Fix behavior of Tcl_GetRange() and "string range"
regarding surrogates, when Tcl is compiled with -DTCL_UTF_MAX=4. Partial fix
for bug [11ae2be95dac9417]. Also, fix typo.
---
generic/tclStringObj.c | 13 ++++++++++++-
tests/string.test | 4 ++++
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 0f238cfb1..1b35c567b 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -533,7 +533,7 @@ Tcl_GetUniChar(
*
* Get the Unicode form of the String object. If the object is not
* already a String object, it will be converted to one. If the String
- * object does not have a Unicode rep, then one is create from the UTF
+ * object does not have a Unicode rep, then one is created from the UTF
* string format.
*
* Results:
@@ -667,6 +667,17 @@ Tcl_GetRange(
stringPtr = GET_STRING(objPtr);
}
+#if TCL_UTF_MAX == 4
+ /* See: bug [11ae2be95dac9417] */
+ if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00)
+ && ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) {
+ ++first;
+ }
+ if ((last+1<stringPtr->numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00)
+ && ((stringPtr->unicode[last]&0xFC00) == 0xD800)) {
+ ++last;
+ }
+#endif
return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1);
}
diff --git a/tests/string.test b/tests/string.test
index bbba5ebec..53f1cfb73 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -24,6 +24,7 @@ catch [list package require -exact Tcltest [info patchlevel]]
testConstraint testobj [expr {[info commands testobj] != {}}]
testConstraint testindexobj [expr {[info commands testindexobj] != {}}]
+testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}]
# Used for constraining memory leak tests
testConstraint memory [llength [info commands memory]]
@@ -1276,6 +1277,9 @@ test string-12.22 {string range, shimmering binary/index} {
binary scan $s a* x
string range $s $s end
} 000000001
+test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} fullutf {
+ list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3]
+} [list \U100000 {} b]
test string-13.1 {string repeat} {
list [catch {string repeat} msg] $msg
--
2.19.1