122 lines
5.1 KiB
Diff
122 lines
5.1 KiB
Diff
Partial backport of:
|
|
|
|
From bf79b61cb1677d6865c45d397435848a21e8a647 Mon Sep 17 00:00:00 2001
|
|
From: Ken Sharp <ken.sharp@artifex.com>
|
|
Date: Tue, 27 Sep 2022 13:03:57 +0100
|
|
Subject: [PATCH] PCL interpreter - fix decode_glyph for Unicode
|
|
|
|
The text extraction (and pdfwrite family) expect that decode_glyph
|
|
should always return pairs of bytes (an assumption that Unicode code
|
|
points are 2 bytes), and the return value from the routine should be
|
|
the number of bytes required to hold the value.
|
|
|
|
The PCL decode_glyph routine however was simply returning 1, which
|
|
caused the text extraction code some difficulty since it wasn't
|
|
expecting that.
|
|
|
|
This commit firstly alters the text extraction code to cope 'better'
|
|
with a decode_glyph routine which returns an odd value (basically
|
|
ignore it and fall back to using the character code).
|
|
|
|
We also alter the pl_decode_glyph routine to return 2 instead of 1,
|
|
so that it correctly tells the caller that it is returning 2 bytes.
|
|
Finally we make sure that the returned value is big-endian, because the
|
|
text extraction code assumes it will be.
|
|
---
|
|
devices/vector/doc_common.c | 8 ++++++++
|
|
pcl/pl/plfont.c | 12 +++++++++---
|
|
2 files changed, 17 insertions(+), 3 deletions(-)
|
|
|
|
--- a/devices/vector/doc_common.c
|
|
+++ b/devices/vector/doc_common.c
|
|
@@ -513,6 +513,14 @@ int txt_get_unicode(gx_device *dev, gs_f
|
|
char *b, *u;
|
|
int l = length - 1;
|
|
|
|
+ /* Real Unicode values should be at least 2 bytes. In fact I think the code assumes exactly
|
|
+ * 2 bytes. If we got an odd number, give up and return the character code.
|
|
+ */
|
|
+ if (length & 1) {
|
|
+ *Buffer = fallback;
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
unicode = (ushort *)gs_alloc_bytes(dev->memory, length, "temporary Unicode array");
|
|
length = font->procs.decode_glyph((gs_font *)font, glyph, ch, unicode, length);
|
|
#if ARCH_IS_BIG_ENDIAN
|
|
From d6e713dda4f8d75c6a4ed8c7568a0d4f532dcb17 Mon Sep 17 00:00:00 2001
|
|
From: Zdenek Hutyra <zhutyra@centrum.cz>
|
|
Date: Thu, 21 Nov 2024 10:04:17 +0000
|
|
Subject: Prevent Unicode decoding overrun
|
|
|
|
Bug #708132 "Text buffer overflow with long characters"
|
|
|
|
The txt_get_unicode function was copying too few bytes from the
|
|
fixed glyph name to unicode mapping tables. This was probably
|
|
causing incorrect Unicode code points in relatively rare cases but
|
|
not otherwise a problem.
|
|
|
|
However, a badly formed GlyphNames2Unicode array attached to a font
|
|
could cause the decoding to spill over the assigned buffer.
|
|
|
|
We really should rewrite the Unicode handling, but until we do, just
|
|
checking that the length is no more than 4 Unicode code points is
|
|
enough to prevent an overrun. All the current clients allocate at least
|
|
4 code points per character code.
|
|
|
|
Added a comment to explain the magic number.
|
|
|
|
CVE-2025-27831
|
|
---
|
|
devices/vector/doc_common.c | 14 +++++++++-----
|
|
1 file changed, 9 insertions(+), 5 deletions(-)
|
|
|
|
--- a/devices/vector/doc_common.c
|
|
+++ b/devices/vector/doc_common.c
|
|
@@ -463,7 +463,7 @@ int txt_get_unicode(gx_device *dev, gs_f
|
|
}
|
|
if (strlen(dentry->Glyph) == gnstr.size) {
|
|
if(memcmp(gnstr.data, dentry->Glyph, gnstr.size) == 0) {
|
|
- memcpy(Buffer, dentry->Unicode, 2);
|
|
+ memcpy(Buffer, dentry->Unicode, 2 * sizeof(unsigned short));
|
|
return 2;
|
|
}
|
|
}
|
|
@@ -481,7 +481,7 @@ int txt_get_unicode(gx_device *dev, gs_f
|
|
}
|
|
if (strlen(tentry->Glyph) == gnstr.size) {
|
|
if(memcmp(gnstr.data, tentry->Glyph, gnstr.size) == 0) {
|
|
- memcpy(Buffer, tentry->Unicode, 3);
|
|
+ memcpy(Buffer, tentry->Unicode, 3 * sizeof(unsigned short));
|
|
return 3;
|
|
}
|
|
}
|
|
@@ -499,7 +499,7 @@ int txt_get_unicode(gx_device *dev, gs_f
|
|
}
|
|
if (strlen(qentry->Glyph) == gnstr.size) {
|
|
if(memcmp(gnstr.data, qentry->Glyph, gnstr.size) == 0) {
|
|
- memcpy(Buffer, qentry->Unicode, 4);
|
|
+ memcpy(Buffer, qentry->Unicode, 4 * sizeof(unsigned short));
|
|
return 4;
|
|
}
|
|
}
|
|
@@ -511,12 +511,16 @@ int txt_get_unicode(gx_device *dev, gs_f
|
|
return 1;
|
|
} else {
|
|
char *b, *u;
|
|
- int l = length - 1;
|
|
+ int l;
|
|
|
|
/* Real Unicode values should be at least 2 bytes. In fact I think the code assumes exactly
|
|
* 2 bytes. If we got an odd number, give up and return the character code.
|
|
+ *
|
|
+ * The magic number here is due to the clients calling this code. Currently txtwrite and docxwrite
|
|
+ * allow up to 4 Unicode values per character/glyph, if the length would exceed that we can't
|
|
+ * write it. For now, again, fall back to the character code.
|
|
*/
|
|
- if (length & 1) {
|
|
+ if (length & 1 || length > 4 * sizeof(unsigned short)) {
|
|
*Buffer = fallback;
|
|
return 1;
|
|
}
|