From 5b7674bd226198c7599c71ee3331114e5db83786 Mon Sep 17 00:00:00 2001
From: huangduirong
Date: Sun, 26 Feb 2023 23:56:10 -0500
Subject: [PATCH] x

Store decoded string data as JSUINT32 code points instead of wchar_t.

The escape buffer of the decoder, the newString() callback of
JSONObjectDecoder and the Python binding now use JSUINT32 elements, and
the Python string is built with
PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, ...).  The WCHAR_MAX
dependent branches are removed: an escaped surrogate pair and a 4 byte
UTF-8 sequence are each stored as exactly one code point.

---
NOTE(review): this patch was reflowed from a copy whose line breaks were
collapsed.  The leading whitespace of the diff lines, the blank context
lines and the <...> header names on the "#include" lines are reconstructed;
confirm them against the target tree (or apply with "patch -l") before use.
The author e-mail address was not present in that copy.

NOTE(review): the surrogate-pair hunk (-456,12) must add only the combined
code point store.  Adding the sur[0] / sur[1] stores as well would emit
three elements for one escaped pair.  The "+" start lines of the later
lib/ultrajsondec.c hunks and the diffstat below assume this single added
line; the post-image blob id on that file's "index" line was not
recomputed.

 lib/ultrajson.h    |  3 +-
 lib/ultrajsondec.c | 71 +++++++++++++++++-----------------------------
 python/JSONtoObj.c | 13 +++++++--
 3 files changed, 38 insertions(+), 49 deletions(-)

diff --git a/lib/ultrajson.h b/lib/ultrajson.h
index c686bd0..3af7ca6 100644
--- a/lib/ultrajson.h
+++ b/lib/ultrajson.h
@@ -54,7 +54,6 @@ tree doesn't have cyclic references.
 #define __ULTRAJSON_H__
 
 #include <stdio.h>
-#include <wchar.h>
 
 // Don't output any extra whitespaces when encoding
 #define JSON_NO_EXTRA_WHITESPACE
@@ -316,7 +315,7 @@ EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *
 
 typedef struct __JSONObjectDecoder
 {
-  JSOBJ (*newString)(void *prv, wchar_t *start, wchar_t *end);
+  JSOBJ (*newString)(void *prv, JSUINT32 *start, JSUINT32 *end);
   void (*objectAddKey)(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value);
   void (*arrayAddItem)(void *prv, JSOBJ obj, JSOBJ value);
   JSOBJ (*newTrue)(void *prv);
diff --git a/lib/ultrajsondec.c b/lib/ultrajsondec.c
index 05b1452..cfa3be0 100644
--- a/lib/ultrajsondec.c
+++ b/lib/ultrajsondec.c
@@ -41,7 +41,6 @@ https://opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
 #include <assert.h>
 #include <string.h>
 #include <limits.h>
-#include <wchar.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <stdint.h>
@@ -58,8 +57,8 @@ struct DecoderState
 {
   char *start;
   char *end;
-  wchar_t *escStart;
-  wchar_t *escEnd;
+  JSUINT32 *escStart;
+  JSUINT32 *escEnd;
   int escHeap;
   int lastType;
   JSUINT32 objDepth;
@@ -309,8 +308,8 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds
   JSUTF16 sur[2] = { 0 };
   int iSur = 0;
   int index;
-  wchar_t *escOffset;
-  wchar_t *escStart;
+  JSUINT32 *escOffset;
+  JSUINT32 *escStart;
   size_t escLen = (ds->escEnd - ds->escStart);
   JSUINT8 *inputOffset;
   JSUINT8 oct;
@@ -324,11 +323,11 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds
 
     if (ds->escHeap)
     {
-      if (newSize > (SIZE_MAX / sizeof(wchar_t)))
+      if (newSize > (SIZE_MAX / sizeof(JSUINT32)))
       {
         return SetError(ds, -1, "Could not reserve memory block");
       }
-      escStart = (wchar_t *)ds->dec->realloc(ds->escStart, newSize * sizeof(wchar_t));
+      escStart = (JSUINT32 *)ds->dec->realloc(ds->escStart, newSize * sizeof(JSUINT32));
       if (!escStart)
       {
         ds->dec->free(ds->escStart);
@@ -338,18 +337,18 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds
     }
     else
     {
-      wchar_t *oldStart = ds->escStart;
-      if (newSize > (SIZE_MAX / sizeof(wchar_t)))
+      JSUINT32 *oldStart = ds->escStart;
+      if (newSize > (SIZE_MAX / sizeof(JSUINT32)))
       {
         return SetError(ds, -1, "Could not reserve memory block");
       }
-      ds->escStart = (wchar_t *) ds->dec->malloc(newSize * sizeof(wchar_t));
+      ds->escStart = (JSUINT32 *) ds->dec->malloc(newSize * sizeof(JSUINT32));
       if (!ds->escStart)
       {
         return SetError(ds, -1, "Could not reserve memory block");
       }
       ds->escHeap = 1;
-      memcpy(ds->escStart, oldStart, escLen * sizeof(wchar_t));
+      memcpy(ds->escStart, oldStart, escLen * sizeof(JSUINT32));
     }
 
     ds->escEnd = ds->escStart + newSize;
@@ -382,14 +381,14 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds
         inputOffset ++;
         switch (*inputOffset)
         {
-          case '\\': *(escOffset++) = L'\\'; inputOffset++; continue;
-          case '\"': *(escOffset++) = L'\"'; inputOffset++; continue;
-          case '/': *(escOffset++) = L'/'; inputOffset++; continue;
-          case 'b': *(escOffset++) = L'\b'; inputOffset++; continue;
-          case 'f': *(escOffset++) = L'\f'; inputOffset++; continue;
-          case 'n': *(escOffset++) = L'\n'; inputOffset++; continue;
-          case 'r': *(escOffset++) = L'\r'; inputOffset++; continue;
-          case 't': *(escOffset++) = L'\t'; inputOffset++; continue;
+          case '\\': *(escOffset++) = '\\'; inputOffset++; continue;
+          case '\"': *(escOffset++) = '\"'; inputOffset++; continue;
+          case '/': *(escOffset++) = '/'; inputOffset++; continue;
+          case 'b': *(escOffset++) = '\b'; inputOffset++; continue;
+          case 'f': *(escOffset++) = '\f'; inputOffset++; continue;
+          case 'n': *(escOffset++) = '\n'; inputOffset++; continue;
+          case 'r': *(escOffset++) = '\r'; inputOffset++; continue;
+          case 't': *(escOffset++) = '\t'; inputOffset++; continue;
 
           case 'u':
           {
@@ -446,7 +445,7 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds
                 iSur ++;
                 break;
               }
-              (*escOffset++) = (wchar_t) sur[iSur];
+              (*escOffset++) = (JSUINT32) sur[iSur];
               iSur = 0;
             }
             else
@@ -456,12 +455,7 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds
               {
                 return SetError (ds, -1, "Unpaired high surrogate when decoding 'string'");
               }
-#if WCHAR_MAX == 0xffff
-              (*escOffset++) = (wchar_t) sur[0];
-              (*escOffset++) = (wchar_t) sur[1];
-#else
-              (*escOffset++) = (wchar_t) 0x10000 + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00));
-#endif
+              (*escOffset++) = (JSUINT32) 0x10000 + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00));
               iSur = 0;
             }
             break;
@@ -475,7 +469,7 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds
 
       case 1:
       {
-        *(escOffset++) = (wchar_t) (*inputOffset++);
+        *(escOffset++) = (JSUINT32) (*inputOffset++);
         break;
       }
 
@@ -489,7 +483,7 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds
         }
         ucs |= (*inputOffset++) & 0x3f;
         if (ucs < 0x80) return SetError (ds, -1, "Overlong 2 byte UTF-8 sequence detected when decoding 'string'");
-        *(escOffset++) = (wchar_t) ucs;
+        *(escOffset++) = (JSUINT32) ucs;
         break;
       }
 
@@ -512,7 +506,7 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds
         }
 
         if (ucs < 0x800) return SetError (ds, -1, "Overlong 3 byte UTF-8 sequence detected when encoding string");
-        *(escOffset++) = (wchar_t) ucs;
+        *(escOffset++) = (JSUINT32) ucs;
         break;
       }
 
@@ -536,20 +530,7 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds
 
         if (ucs < 0x10000) return SetError (ds, -1, "Overlong 4 byte UTF-8 sequence detected when decoding 'string'");
 
-#if WCHAR_MAX == 0xffff
-        if (ucs >= 0x10000)
-        {
-          ucs -= 0x10000;
-          *(escOffset++) = (wchar_t) (ucs >> 10) + 0xd800;
-          *(escOffset++) = (wchar_t) (ucs & 0x3ff) + 0xdc00;
-        }
-        else
-        {
-          *(escOffset++) = (wchar_t) ucs;
-        }
-#else
-        *(escOffset++) = (wchar_t) ucs;
-#endif
+        *(escOffset++) = (JSUINT32) ucs;
         break;
       }
     }
@@ -760,14 +741,14 @@ JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuf
   /*
   FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode escaping doesn't run into the wall each time */
   struct DecoderState ds;
-  wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))];
+  JSUINT32 escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(JSUINT32))];
   JSOBJ ret;
 
   ds.start = (char *) buffer;
   ds.end = ds.start + cbBuffer;
 
   ds.escStart = escBuffer;
-  ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t));
+  ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(JSUINT32));
   ds.escHeap = 0;
   ds.prv = dec->prv;
   ds.dec = dec;
diff --git a/python/JSONtoObj.c b/python/JSONtoObj.c
index cc752a4..6dcc561 100644
--- a/python/JSONtoObj.c
+++ b/python/JSONtoObj.c
@@ -58,9 +58,18 @@ static void Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value)
   return;
 }
 
-static JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end)
+/*
+Check that Py_UCS4 is the same as JSUINT32, else Object_newString will fail.
+Based on Linux's check in vbox_vmmdev_types.h.
+This should be replaced with
+  _Static_assert(sizeof(Py_UCS4) == sizeof(JSUINT32));
+when C11 is made mandatory (CPython 3.11+, PyPy ?).
+*/
+typedef char assert_py_ucs4_is_jsuint32[1 - 2*!(sizeof(Py_UCS4) == sizeof(JSUINT32))];
+
+static JSOBJ Object_newString(void *prv, JSUINT32 *start, JSUINT32 *end)
 {
-  return PyUnicode_FromWideChar (start, (end - start));
+  return PyUnicode_FromKindAndData (PyUnicode_4BYTE_KIND, (Py_UCS4 *) start, (end - start));
 }
 
 static JSOBJ Object_newTrue(void *prv)
-- 
2.35.1