From ae3510465ceb3586b15bef050257215ef37dfb68 Mon Sep 17 00:00:00 2001
From: hdliu
Date: Mon, 28 Apr 2025 10:07:30 +0800
Subject: [PATCH] Validate Chunked-Encoding chunk footer
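
Previously ChunkedReader discarded the two bytes after each chunk without
checking them, instead of validating that they are the b"\r\n" footer
required by the chunked transfer coding. A malformed body such as
b"3\r\nxxx__1a\r\n" was therefore decoded by silently skipping the bogus
"__" footer and treating "1a" as the next chunk size, while HTTP
processors that ignore the chunk length and scan for \r\n read the same
body as b"xxx__1a". Whenever two HTTP processors accept the same input
but interpret it differently, there is a possibility of request
smuggling, so track the expected footer bytes and raise
LocalProtocolError as soon as they fail to match.
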
Signed-off-by: hdliu
---
 h11/_readers.py      | 23 ++++++++++--------
 h11/tests/test_io.py | 56 ++++++++++++++++++++++++++++++--------------
 2 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/h11/_readers.py b/h11/_readers.py
index 08a9574..7db4dac 100644
--- a/h11/_readers.py
+++ b/h11/_readers.py
@@ -148,10 +148,9 @@ chunk_header_re = re.compile(chunk_header.encode("ascii"))
 class ChunkedReader:
     def __init__(self) -> None:
         self._bytes_in_chunk = 0
-        # After reading a chunk, we have to throw away the trailing \r\n; if
-        # this is >0 then we discard that many bytes before resuming regular
-        # de-chunkification.
-        self._bytes_to_discard = 0
+        # After reading a chunk, we have to throw away the trailing \r\n.
+        # This tracks the bytes that we need to match and throw away.
+        self._bytes_to_discard = b""
         self._reading_trailer = False
 
     def __call__(self, buf: ReceiveBuffer) -> Union[Data, EndOfMessage, None]:
@@ -160,15 +159,19 @@ class ChunkedReader:
             if lines is None:
                 return None
             return EndOfMessage(headers=list(_decode_header_lines(lines)))
-        if self._bytes_to_discard > 0:
-            data = buf.maybe_extract_at_most(self._bytes_to_discard)
+        if self._bytes_to_discard:
+            data = buf.maybe_extract_at_most(len(self._bytes_to_discard))
             if data is None:
                 return None
-            self._bytes_to_discard -= len(data)
-            if self._bytes_to_discard > 0:
+            if data != self._bytes_to_discard[:len(data)]:
+                raise LocalProtocolError(
+                    f"malformed chunk footer: {data!r} (expected {self._bytes_to_discard!r})"
+                )
+            self._bytes_to_discard = self._bytes_to_discard[len(data):]
+            if self._bytes_to_discard:
                 return None
             # else, fall through and read some more
-        assert self._bytes_to_discard == 0
+        assert self._bytes_to_discard == b""
         if self._bytes_in_chunk == 0:
             # We need to refill our chunk count
             chunk_header = buf.maybe_extract_next_line()
@@ -194,7 +197,7 @@ class ChunkedReader:
             return None
         self._bytes_in_chunk -= len(data)
         if self._bytes_in_chunk == 0:
-            self._bytes_to_discard = 2
+            self._bytes_to_discard = b"\r\n"
             chunk_end = True
         else:
             chunk_end = False
diff --git a/h11/tests/test_io.py b/h11/tests/test_io.py
index 2b47c0e..c759077 100644
--- a/h11/tests/test_io.py
+++ b/h11/tests/test_io.py
@@ -360,22 +360,34 @@ def _run_reader(*args: Any) -> List[Event]:
     return normalize_data_events(events)
 
 
-def t_body_reader(thunk: Any, data: bytes, expected: Any, do_eof: bool = False) -> None:
+def t_body_reader(thunk: Any, data: bytes, expected: list, do_eof: bool = False) -> None:
     # Simple: consume whole thing
     print("Test 1")
     buf = makebuf(data)
-    assert _run_reader(thunk(), buf, do_eof) == expected
+    try:
+        assert _run_reader(thunk(), buf, do_eof) == expected
+    except LocalProtocolError:
+        if LocalProtocolError in expected:
+            pass
+        else:
+            raise
 
     # Incrementally growing buffer
    print("Test 2")
     reader = thunk()
     buf = ReceiveBuffer()
     events = []
-    for i in range(len(data)):
-        events += _run_reader(reader, buf, False)
-        buf += data[i : i + 1]
-    events += _run_reader(reader, buf, do_eof)
-    assert normalize_data_events(events) == expected
+    try:
+        for i in range(len(data)):
+            events += _run_reader(reader, buf, False)
+            buf += data[i : i + 1]
+        events += _run_reader(reader, buf, do_eof)
+        assert normalize_data_events(events) == expected
+    except LocalProtocolError:
+        if LocalProtocolError in expected:
+            pass
+        else:
+            raise
 
     is_complete = any(type(event) is EndOfMessage for event in expected)
     if is_complete and not do_eof:
@@ -436,14 +448,12 @@ def test_ChunkedReader() -> None:
     )
 
     # refuses arbitrarily long chunk integers
-    with pytest.raises(LocalProtocolError):
-        # Technically this is legal HTTP/1.1, but we refuse to process chunk
-        # sizes that don't fit into 20 characters of hex
-        t_body_reader(ChunkedReader, b"9" * 100 + b"\r\nxxx", [Data(data=b"xxx")])
+    # Technically this is legal HTTP/1.1, but we refuse to process chunk
+    # sizes that don't fit into 20 characters of hex
+    t_body_reader(ChunkedReader, b"9" * 100 + b"\r\nxxx", [LocalProtocolError])
 
     # refuses garbage in the chunk count
-    with pytest.raises(LocalProtocolError):
-        t_body_reader(ChunkedReader, b"10\x00\r\nxxx", None)
+    t_body_reader(ChunkedReader, b"10\x00\r\nxxx", [LocalProtocolError])
 
     # handles (and discards) "chunk extensions" omg wtf
     t_body_reader(
@@ -457,10 +467,22 @@ def test_ChunkedReader() -> None:
 
     t_body_reader(
         ChunkedReader,
-        b"5 \r\n01234\r\n" + b"0\r\n\r\n",
+        b"5 \t \r\n01234\r\n" + b"0\r\n\r\n",
         [Data(data=b"01234"), EndOfMessage()],
     )
 
+    # Chunked encoding with bad chunk termination characters is refused. Originally we
+    # simply dropped the 2 bytes after a chunk, instead of validating that the bytes
+    # were \r\n -- so we would successfully decode the data below as b"xxxa". And
+    # apparently there are other HTTP processors that ignore the chunk length and just
+    # keep reading until they see \r\n, so they would decode it as b"xxx__1a". Any time
+    # two HTTP processors accept the same input but interpret it differently, there's a
+    # possibility of request smuggling shenanigans. So we now reject this.
+    t_body_reader(ChunkedReader, b"3\r\nxxx__1a\r\n", [LocalProtocolError])
+
+    # Confirm we check both bytes individually
+    t_body_reader(ChunkedReader, b"3\r\nxxx\r_1a\r\n", [LocalProtocolError])
+    t_body_reader(ChunkedReader, b"3\r\nxxx_\n1a\r\n", [LocalProtocolError])
 
 def test_ContentLengthWriter() -> None:
     w = ContentLengthWriter(5)
@@ -483,8 +505,8 @@ def test_ContentLengthWriter() -> None:
         dowrite(w, EndOfMessage())
 
     w = ContentLengthWriter(5)
-    dowrite(w, Data(data=b"123")) == b"123"
-    dowrite(w, Data(data=b"45")) == b"45"
+    assert dowrite(w, Data(data=b"123")) == b"123"
+    assert dowrite(w, Data(data=b"45")) == b"45"
     with pytest.raises(LocalProtocolError):
         dowrite(w, EndOfMessage(headers=[("Etag", "asdf")]))
-- 
2.33.0
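
To exercise the new check by hand, here is a minimal sketch (assuming an
environment where h11 with this patch applied is importable; ChunkedReader
and ReceiveBuffer are the same h11 internals that test_io.py imports):

    from h11 import LocalProtocolError
    from h11._readers import ChunkedReader
    from h11._receivebuffer import ReceiveBuffer

    # Payload from the new tests: the two bytes after the 3-byte chunk
    # are b"__" instead of the required b"\r\n" footer.
    buf = ReceiveBuffer()
    buf += b"3\r\nxxx__1a\r\n"

    reader = ChunkedReader()
    events = []
    try:
        while True:
            event = reader(buf)
            if event is None:  # reader is waiting for more data
                break
            events.append(event)
    except LocalProtocolError as exc:
        print("rejected:", exc)

    print(events)  # the chunk payload was already emitted as a Data event

The first call still yields the b"xxx" chunk data; the rejection happens on
the following call, when the reader compares the two footer bytes against
the expected b"\r\n" instead of blindly discarding them.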