raven-rhel8/base/python-twisted/0004-Fix-CVE-2022-24801.patch
2024-06-29 12:12:15 +06:00

438 lines
15 KiB
Diff

From c26f9865d02ac6cf43a560a25a6201439c33640d Mon Sep 17 00:00:00 2001
From: Tom Most <twm@freecog.net>
Date: Sat, 5 Mar 2022 23:26:55 -0800
Subject: [PATCH 4/4] Fix CVE-2022-24801
Some tests for GHSA-c2jg-hw38-jrqq
(cherry picked from commit 22b067793cbcd0fb5dee04cfd9115fa85a7ca110)
Replace obs-fold with a single space
(cherry picked from commit 79ee8c564ca0d4c2910c8859e0a6014d2dc40005)
Strip only spaces and tabs from header values
(cherry picked from commit c3a4e1d015740c1d87a3ec7d57570257e75b0062)
Reject non-digit Content-Length
(cherry picked from commit 8ebfa8f6577431226e109ff98ba48f5152a2c416)
Test for malformed chunk size and extensions
(cherry picked from commit f22d0d9c889822adb7eaf84b42a20ff5f7c4d421)
Reject malformed chunk sizes
(cherry picked from commit 0275152f147506c82868ff1dabd9bf655ab67946)
We should deprecate http.fromChunk
(cherry picked from commit 2a5763d5b168372abb591c0eb6323ed4dfe8a4fc)
Correct chunk extension byte validation
Go back to the RFC to figure out the correct allowed ranges.
(cherry picked from commit fa9caa54d63399b4ccdfbf0429ba1b504ccc7c89)
Address review feedback
(cherry picked from commit 2bbd6c89110f0d44d2bb109c14d787f65bca9df8)
---
src/twisted/web/http.py | 83 ++++++++++++-
src/twisted/web/test/test_http.py | 193 +++++++++++++++++++++++++++++-
2 files changed, 268 insertions(+), 8 deletions(-)
diff --git a/src/twisted/web/http.py b/src/twisted/web/http.py
index 71188a8d50..e988601789 100644
--- a/src/twisted/web/http.py
+++ b/src/twisted/web/http.py
@@ -340,10 +340,39 @@ def toChunk(data):
+def _ishexdigits(b):
+ """
+ Is the string case-insensitively hexadecimal?
+
+ It must be composed of one or more characters in the ranges a-f, A-F
+ and 0-9.
+ """
+ for c in b:
+ if c not in b"0123456789abcdefABCDEF":
+ return False
+ return b != b""
+
+
+def _hexint(b):
+ """
+ Decode a hexadecimal integer.
+
+ Unlike C{int(b, 16)}, this raises L{ValueError} when the integer has
+ a prefix like C{b'0x'}, C{b'+'}, or C{b'-'}, which is desirable when
+ parsing network protocols.
+ """
+ if not _ishexdigits(b):
+ raise ValueError(b)
+ return int(b, 16)
+
+
def fromChunk(data):
"""
Convert chunk to string.
+ Note that this function is not specification compliant: it doesn't handle
+ chunk extensions.
+
@type data: C{bytes}
@return: tuple of (result, remaining) - both C{bytes}.
@@ -352,7 +381,7 @@ def fromChunk(data):
byte string.
"""
prefix, rest = data.split(b'\r\n', 1)
- length = int(prefix, 16)
+ length = _hexint(prefix)
if length < 0:
raise ValueError("Chunk length must be >= 0, not %d" % (length,))
if rest[length:length + 2] != b'\r\n':
@@ -1774,6 +1803,46 @@ class _IdentityTransferDecoder(object):
raise _DataLoss()
+_chunkExtChars = (
+ b"\t !\"#$%&'()*+,-./0123456789:;<=>?@"
+ b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`"
+ b"abcdefghijklmnopqrstuvwxyz{|}~"
+ b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+ b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+ b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+ b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+ b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+ b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+ b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+ b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+)
+"""
+Characters that are valid in a chunk extension.
+
+See RFC 7230 section 4.1.1::
+
+ chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
+
+ chunk-ext-name = token
+ chunk-ext-val = token / quoted-string
+
+And section 3.2.6::
+
+ token = 1*tchar
+
+ tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+ / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+ / DIGIT / ALPHA
+ ; any VCHAR, except delimiters
+
+ quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+ qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
+ obs-text = %x80-FF
+
+We don't check if chunk extensions are well-formed beyond validating that they
+don't contain characters outside this range.
+"""
+
class _ChunkedTransferDecoder(object):
"""
@@ -1826,10 +1895,14 @@ class _ChunkedTransferDecoder(object):
line, rest = data.split(b'\r\n', 1)
parts = line.split(b';')
try:
- self.length = int(parts[0], 16)
+ self.length = _hexint(parts[0])
except ValueError:
raise _MalformedChunkedDataError(
"Chunk-size must be an integer.")
+ # chunk-ext may repeat (RFC 7230 4.1.1): validate every ";"-separated part.
+ if any(p.translate(None, _chunkExtChars) != b"" for p in parts[1:]):
+ raise _MalformedChunkedDataError(
+ "Invalid characters in chunk extensions: %r." % (parts[1:],))
if self.length == 0:
self.state = 'TRAILER'
else:
@@ -2156,7 +2229,7 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
self.setRawMode()
elif line[0] in b' \t':
# Continuation of a multi line header.
- self.__header = self.__header + b'\n' + line
+ self.__header += b" " + line.lstrip(b" \t")
# Regular header line.
# Processing of header line is delayed to allow accumulating multi
# line headers.
@@ -2184,6 +2257,8 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
# Can this header determine the length?
if header == b'content-length':
+ if not data.isdigit():
+ return fail()
try:
length = int(data)
except ValueError:
@@ -2235,7 +2310,7 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
return False
header = header.lower()
- data = data.strip()
+ data = data.strip(b" \t")
if not self._maybeChooseTransferDecoder(header, data):
return False
diff --git a/src/twisted/web/test/test_http.py b/src/twisted/web/test/test_http.py
index 8f2c1bac21..d6f1fc0673 100644
--- a/src/twisted/web/test/test_http.py
+++ b/src/twisted/web/test/test_http.py
@@ -1300,6 +1300,43 @@ class ChunkedTransferEncodingTests(unittest.TestCase):
p.dataReceived(b'3; x-foo=bar\r\nabc\r\n')
self.assertEqual(L, [b'abc'])
+ def test_extensionsMalformed(self):
+ """
+ L{_ChunkedTransferDecoder.dataReceived} raises
+ L{_MalformedChunkedDataError} when the chunk extension fields contain
+ invalid characters.
+
+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+ """
+ invalidControl = (
+ b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\n\x0b\x0c\r\x0e\x0f"
+ b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+ )
+ invalidDelimiter = b"\\"
+ invalidDel = b"\x7f"
+ for b in invalidControl + invalidDelimiter + invalidDel:
+ data = b"3; " + bytes((b,)) + b"\r\nabc\r\n"
+ p = http._ChunkedTransferDecoder(
+ lambda b: None, # pragma: nocov
+ lambda b: None, # pragma: nocov
+ )
+ self.assertRaises(http._MalformedChunkedDataError, p.dataReceived, data)
+
+ def test_malformedChunkSizeHex(self):
+ """
+ L{_ChunkedTransferDecoder.dataReceived} raises
+ L{_MalformedChunkedDataError} when the chunk size is prefixed with
+ "0x", as if it were a Python integer literal.
+
+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+ """
+ p = http._ChunkedTransferDecoder(
+ lambda b: None, # pragma: nocov
+ lambda b: None, # pragma: nocov
+ )
+ self.assertRaises(
+ http._MalformedChunkedDataError, p.dataReceived, b"0x3\r\nabc\r\n"
+ )
def test_finish(self):
"""
@@ -1389,6 +1426,8 @@ class ChunkingTests(unittest.TestCase, ResponseTestMixin):
chunked = b''.join(http.toChunk(s))
self.assertEqual((s, b''), http.fromChunk(chunked))
self.assertRaises(ValueError, http.fromChunk, b'-5\r\nmalformed!\r\n')
+ self.assertRaises(ValueError, http.fromChunk, b"0xa\r\nmalformed!\r\n")
+ self.assertRaises(ValueError, http.fromChunk, b"0XA\r\nmalformed!\r\n")
def testConcatenatedChunks(self):
chunked = b''.join([b''.join(http.toChunk(t)) for t in self.strings])
@@ -1579,7 +1618,12 @@ class ParsingTests(unittest.TestCase):
Line folded headers are handled by L{HTTPChannel} by replacing each
fold with a single space by the time they are made available to the
L{Request}. Any leading whitespace in the folded lines of the header
- value is preserved.
+ value is replaced with a single space, per:
+
+ A server that receives an obs-fold in a request message ... MUST
+ ... replace each received obs-fold with one or more SP octets prior
+ to interpreting the field value or forwarding the message
+ downstream.
See RFC 7230 section 3.2.4.
"""
@@ -1616,17 +1660,66 @@ class ParsingTests(unittest.TestCase):
)
self.assertEqual(
request.requestHeaders.getRawHeaders(b"space"),
- [b"space space"],
+ [b"space space"],
)
self.assertEqual(
request.requestHeaders.getRawHeaders(b"spaces"),
- [b"spaces spaces spaces"],
+ [b"spaces spaces spaces"],
)
self.assertEqual(
request.requestHeaders.getRawHeaders(b"tab"),
- [b"t \ta \tb"],
+ [b"t a b"],
)
+ def test_headerStripWhitespace(self):
+ """
+ Leading and trailing space and tab characters are stripped from
+ headers. Other forms of whitespace are preserved.
+
+ See RFC 7230 section 3.2.3 and 3.2.4.
+ """
+ processed = []
+
+ class MyRequest(http.Request):
+ def process(self):
+ processed.append(self)
+ self.finish()
+
+ requestLines = [
+ b"GET / HTTP/1.0",
+ b"spaces: spaces were stripped ",
+ b"tabs: \t\ttabs were stripped\t\t",
+ b"spaces-and-tabs: \t \t spaces and tabs were stripped\t \t",
+ b"line-tab: \v vertical tab was preserved\v\t",
+ b"form-feed: \f form feed was preserved \f ",
+ b"",
+ b"",
+ ]
+
+ self.runRequest(b"\n".join(requestLines), MyRequest, 0)
+ [request] = processed
+ # Only leading and trailing spaces and tabs are stripped from the
+ # header-value; other whitespace (vertical tab, form feed) is kept.
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"spaces"),
+ [b"spaces were stripped"],
+ )
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"tabs"),
+ [b"tabs were stripped"],
+ )
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"spaces-and-tabs"),
+ [b"spaces and tabs were stripped"],
+ )
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"line-tab"),
+ [b"\v vertical tab was preserved\v"],
+ )
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"form-feed"),
+ [b"\f form feed was preserved \f"],
+ )
def test_tooManyHeaders(self):
"""
@@ -2182,6 +2275,58 @@ Hello,
])
+ def test_contentLengthMalformed(self):
+ """
+ A request with a non-integer C{Content-Length} header fails with a 400
+ response without calling L{Request.process}.
+ """
+ self.assertRequestRejected(
+ [
+ b"GET /a HTTP/1.1",
+ b"Content-Length: MORE THAN NINE THOUSAND!",
+ b"Host: host.invalid",
+ b"",
+ b"",
+ b"x" * 9001,
+ ]
+ )
+
+ def test_contentLengthTooPositive(self):
+ """
+ A request with a C{Content-Length} header that begins with a C{+} fails
+ with a 400 response without calling L{Request.process}.
+
+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+ """
+ self.assertRequestRejected(
+ [
+ b"GET /a HTTP/1.1",
+ b"Content-Length: +100",
+ b"Host: host.invalid",
+ b"",
+ b"",
+ b"x" * 100,
+ ]
+ )
+
+ def test_contentLengthNegative(self):
+ """
+ A request with a C{Content-Length} header that is negative fails with
+ a 400 response without calling L{Request.process}.
+
+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+ """
+ self.assertRequestRejected(
+ [
+ b"GET /a HTTP/1.1",
+ b"Content-Length: -100",
+ b"Host: host.invalid",
+ b"",
+ b"",
+ b"x" * 200,
+ ]
+ )
+
def test_duplicateContentLengthsWithPipelinedRequests(self):
"""
Two pipelined requests, the first of which includes multiple
@@ -4140,3 +4285,43 @@ class HTTPClientSanitizationTests(unittest.SynchronousTestCase):
transport.value().splitlines(),
[b": ".join([sanitizedBytes, sanitizedBytes])]
)
+
+
+class HexHelperTests(unittest.SynchronousTestCase):
+ """
+ Test the L{http._hexint} and L{http._ishexdigits} helper functions.
+ """
+
+ badStrings = (b"", b"0x1234", b"feds", b"-123", b"+123")
+
+ def test_isHex(self):
+ """
+ L{_ishexdigits()} returns L{True} for nonempty bytestrings containing
+ hexadecimal digits.
+ """
+ for s in (b"10", b"abcdef", b"AB1234", b"fed", b"123467890"):
+ self.assertIs(True, http._ishexdigits(s))
+
+ def test_decodes(self):
+ """
+ L{_hexint()} returns the integer equivalent of the input.
+ """
+ self.assertEqual(10, http._hexint(b"a"))
+ self.assertEqual(0x10, http._hexint(b"10"))
+ self.assertEqual(0xABCD123, http._hexint(b"abCD123"))
+
+ def test_isNotHex(self):
+ """
+ L{_ishexdigits()} returns L{False} for bytestrings that don't contain
+ hexadecimal digits, including the empty string.
+ """
+ for s in self.badStrings:
+ self.assertIs(False, http._ishexdigits(s))
+
+ def test_decodeNotHex(self):
+ """
+ L{_hexint()} raises L{ValueError} for bytestrings that can't
+ be decoded.
+ """
+ for s in self.badStrings:
+ self.assertRaises(ValueError, http._hexint, s)
--
2.39.2