python3.11: update to 3.11.8

Raven 2024-03-22 12:56:14 +06:00
parent d4f367b332
commit 6a3161ed61
9 changed files with 428 additions and 593 deletions

View File

@ -61,10 +61,10 @@ index 01d5331a63..79f70f0de4 100644
else:
if self.exec_prefix is None:
diff --git a/Lib/site.py b/Lib/site.py
index 69670d9d7f..104cb93899 100644
index 2904e44cff..1c9bfa7713 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -377,8 +377,15 @@ def getsitepackages(prefixes=None):
@@ -387,8 +387,15 @@ def getsitepackages(prefixes=None):
return sitepackages
def addsitepackages(known_paths, prefixes=None):

View File

@ -1,4 +1,4 @@
From c96f1bea2ffc5c0ca849d5406236c07ea229a64f Mon Sep 17 00:00:00 2001
From ecc5137120f471c22ff6dcb1bd128561c31e023c Mon Sep 17 00:00:00 2001
From: Charalampos Stratakis <cstratak@redhat.com>
Date: Thu, 12 Dec 2019 16:58:31 +0100
Subject: [PATCH 1/7] Expose blake2b and blake2s hashes from OpenSSL
@ -29,10 +29,10 @@ index 67becdd..6607ef7 100644
computed = m.hexdigest() if not shake else m.hexdigest(length)
self.assertEqual(
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c
index 3c40f09..e819d02 100644
index 57d64bd..d0c3b9e 100644
--- a/Modules/_hashopenssl.c
+++ b/Modules/_hashopenssl.c
@@ -1077,6 +1077,41 @@ _hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj,
@@ -1078,6 +1078,41 @@ _hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj,
}
@ -74,7 +74,7 @@ index 3c40f09..e819d02 100644
#ifdef PY_OPENSSL_HAS_SHA3
/*[clinic input]
@@ -2065,6 +2100,8 @@ static struct PyMethodDef EVP_functions[] = {
@@ -2066,6 +2101,8 @@ static struct PyMethodDef EVP_functions[] = {
_HASHLIB_OPENSSL_SHA256_METHODDEF
_HASHLIB_OPENSSL_SHA384_METHODDEF
_HASHLIB_OPENSSL_SHA512_METHODDEF
@ -205,10 +205,10 @@ index 5d84f4a..011026a 100644
-/*[clinic end generated code: output=69f2374071bff707 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=c6a9af5563972eda input=a9049054013a1b77]*/
--
2.39.1
2.43.0
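
A quick way to see the user-visible effect of patch 1/7 on a patched build is to compare the OpenSSL-backed constructors against hashlib's own. The openssl_blake2b / openssl_blake2s attribute names below are an assumption based on the patch title rather than something shown in the hunks, so the sketch probes for them defensively and degrades gracefully on an unpatched interpreter.

import hashlib

try:
    import _hashlib
except ImportError:
    _hashlib = None

data = b"abc"
for name in ("blake2b", "blake2s"):
    # Assumed attribute names for the constructors this patch exposes.
    ctor = getattr(_hashlib, "openssl_" + name, None) if _hashlib else None
    if ctor is None:
        print(name, "-> no OpenSSL-backed constructor exposed")
    else:
        # Default digest sizes match, so both paths should agree byte-for-byte.
        print(name, "->", ctor(data).hexdigest() == hashlib.new(name, data).hexdigest())
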
From 9a7e164840aa35602e1c6dddadd461fafc666a63 Mon Sep 17 00:00:00 2001
From 0198d467525e79cb4be4418708719af3eaee7a40 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <pviktori@redhat.com>
Date: Thu, 1 Aug 2019 17:57:05 +0200
Subject: [PATCH 2/7] Use a stronger hash in multiprocessing handshake
@ -220,10 +220,10 @@ https://bugs.python.org/issue17258
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py
index b08144f..0497557 100644
index 8b81f99..69c0b7e 100644
--- a/Lib/multiprocessing/connection.py
+++ b/Lib/multiprocessing/connection.py
@@ -42,6 +42,10 @@ BUFSIZE = 8192
@@ -43,6 +43,10 @@ BUFSIZE = 8192
# A very generous timeout when it comes to local connections...
CONNECTION_TIMEOUT = 20.
@ -234,7 +234,7 @@ index b08144f..0497557 100644
_mmap_counter = itertools.count()
default_family = 'AF_INET'
@@ -735,7 +739,7 @@ def deliver_challenge(connection, authkey):
@@ -752,7 +756,7 @@ def deliver_challenge(connection, authkey):
"Authkey must be bytes, not {0!s}".format(type(authkey)))
message = os.urandom(MESSAGE_LENGTH)
connection.send_bytes(CHALLENGE + message)
@ -243,7 +243,7 @@ index b08144f..0497557 100644
response = connection.recv_bytes(256) # reject large message
if response == digest:
connection.send_bytes(WELCOME)
@@ -751,7 +755,7 @@ def answer_challenge(connection, authkey):
@@ -768,7 +772,7 @@ def answer_challenge(connection, authkey):
message = connection.recv_bytes(256) # reject large message
assert message[:len(CHALLENGE)] == CHALLENGE, 'message = %r' % message
message = message[len(CHALLENGE):]
@ -253,10 +253,10 @@ index b08144f..0497557 100644
response = connection.recv_bytes(256) # reject large message
if response != WELCOME:
--
2.39.1
2.43.0
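
For orientation, the challenge/response handshake that patch 2/7 hardens has roughly the shape sketched below. This is an illustration pinned to HMAC-SHA256; it is not the patched Lib/multiprocessing/connection.py code, and the constants are placeholders chosen for the example.

import hmac
import os

MESSAGE_LENGTH = 20          # size of the random challenge (illustrative value)
CHALLENGE, WELCOME, FAILURE = b'#CHALLENGE#', b'#WELCOME#', b'#FAILURE#'

def deliver_challenge(conn, authkey):
    message = os.urandom(MESSAGE_LENGTH)
    conn.send_bytes(CHALLENGE + message)
    expected = hmac.new(authkey, message, 'sha256').digest()   # stronger than md5
    if hmac.compare_digest(conn.recv_bytes(256), expected):
        conn.send_bytes(WELCOME)
    else:
        conn.send_bytes(FAILURE)

def answer_challenge(conn, authkey):
    message = conn.recv_bytes(256)
    assert message[:len(CHALLENGE)] == CHALLENGE
    digest = hmac.new(authkey, message[len(CHALLENGE):], 'sha256').digest()
    conn.send_bytes(digest)
    assert conn.recv_bytes(256) == WELCOME

if __name__ == "__main__":
    from multiprocessing import Pipe
    import threading
    a, b = Pipe()
    key = os.urandom(16)
    t = threading.Thread(target=answer_challenge, args=(b, key))
    t.start()
    deliver_challenge(a, key)
    t.join()
    print("handshake ok")
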
From 10b91783a2f22153738c5658a98daf7475ad9a8c Mon Sep 17 00:00:00 2001
From a7822e2e1f21529e9730885bd8c9c6ab7c704d5b Mon Sep 17 00:00:00 2001
From: Petr Viktorin <pviktori@redhat.com>
Date: Thu, 25 Jul 2019 17:19:06 +0200
Subject: [PATCH 3/7] Disable Python's hash implementations in FIPS mode,
@ -359,7 +359,7 @@ index c2cac98..55b1677 100644
if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE)
diff --git a/Modules/_blake2/blake2module.c b/Modules/_blake2/blake2module.c
index 44d783b..d247e44 100644
index 93478f5..e3a024d 100644
--- a/Modules/_blake2/blake2module.c
+++ b/Modules/_blake2/blake2module.c
@@ -13,6 +13,7 @@
@ -370,7 +370,7 @@ index 44d783b..d247e44 100644
#include "blake2module.h"
extern PyType_Spec blake2b_type_spec;
@@ -77,6 +78,7 @@ _blake2_free(void *module)
@@ -83,6 +84,7 @@ _blake2_free(void *module)
static int
blake2_exec(PyObject *m)
{
@ -378,7 +378,7 @@ index 44d783b..d247e44 100644
Blake2State* st = blake2_get_state(m);
st->blake2b_type = (PyTypeObject *)PyType_FromModuleAndSpec(
@@ -145,5 +147,6 @@ static struct PyModuleDef blake2_module = {
@@ -154,5 +156,6 @@ static struct PyModuleDef blake2_module = {
PyMODINIT_FUNC
PyInit__blake2(void)
{
@ -446,10 +446,10 @@ index 56ae7a5..45fb403 100644
+ if (_Py_hashlib_fips_error(exc, name)) return NULL; \
+} while (0)
diff --git a/configure.ac b/configure.ac
index c62a565..861f7a0 100644
index 52d5c1f..56aff78 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7044,7 +7044,8 @@ PY_STDLIB_MOD([_sha512], [test "$with_builtin_sha512" = yes])
@@ -7069,7 +7069,8 @@ PY_STDLIB_MOD([_sha512], [test "$with_builtin_sha512" = yes])
PY_STDLIB_MOD([_sha3], [test "$with_builtin_sha3" = yes])
PY_STDLIB_MOD([_blake2],
[test "$with_builtin_blake2" = yes], [],
@ -460,10 +460,10 @@ index c62a565..861f7a0 100644
PY_STDLIB_MOD([_crypt],
[], [test "$ac_cv_crypt_crypt" = yes],
--
2.39.1
2.43.0
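
The runtime effect patch 3/7 aims for can be probed from Python. A hedged sketch, assuming the patched build exposes _hashlib.get_fips_mode() (the downstream-only helper that the hmac.py hunk in patch 6/7 calls as _hashopenssl.get_fips_mode()); on an unpatched or non-FIPS build it simply reports every constructor as available.

import hashlib

try:
    from _hashlib import get_fips_mode      # only present on patched builds
except ImportError:
    def get_fips_mode():
        return 0

for name in ("md5", "sha256", "blake2b"):
    try:
        hashlib.new(name, b"x")
        print(name, "-> available")
    except ValueError as exc:
        # Expected for digests whose builtin implementation is disabled in FIPS mode.
        print(name, "-> blocked:", exc, "(fips =", get_fips_mode(), ")")
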
From e26066b1c05c9768e38cb6f45d6a01058de55b3f Mon Sep 17 00:00:00 2001
From e9ce6d33544559172dbebbe0c0dfba2757c62331 Mon Sep 17 00:00:00 2001
From: Charalampos Stratakis <cstratak@redhat.com>
Date: Fri, 29 Jan 2021 14:16:21 +0100
Subject: [PATCH 4/7] Use python's fall back crypto implementations only if we
@ -623,10 +623,10 @@ index 01d12f5..a7cdb07 100644
def test_pbkdf2_hmac_py(self):
with warnings_helper.check_warnings():
--
2.39.1
2.43.0
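
Patch 4/7 concerns when hashlib may use Python's builtin digest modules at all. The sketch below illustrates an OpenSSL-first constructor lookup with a builtin fallback; it shows the ordering only and is not the patched hashlib internals (the _sha256 / openssl_sha256 names follow CPython's usual conventions).

import importlib

def pick_constructor(name):
    # Prefer the OpenSSL-backed constructor if _hashlib provides one.
    try:
        import _hashlib
        return getattr(_hashlib, "openssl_" + name)
    except (ImportError, AttributeError):
        pass
    # Fall back to the builtin module (e.g. _md5, _sha256), if it was built.
    module = importlib.import_module("_" + name)
    return getattr(module, name)

print(pick_constructor("sha256")(b"abc").hexdigest())
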
From 9ccbd22b8538fee379717c8b2916dc1ff8b96f07 Mon Sep 17 00:00:00 2001
From 641c617775b6973ed84711a2602ba190fe064474 Mon Sep 17 00:00:00 2001
From: Charalampos Stratakis <cstratak@redhat.com>
Date: Wed, 31 Jul 2019 15:43:43 +0200
Subject: [PATCH 5/7] Test equivalence of hashes for the various digests with
@ -783,10 +783,10 @@ index a7cdb07..c071f28 100644
class KDFTests(unittest.TestCase):
--
2.39.1
2.43.0
From c3b8d6ecc76c87e8b05fd2cb212d5dece50ce0b1 Mon Sep 17 00:00:00 2001
From a706c8342f0f9307d44c43c203702e1476fe73b4 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <pviktori@redhat.com>
Date: Mon, 26 Aug 2019 19:39:48 +0200
Subject: [PATCH 6/7] Guard against Python HMAC in FIPS mode
@ -800,7 +800,7 @@ diff --git a/Lib/hmac.py b/Lib/hmac.py
index 8b4f920..20ef96c 100644
--- a/Lib/hmac.py
+++ b/Lib/hmac.py
@@ -16,8 +16,9 @@ else:
@@ -16,8 +16,9 @@
import hashlib as _hashlib
@ -812,16 +812,9 @@ index 8b4f920..20ef96c 100644
# The size of the digests returned by HMAC depends on the underlying
# hashing module used. Use digest_size from the instance of HMAC instead.
@@ -48,17 +49,18 @@ class HMAC:
msg argument. Passing it as a keyword argument is
recommended, though not required for legacy API reasons.
"""
-
if not isinstance(key, (bytes, bytearray)):
raise TypeError("key: expected bytes or bytearray, but got %r" % type(key).__name__)
@@ -55,10 +56,12 @@
if not digestmod:
raise TypeError("Missing required parameter 'digestmod'.")
raise TypeError("Missing required argument 'digestmod'.")
- if _hashopenssl and isinstance(digestmod, (str, _functype)):
+ if _hashopenssl.get_fips_mode() or (_hashopenssl and isinstance(digestmod, (str, _functype))):
@ -833,7 +826,7 @@ index 8b4f920..20ef96c 100644
self._init_old(key, msg, digestmod)
else:
self._init_old(key, msg, digestmod)
@@ -69,6 +71,9 @@ class HMAC:
@@ -69,6 +72,9 @@
self.block_size = self._hmac.block_size
def _init_old(self, key, msg, digestmod):
@ -844,7 +837,7 @@ index 8b4f920..20ef96c 100644
digest_cons = digestmod
elif isinstance(digestmod, str):
diff --git a/Lib/test/test_hmac.py b/Lib/test/test_hmac.py
index 7cf9973..a9e4e39 100644
index a39a2c4..0742a1c 100644
--- a/Lib/test/test_hmac.py
+++ b/Lib/test/test_hmac.py
@@ -5,6 +5,7 @@ import hashlib
@ -875,7 +868,7 @@ index 7cf9973..a9e4e39 100644
with warnings.catch_warnings():
warnings.simplefilter('error', RuntimeWarning)
with self.assertRaises(RuntimeWarning):
@@ -443,6 +450,7 @@ class ConstructorTestCase(unittest.TestCase):
@@ -453,6 +460,7 @@ class ConstructorTestCase(unittest.TestCase):
with self.assertRaisesRegex(TypeError, "immutable type"):
C_HMAC.value = None
@ -883,7 +876,7 @@ index 7cf9973..a9e4e39 100644
@unittest.skipUnless(sha256_module is not None, 'need _sha256')
def test_with_sha256_module(self):
h = hmac.HMAC(b"key", b"hash this!", digestmod=sha256_module.sha256)
@@ -471,6 +479,7 @@ class SanityTestCase(unittest.TestCase):
@@ -481,6 +489,7 @@ class SanityTestCase(unittest.TestCase):
class CopyTestCase(unittest.TestCase):
@ -891,7 +884,7 @@ index 7cf9973..a9e4e39 100644
@hashlib_helper.requires_hashdigest('sha256')
def test_attributes_old(self):
# Testing if attributes are of same type.
@@ -482,6 +491,7 @@ class CopyTestCase(unittest.TestCase):
@@ -492,6 +501,7 @@ class CopyTestCase(unittest.TestCase):
self.assertEqual(type(h1._outer), type(h2._outer),
"Types of outer don't match.")
@ -900,10 +893,10 @@ index 7cf9973..a9e4e39 100644
def test_realcopy_old(self):
# Testing if the copy method created a real copy.
--
2.39.1
2.43.0
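
In practice, the hmac.py guard above means HMAC construction goes through OpenSSL whenever FIPS mode is on, and the pure-Python fallback (HMAC._init_old) refuses to run instead of silently producing a non-validated MAC. A small sketch of the user-visible side; the guard itself is only paraphrased in a comment, not reproduced exactly.

import hmac

# Normal use is unchanged: a string digestmod is served by the OpenSSL HMAC
# implementation when _hashlib is available.
mac = hmac.new(b"secret", b"payload", digestmod="sha256")
print(mac.hexdigest())

# Paraphrase of the added guard in HMAC._init_old (see the hunk above):
#
#     if _hashopenssl.get_fips_mode():
#         raise ValueError(...)   # no pure-Python HMAC in FIPS mode
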
From 2b06ee89344e8735cdc8435aadbdf83fe289e934 Mon Sep 17 00:00:00 2001
From 03f1dedfe5d29af20fb3686d76b045384d41d8dd Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Wed, 25 Aug 2021 16:44:43 +0200
Subject: [PATCH 7/7] Disable hash-based PYCs in FIPS mode
@ -946,11 +939,11 @@ index db52725..5fca65e 100644
return PycInvalidationMode.CHECKED_HASH
else:
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index c33f90d..7d40540 100644
index dc7a6e6..646b328 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -2225,6 +2225,20 @@ def requires_venv_with_pip():
return unittest.skipUnless(ctypes, 'venv: pip requires ctypes')
@@ -2203,6 +2203,20 @@ def sleeping_retry(timeout, err_msg=None, /,
delay = min(delay * 2, max_delay)
+def fails_in_fips_mode(expected_error):
@ -971,7 +964,7 @@ index c33f90d..7d40540 100644
def adjust_int_max_str_digits(max_digits):
"""Temporarily change the integer string conversion length limit."""
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index 4dadbc0..7dc7e51 100644
index 7fcd563..476b557 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -286,6 +286,7 @@ class CmdLineTest(unittest.TestCase):
@ -991,10 +984,10 @@ index 4dadbc0..7dc7e51 100644
with os_helper.temp_dir() as script_dir:
script_name = _make_test_script(script_dir, '__main__')
diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py
index 05154c8..c678d4a 100644
index 9cd92ad..4ec29a1 100644
--- a/Lib/test/test_compileall.py
+++ b/Lib/test/test_compileall.py
@@ -800,14 +800,23 @@ class CommandLineTestsBase:
@@ -806,14 +806,23 @@ class CommandLineTestsBase:
out = self.assertRunOK('badfilename')
self.assertRegex(out, b"Can't list 'badfilename'")
@ -1020,10 +1013,10 @@ index 05154c8..c678d4a 100644
with open(pyc, 'rb') as fp:
data = fp.read()
diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py
index 4bb0390..ff62483 100644
index 4062afd..6bc276d 100644
--- a/Lib/test/test_imp.py
+++ b/Lib/test/test_imp.py
@@ -350,6 +350,7 @@ class ImportTests(unittest.TestCase):
@@ -352,6 +352,7 @@ class ImportTests(unittest.TestCase):
import _frozen_importlib
self.assertEqual(_frozen_importlib.__spec__.origin, "frozen")
@ -1031,7 +1024,7 @@ index 4bb0390..ff62483 100644
def test_source_hash(self):
self.assertEqual(_imp.source_hash(42, b'hi'), b'\xfb\xd9G\x05\xaf$\x9b~')
self.assertEqual(_imp.source_hash(43, b'hi'), b'\xd0/\x87C\xccC\xff\xe2')
@@ -369,6 +370,7 @@ class ImportTests(unittest.TestCase):
@@ -371,6 +372,7 @@ class ImportTests(unittest.TestCase):
res = script_helper.assert_python_ok(*args)
self.assertEqual(res.out.strip().decode('utf-8'), expected)
@ -1092,10 +1085,10 @@ index 378dcbe..7b223a1 100644
with util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode(
diff --git a/Lib/test/test_py_compile.py b/Lib/test/test_py_compile.py
index e53f5d9..7266212 100644
index 9b420d2..dd6460a 100644
--- a/Lib/test/test_py_compile.py
+++ b/Lib/test/test_py_compile.py
@@ -141,13 +141,16 @@ class PyCompileTestsBase:
@@ -143,13 +143,16 @@ class PyCompileTestsBase:
importlib.util.cache_from_source(bad_coding)))
def test_source_date_epoch(self):
@ -1113,7 +1106,7 @@ index e53f5d9..7266212 100644
expected_flags = 0b11
else:
expected_flags = 0b00
@@ -178,7 +181,8 @@ class PyCompileTestsBase:
@@ -180,7 +183,8 @@ class PyCompileTestsBase:
# Specifying optimized bytecode should lead to a path reflecting that.
self.assertIn('opt-2', py_compile.compile(self.source_path, optimize=2))
@ -1123,7 +1116,7 @@ index e53f5d9..7266212 100644
py_compile.compile(
self.source_path,
invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
@@ -187,6 +191,9 @@ class PyCompileTestsBase:
@@ -189,6 +193,9 @@ class PyCompileTestsBase:
flags = importlib._bootstrap_external._classify_pyc(
fp.read(), 'test', {})
self.assertEqual(flags, 0b11)
@ -1154,10 +1147,10 @@ index 59a5200..81fadb3 100644
def test_checked_hash_based_change_pyc(self):
source = b"state = 'old'"
diff --git a/Python/import.c b/Python/import.c
index 07a8b90..e97b47b 100644
index 39144d3..b439059 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -2437,6 +2437,26 @@ static PyObject *
@@ -2449,6 +2449,26 @@ static PyObject *
_imp_source_hash_impl(PyObject *module, long key, Py_buffer *source)
/*[clinic end generated code: output=edb292448cf399ea input=9aaad1e590089789]*/
{
@ -1185,5 +1178,5 @@ index 07a8b90..e97b47b 100644
uint64_t x;
char data[sizeof(uint64_t)];
--
2.39.1
2.43.0
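
The py_compile and test changes of patch 7/7 boil down to this: hash-based pyc invalidation stays available outside FIPS mode, while a FIPS-enabled build refuses it and timestamp invalidation is used instead. A sketch under that assumption, again leaning on the downstream _hashlib.get_fips_mode() helper.

import pathlib
import py_compile
import tempfile

try:
    from _hashlib import get_fips_mode     # only present on patched builds
except ImportError:
    def get_fips_mode():
        return 0

with tempfile.TemporaryDirectory() as tmp:
    src = pathlib.Path(tmp) / "example.py"
    src.write_text("x = 1\n")
    mode = (py_compile.PycInvalidationMode.TIMESTAMP if get_fips_mode()
            else py_compile.PycInvalidationMode.CHECKED_HASH)
    pyc = py_compile.compile(str(src), invalidation_mode=mode, doraise=True)
    print("wrote", pyc, "using", mode.name, "invalidation")
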

View File

@ -16,10 +16,10 @@ https://github.com/GrahamDumpleton/mod_wsgi/issues/730
2 files changed, 8 insertions(+), 50 deletions(-)
diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py
index 9c6561c099..84714c03fe 100644
index ec6a319486..1dbb9d0baa 100644
--- a/Lib/test/test_threading.py
+++ b/Lib/test/test_threading.py
@@ -956,39 +956,6 @@ def test_debug_deprecation(self):
@@ -1045,39 +1045,6 @@ def test_debug_deprecation(self):
b'is deprecated and will be removed in Python 3.12')
self.assertIn(msg, err)
@ -60,10 +60,10 @@ index 9c6561c099..84714c03fe 100644
class ThreadJoinOnShutdown(BaseTestCase):
diff --git a/Lib/threading.py b/Lib/threading.py
index 4f72938551..18c10e6489 100644
index 29b8ec7465..2145f5a6dc 100644
--- a/Lib/threading.py
+++ b/Lib/threading.py
@@ -1546,29 +1546,20 @@ def _shutdown():
@@ -1553,29 +1553,20 @@ def _shutdown():
global _SHUTTING_DOWN
_SHUTTING_DOWN = True

View File

@ -1,47 +0,0 @@
From db083095e3bdb93e4f8170d814664c482b1e94da Mon Sep 17 00:00:00 2001
From: rpm-build <rpm-build>
Date: Tue, 14 Jun 2022 06:38:43 +0200
Subject: [PATCH] Fix test suite for Expat >= 2.4.5
---
Lib/test/test_minidom.py | 17 +++++------------
1 file changed, 5 insertions(+), 12 deletions(-)
diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py
index 9762025..5f52ed1 100644
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -1149,14 +1149,10 @@ class MinidomTest(unittest.TestCase):
# Verify that character decoding errors raise exceptions instead
# of crashing
- if pyexpat.version_info >= (2, 4, 5):
- self.assertRaises(ExpatError, parseString,
- b'<fran\xe7ais></fran\xe7ais>')
- self.assertRaises(ExpatError, parseString,
- b'<franais>Comment \xe7a va ? Tr\xe8s bien ?</franais>')
- else:
- self.assertRaises(UnicodeDecodeError, parseString,
- b'<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
+ self.assertRaises(ExpatError, parseString,
+ b'<fran\xe7ais></fran\xe7ais>')
+ self.assertRaises(ExpatError, parseString,
+ b'<franais>Comment \xe7a va ? Tr\xe8s bien ?</franais>')
doc.unlink()
@@ -1617,10 +1613,7 @@ class MinidomTest(unittest.TestCase):
self.confirm(doc2.namespaceURI == xml.dom.EMPTY_NAMESPACE)
def testExceptionOnSpacesInXMLNSValue(self):
- if pyexpat.version_info >= (2, 4, 5):
- context = self.assertRaisesRegex(ExpatError, 'syntax error')
- else:
- context = self.assertRaisesRegex(ValueError, 'Unsupported syntax')
+ context = self.assertRaisesRegex(ExpatError, 'syntax error')
with context:
parseString('<element xmlns:abc="http:abc.com/de f g/hi/j k"><abc:foo /></element>')
--
2.35.3
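
The deleted 00378 patch had made the minidom tests expect ExpatError unconditionally. The expectation it encoded can be checked directly; with Expat older than 2.4.5 the failure surfaces differently, which is why this sketch catches both exception types.

from xml.dom.minidom import parseString
from xml.parsers.expat import ExpatError

try:
    parseString(b'<fran\xe7ais></fran\xe7ais>')   # \xe7 is not valid UTF-8
except (ExpatError, UnicodeDecodeError) as exc:   # UnicodeDecodeError on old Expat
    print(type(exc).__name__, ":", exc)
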

View File

@ -0,0 +1,250 @@
From 8b70605b594b3831331a9340ba764ff751871612 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Mon, 6 Mar 2023 17:24:24 +0100
Subject: [PATCH] CVE-2007-4559, PEP-706: Add filters for tarfile extraction
(downstream)
Add and test RHEL-specific ways of configuring the default behavior: environment
variable and config file.
---
Lib/tarfile.py | 42 +++++++++++++
Lib/test/test_shutil.py | 3 +-
Lib/test/test_tarfile.py | 128 ++++++++++++++++++++++++++++++++++++++-
3 files changed, 169 insertions(+), 4 deletions(-)
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 130b5e0..3b7d8d5 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -72,6 +72,13 @@ __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
"ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
"DEFAULT_FORMAT", "open"]
+# If true, use the safer (but backwards-incompatible) 'tar' extraction filter,
+# rather than 'fully_trusted', by default.
+# The emitted warning is changed to match.
+_RH_SAFER_DEFAULT = True
+
+# System-wide configuration file
+_CONFIG_FILENAME = '/etc/python/tarfile.cfg'
#---------------------------------------------------------
# tar constants
@@ -2211,6 +2218,41 @@ class TarFile(object):
if filter is None:
filter = self.extraction_filter
if filter is None:
+ name = os.environ.get('PYTHON_TARFILE_EXTRACTION_FILTER')
+ if name is None:
+ try:
+ file = bltn_open(_CONFIG_FILENAME)
+ except FileNotFoundError:
+ pass
+ else:
+ import configparser
+ conf = configparser.ConfigParser(
+ interpolation=None,
+ comment_prefixes=('#', ),
+ )
+ with file:
+ conf.read_file(file)
+ name = conf.get('tarfile',
+ 'PYTHON_TARFILE_EXTRACTION_FILTER',
+ fallback='')
+ if name:
+ try:
+ filter = _NAMED_FILTERS[name]
+ except KeyError:
+ raise ValueError(f"filter {filter!r} not found") from None
+ self.extraction_filter = filter
+ return filter
+ if _RH_SAFER_DEFAULT:
+ warnings.warn(
+ 'The default behavior of tarfile extraction has been '
+ + 'changed to disallow common exploits '
+ + '(including CVE-2007-4559). '
+ + 'By default, absolute/parent paths are disallowed '
+ + 'and some mode bits are cleared. '
+ + 'See https://access.redhat.com/articles/7004769 '
+ + 'for more details.',
+ RuntimeWarning)
+ return tar_filter
return fully_trusted_filter
if isinstance(filter, str):
raise TypeError(
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py
index 9bf4145..f247b82 100644
--- a/Lib/test/test_shutil.py
+++ b/Lib/test/test_shutil.py
@@ -1665,7 +1665,8 @@ class TestArchives(BaseTest, unittest.TestCase):
def check_unpack_tarball(self, format):
self.check_unpack_archive(format, filter='fully_trusted')
self.check_unpack_archive(format, filter='data')
- with warnings_helper.check_no_warnings(self):
+ with warnings_helper.check_warnings(
+ ('.*CVE-2007-4559', RuntimeWarning)):
self.check_unpack_archive(format)
def test_unpack_archive_tar(self):
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index cdea033..4724285 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -3,7 +3,7 @@
import os
import io
from hashlib import sha256
-from contextlib import contextmanager
+from contextlib import contextmanager, ExitStack
from random import Random
import pathlib
import shutil
@@ -3025,7 +3025,11 @@
tar = tarfile.open(tarname, mode='r', encoding="iso8859-1")
cls.control_dir = pathlib.Path(TEMPDIR) / "extractall_ctrl"
tar.errorlevel = 0
- tar.extractall(cls.control_dir, filter=cls.extraction_filter)
+ with ExitStack() as cm:
+ if cls.extraction_filter is None:
+ cm.enter_context(warnings.catch_warnings())
+ warnings.simplefilter(action="ignore", category=RuntimeWarning)
+ tar.extractall(cls.control_dir, filter=cls.extraction_filter)
tar.close()
cls.control_paths = set(
p.relative_to(cls.control_dir)
@@ -3844,7 +3848,8 @@
"""Ensure the default filter does not warn (like in 3.12)"""
with ArchiveMaker() as arc:
arc.add('foo')
- with warnings_helper.check_no_warnings(self):
+ with warnings_helper.check_warnings(
+ ('.*CVE-2007-4559', RuntimeWarning)):
with self.check_context(arc.open(), None):
self.expect_file('foo')
@@ -4013,6 +4018,122 @@
with self.check_context(arc.open(errorlevel='boo!'), filtererror_filter):
self.expect_exception(TypeError) # errorlevel is not int
+ @contextmanager
+ def rh_config_context(self, config_lines=None):
+ """Set up for testing various ways of overriding the default filter
+
+ return a triple with:
+ - temporary directory
+ - EnvironmentVarGuard()
+ - a test archive for use with check_* methods below
+
+ If config_lines is given, write them to the config file. Otherwise
+ the config file is missing.
+ """
+ tempdir = pathlib.Path(TEMPDIR) / 'tmp'
+ configfile = tempdir / 'tarfile.cfg'
+ with ArchiveMaker() as arc:
+ arc.add('good')
+ arc.add('ugly', symlink_to='/etc/passwd')
+ arc.add('../bad')
+ with (
+ os_helper.temp_dir(tempdir),
+ support.swap_attr(tarfile, '_CONFIG_FILENAME', str(configfile)),
+ os_helper.EnvironmentVarGuard() as env,
+ arc.open() as tar,
+ ):
+ if config_lines is not None:
+ with configfile.open('w') as f:
+ for line in config_lines:
+ print(line, file=f)
+ yield tempdir, env, tar
+
+ def check_rh_default_behavior(self, tar, tempdir):
+ """Check RH default: warn and refuse to extract dangerous files."""
+ with (
+ warnings_helper.check_warnings(
+ ('.*CVE-2007-4559', RuntimeWarning)),
+ self.assertRaises(tarfile.OutsideDestinationError),
+ ):
+ tar.extractall(tempdir / 'outdir')
+
+ def check_trusted_default(self, tar, tempdir):
+ """Check 'fully_trusted' is configured as the default filter."""
+ with (
+ warnings_helper.check_no_warnings(self),
+ ):
+ tar.extractall(tempdir / 'outdir')
+ self.assertTrue((tempdir / 'outdir/good').exists())
+ self.assertEqual((tempdir / 'outdir/ugly').readlink(),
+ pathlib.Path('/etc/passwd'))
+ self.assertTrue((tempdir / 'bad').exists())
+
+ def test_rh_default_no_conf(self):
+ with self.rh_config_context() as (tempdir, env, tar):
+ self.check_rh_default_behavior(tar, tempdir)
+
+ def test_rh_default_from_file(self):
+ lines = ['[tarfile]', 'PYTHON_TARFILE_EXTRACTION_FILTER=fully_trusted']
+ with self.rh_config_context(lines) as (tempdir, env, tar):
+ self.check_trusted_default(tar, tempdir)
+
+ def test_rh_empty_config_file(self):
+ """Empty config file -> default behavior"""
+ lines = []
+ with self.rh_config_context(lines) as (tempdir, env, tar):
+ self.check_rh_default_behavior(tar, tempdir)
+
+ def test_empty_config_section(self):
+ """Empty section in config file -> default behavior"""
+ lines = ['[tarfile]']
+ with self.rh_config_context(lines) as (tempdir, env, tar):
+ self.check_rh_default_behavior(tar, tempdir)
+
+ def test_rh_default_empty_config_option(self):
+ """Empty option value in config file -> default behavior"""
+ lines = ['[tarfile]', 'PYTHON_TARFILE_EXTRACTION_FILTER=']
+ with self.rh_config_context(lines) as (tempdir, env, tar):
+ self.check_rh_default_behavior(tar, tempdir)
+
+ def test_bad_config_option(self):
+ """Bad option value in config file -> ValueError"""
+ lines = ['[tarfile]', 'PYTHON_TARFILE_EXTRACTION_FILTER=unknown!']
+ with self.rh_config_context(lines) as (tempdir, env, tar):
+ with self.assertRaises(ValueError):
+ tar.extractall(tempdir / 'outdir')
+
+ def test_default_from_envvar(self):
+ with self.rh_config_context() as (tempdir, env, tar):
+ env['PYTHON_TARFILE_EXTRACTION_FILTER'] = 'fully_trusted'
+ self.check_trusted_default(tar, tempdir)
+
+ def test_empty_envvar(self):
+ """Empty env variable -> default behavior"""
+ with self.rh_config_context() as (tempdir, env, tar):
+ env['PYTHON_TARFILE_EXTRACTION_FILTER'] = ''
+ self.check_rh_default_behavior(tar, tempdir)
+
+ def test_bad_envvar(self):
+ with self.rh_config_context() as (tempdir, env, tar):
+ env['PYTHON_TARFILE_EXTRACTION_FILTER'] = 'unknown!'
+ with self.assertRaises(ValueError):
+ tar.extractall(tempdir / 'outdir')
+
+ def test_envvar_overrides_file(self):
+ lines = ['[tarfile]', 'PYTHON_TARFILE_EXTRACTION_FILTER=data']
+ with self.rh_config_context(lines) as (tempdir, env, tar):
+ env['PYTHON_TARFILE_EXTRACTION_FILTER'] = 'fully_trusted'
+ self.check_trusted_default(tar, tempdir)
+
+ def test_monkeypatch_overrides_envvar(self):
+ with self.rh_config_context(None) as (tempdir, env, tar):
+ env['PYTHON_TARFILE_EXTRACTION_FILTER'] = 'data'
+ with support.swap_attr(
+ tarfile.TarFile, 'extraction_filter',
+ staticmethod(tarfile.fully_trusted_filter)
+ ):
+ self.check_trusted_default(tar, tempdir)
+
class OverwriteTests(archiver_tests.OverwriteTests, unittest.TestCase):
testdir = os.path.join(TEMPDIR, "testoverwrite")
--
2.41.0
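
The tarfile patch above resolves the default extraction filter from the PYTHON_TARFILE_EXTRACTION_FILTER environment variable or, failing that, from /etc/python/tarfile.cfg (both names come straight from the hunks). A usage sketch, not part of the patch; on an unpatched interpreter the override is simply ignored.

import os
import pathlib
import tarfile
import tempfile

# Per-process override read by the patched default-filter lookup.
os.environ["PYTHON_TARFILE_EXTRACTION_FILTER"] = "data"

with tempfile.TemporaryDirectory() as tmp:
    tmp = pathlib.Path(tmp)
    (tmp / "hello.txt").write_text("hi\n")
    with tarfile.open(tmp / "example.tar", "w") as tar:
        tar.add(tmp / "hello.txt", arcname="hello.txt")
    with tarfile.open(tmp / "example.tar") as tar:
        tar.extractall(tmp / "outdir")      # default filter comes from the env var
    print(sorted(p.name for p in (tmp / "outdir").iterdir()))

# System-wide equivalent, placed in /etc/python/tarfile.cfg:
#
#     [tarfile]
#     PYTHON_TARFILE_EXTRACTION_FILTER = fully_trusted
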

View File

@ -1,229 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Wed, 17 May 2023 14:41:25 -0700
Subject: [PATCH] 00399: CVE-2023-24329
* gh-102153: Start stripping C0 control and space chars in `urlsplit` (GH-102508)
`urllib.parse.urlsplit` has already been respecting the WHATWG spec a bit GH-25595.
This adds more sanitizing to respect the "Remove any leading C0 control or space from input" [rule](https://url.spec.whatwg.org/GH-url-parsing:~:text=Remove%20any%20leading%20and%20trailing%20C0%20control%20or%20space%20from%20input.) in response to [CVE-2023-24329](https://nvd.nist.gov/vuln/detail/CVE-2023-24329).
---------
(cherry picked from commit 2f630e1ce18ad2e07428296532a68b11dc66ad10)
Co-authored-by: Illia Volochii <illia.volochii@gmail.com>
Co-authored-by: Gregory P. Smith [Google] <greg@krypto.org>
---
Doc/library/urllib.parse.rst | 46 +++++++++++++-
Lib/test/test_urlparse.py | 61 ++++++++++++++++++-
Lib/urllib/parse.py | 12 ++++
...-03-07-20-59-17.gh-issue-102153.14CLSZ.rst | 3 +
4 files changed, 119 insertions(+), 3 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 96b3965107..a326e82e30 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -159,6 +159,10 @@ or on combining URL components into a URL string.
ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html',
params='', query='', fragment='')
+ .. warning::
+
+ :func:`urlparse` does not perform validation. See :ref:`URL parsing
+ security <url-parsing-security>` for details.
.. versionchanged:: 3.2
Added IPv6 URL parsing capabilities.
@@ -324,8 +328,14 @@ or on combining URL components into a URL string.
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
decomposed before parsing, no error will be raised.
- Following the `WHATWG spec`_ that updates RFC 3986, ASCII newline
- ``\n``, ``\r`` and tab ``\t`` characters are stripped from the URL.
+ Following some of the `WHATWG spec`_ that updates RFC 3986, leading C0
+ control and space characters are stripped from the URL. ``\n``,
+ ``\r`` and tab ``\t`` characters are removed from the URL at any position.
+
+ .. warning::
+
+ :func:`urlsplit` does not perform validation. See :ref:`URL parsing
+ security <url-parsing-security>` for details.
.. versionchanged:: 3.6
Out-of-range port numbers now raise :exc:`ValueError`, instead of
@@ -338,6 +348,9 @@ or on combining URL components into a URL string.
.. versionchanged:: 3.10
ASCII newline and tab characters are stripped from the URL.
+ .. versionchanged:: 3.11.4
+ Leading WHATWG C0 control and space characters are stripped from the URL.
+
.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
.. function:: urlunsplit(parts)
@@ -414,6 +427,35 @@ or on combining URL components into a URL string.
or ``scheme://host/path``). If *url* is not a wrapped URL, it is returned
without changes.
+.. _url-parsing-security:
+
+URL parsing security
+--------------------
+
+The :func:`urlsplit` and :func:`urlparse` APIs do not perform **validation** of
+inputs. They may not raise errors on inputs that other applications consider
+invalid. They may also succeed on some inputs that might not be considered
+URLs elsewhere. Their purpose is for practical functionality rather than
+purity.
+
+Instead of raising an exception on unusual input, they may instead return some
+component parts as empty strings. Or components may contain more than perhaps
+they should.
+
+We recommend that users of these APIs where the values may be used anywhere
+with security implications code defensively. Do some verification within your
+code before trusting a returned component part. Does that ``scheme`` make
+sense? Is that a sensible ``path``? Is there anything strange about that
+``hostname``? etc.
+
+What constitutes a URL is not universally well defined. Different applications
+have different needs and desired constraints. For instance the living `WHATWG
+spec`_ describes what user facing web clients such as a web browser require.
+While :rfc:`3986` is more general. These functions incorporate some aspects of
+both, but cannot be claimed compliant with either. The APIs and existing user
+code with expectations on specific behaviors predate both standards leading us
+to be very cautious about making API behavior changes.
+
.. _parsing-ascii-encoded-bytes:
Parsing ASCII Encoded Bytes
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index b426110723..40f13d631c 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -649,6 +649,65 @@ def test_urlsplit_remove_unsafe_bytes(self):
self.assertEqual(p.scheme, "http")
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
+ def test_urlsplit_strip_url(self):
+ noise = bytes(range(0, 0x20 + 1))
+ base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
+
+ url = noise.decode("utf-8") + base_url
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, "http")
+ self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
+ self.assertEqual(p.path, "/doc/")
+ self.assertEqual(p.query, "query=yes")
+ self.assertEqual(p.fragment, "frag")
+ self.assertEqual(p.username, "User")
+ self.assertEqual(p.password, "Pass")
+ self.assertEqual(p.hostname, "www.python.org")
+ self.assertEqual(p.port, 80)
+ self.assertEqual(p.geturl(), base_url)
+
+ url = noise + base_url.encode("utf-8")
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, b"http")
+ self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
+ self.assertEqual(p.path, b"/doc/")
+ self.assertEqual(p.query, b"query=yes")
+ self.assertEqual(p.fragment, b"frag")
+ self.assertEqual(p.username, b"User")
+ self.assertEqual(p.password, b"Pass")
+ self.assertEqual(p.hostname, b"www.python.org")
+ self.assertEqual(p.port, 80)
+ self.assertEqual(p.geturl(), base_url.encode("utf-8"))
+
+ # Test that trailing space is preserved as some applications rely on
+ # this within query strings.
+ query_spaces_url = "https://www.python.org:88/doc/?query= "
+ p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.netloc, "www.python.org:88")
+ self.assertEqual(p.path, "/doc/")
+ self.assertEqual(p.query, "query= ")
+ self.assertEqual(p.port, 88)
+ self.assertEqual(p.geturl(), query_spaces_url)
+
+ p = urllib.parse.urlsplit("www.pypi.org ")
+ # That "hostname" gets considered a "path" due to the
+ # trailing space and our existing logic... YUCK...
+ # and re-assembles via geturl aka unurlsplit into the original.
+ # django.core.validators.URLValidator (at least through v3.2) relies on
+ # this, for better or worse, to catch it in a ValidationError via its
+ # regular expressions.
+ # Here we test the basic round trip concept of such a trailing space.
+ self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
+
+ # with scheme as cache-key
+ url = "//www.python.org/"
+ scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
+ for _ in range(2):
+ p = urllib.parse.urlsplit(url, scheme=scheme)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.geturl(), "https://www.python.org/")
+
def test_attributes_bad_port(self):
"""Check handling of invalid ports."""
for bytes in (False, True):
@@ -656,7 +715,7 @@ def test_attributes_bad_port(self):
for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
with self.subTest(bytes=bytes, parse=parse, port=port):
netloc = "www.example.net:" + port
- url = "http://" + netloc
+ url = "http://" + netloc + "/"
if bytes:
if netloc.isascii() and port.isascii():
netloc = netloc.encode("ascii")
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 69631cbb81..4f06fd509e 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -25,6 +25,10 @@
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained. The testcases in
test_urlparse.py provides a good indicator of parsing behavior.
+
+The WHATWG URL Parser spec should also be considered. We are not compliant with
+it either due to existing user code API behavior expectations (Hyrum's Law).
+It serves as a useful guide when making changes.
"""
from collections import namedtuple
@@ -79,6 +83,10 @@
'0123456789'
'+-.')
+# Leading and trailing C0 control and space to be stripped per WHATWG spec.
+# == "".join([chr(i) for i in range(0, 0x20 + 1)])
+_WHATWG_C0_CONTROL_OR_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
+
# Unsafe bytes to be removed per WHATWG spec
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
@@ -452,6 +460,10 @@ def urlsplit(url, scheme='', allow_fragments=True):
"""
url, scheme, _coerce_result = _coerce_args(url, scheme)
+ # Only lstrip url as some applications rely on preserving trailing space.
+ # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
+ url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE)
+ scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
url = url.replace(b, "")
diff --git a/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
new file mode 100644
index 0000000000..e57ac4ed3a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
@@ -0,0 +1,3 @@
+:func:`urllib.parse.urlsplit` now strips leading C0 control and space
+characters following the specification for URLs defined by WHATWG in
+response to CVE-2023-24329. Patch by Illia Volochii.

View File

@ -1,230 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Wed, 17 May 2023 14:41:25 -0700
Subject: [PATCH] 00399: gh-102153: Start stripping C0 control and space chars
in `urlsplit` (GH-102508) (#104575)
* gh-102153: Start stripping C0 control and space chars in `urlsplit` (GH-102508)
`urllib.parse.urlsplit` has already been respecting the WHATWG spec a bit GH-25595.
This adds more sanitizing to respect the "Remove any leading C0 control or space from input" [rule](https://url.spec.whatwg.org/GH-url-parsing:~:text=Remove%20any%20leading%20and%20trailing%20C0%20control%20or%20space%20from%20input.) in response to [CVE-2023-24329](https://nvd.nist.gov/vuln/detail/CVE-2023-24329).
---------
(cherry picked from commit 2f630e1ce18ad2e07428296532a68b11dc66ad10)
Co-authored-by: Illia Volochii <illia.volochii@gmail.com>
Co-authored-by: Gregory P. Smith [Google] <greg@krypto.org>
---
Doc/library/urllib.parse.rst | 46 +++++++++++++-
Lib/test/test_urlparse.py | 61 ++++++++++++++++++-
Lib/urllib/parse.py | 12 ++++
...-03-07-20-59-17.gh-issue-102153.14CLSZ.rst | 3 +
4 files changed, 119 insertions(+), 3 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 96b3965107..a326e82e30 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -159,6 +159,10 @@ or on combining URL components into a URL string.
ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html',
params='', query='', fragment='')
+ .. warning::
+
+ :func:`urlparse` does not perform validation. See :ref:`URL parsing
+ security <url-parsing-security>` for details.
.. versionchanged:: 3.2
Added IPv6 URL parsing capabilities.
@@ -324,8 +328,14 @@ or on combining URL components into a URL string.
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
decomposed before parsing, no error will be raised.
- Following the `WHATWG spec`_ that updates RFC 3986, ASCII newline
- ``\n``, ``\r`` and tab ``\t`` characters are stripped from the URL.
+ Following some of the `WHATWG spec`_ that updates RFC 3986, leading C0
+ control and space characters are stripped from the URL. ``\n``,
+ ``\r`` and tab ``\t`` characters are removed from the URL at any position.
+
+ .. warning::
+
+ :func:`urlsplit` does not perform validation. See :ref:`URL parsing
+ security <url-parsing-security>` for details.
.. versionchanged:: 3.6
Out-of-range port numbers now raise :exc:`ValueError`, instead of
@@ -338,6 +348,9 @@ or on combining URL components into a URL string.
.. versionchanged:: 3.10
ASCII newline and tab characters are stripped from the URL.
+ .. versionchanged:: 3.11.4
+ Leading WHATWG C0 control and space characters are stripped from the URL.
+
.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
.. function:: urlunsplit(parts)
@@ -414,6 +427,35 @@ or on combining URL components into a URL string.
or ``scheme://host/path``). If *url* is not a wrapped URL, it is returned
without changes.
+.. _url-parsing-security:
+
+URL parsing security
+--------------------
+
+The :func:`urlsplit` and :func:`urlparse` APIs do not perform **validation** of
+inputs. They may not raise errors on inputs that other applications consider
+invalid. They may also succeed on some inputs that might not be considered
+URLs elsewhere. Their purpose is for practical functionality rather than
+purity.
+
+Instead of raising an exception on unusual input, they may instead return some
+component parts as empty strings. Or components may contain more than perhaps
+they should.
+
+We recommend that users of these APIs where the values may be used anywhere
+with security implications code defensively. Do some verification within your
+code before trusting a returned component part. Does that ``scheme`` make
+sense? Is that a sensible ``path``? Is there anything strange about that
+``hostname``? etc.
+
+What constitutes a URL is not universally well defined. Different applications
+have different needs and desired constraints. For instance the living `WHATWG
+spec`_ describes what user facing web clients such as a web browser require.
+While :rfc:`3986` is more general. These functions incorporate some aspects of
+both, but cannot be claimed compliant with either. The APIs and existing user
+code with expectations on specific behaviors predate both standards leading us
+to be very cautious about making API behavior changes.
+
.. _parsing-ascii-encoded-bytes:
Parsing ASCII Encoded Bytes
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index b426110723..40f13d631c 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -649,6 +649,65 @@ def test_urlsplit_remove_unsafe_bytes(self):
self.assertEqual(p.scheme, "http")
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
+ def test_urlsplit_strip_url(self):
+ noise = bytes(range(0, 0x20 + 1))
+ base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
+
+ url = noise.decode("utf-8") + base_url
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, "http")
+ self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
+ self.assertEqual(p.path, "/doc/")
+ self.assertEqual(p.query, "query=yes")
+ self.assertEqual(p.fragment, "frag")
+ self.assertEqual(p.username, "User")
+ self.assertEqual(p.password, "Pass")
+ self.assertEqual(p.hostname, "www.python.org")
+ self.assertEqual(p.port, 80)
+ self.assertEqual(p.geturl(), base_url)
+
+ url = noise + base_url.encode("utf-8")
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, b"http")
+ self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
+ self.assertEqual(p.path, b"/doc/")
+ self.assertEqual(p.query, b"query=yes")
+ self.assertEqual(p.fragment, b"frag")
+ self.assertEqual(p.username, b"User")
+ self.assertEqual(p.password, b"Pass")
+ self.assertEqual(p.hostname, b"www.python.org")
+ self.assertEqual(p.port, 80)
+ self.assertEqual(p.geturl(), base_url.encode("utf-8"))
+
+ # Test that trailing space is preserved as some applications rely on
+ # this within query strings.
+ query_spaces_url = "https://www.python.org:88/doc/?query= "
+ p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.netloc, "www.python.org:88")
+ self.assertEqual(p.path, "/doc/")
+ self.assertEqual(p.query, "query= ")
+ self.assertEqual(p.port, 88)
+ self.assertEqual(p.geturl(), query_spaces_url)
+
+ p = urllib.parse.urlsplit("www.pypi.org ")
+ # That "hostname" gets considered a "path" due to the
+ # trailing space and our existing logic... YUCK...
+ # and re-assembles via geturl aka unurlsplit into the original.
+ # django.core.validators.URLValidator (at least through v3.2) relies on
+ # this, for better or worse, to catch it in a ValidationError via its
+ # regular expressions.
+ # Here we test the basic round trip concept of such a trailing space.
+ self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
+
+ # with scheme as cache-key
+ url = "//www.python.org/"
+ scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
+ for _ in range(2):
+ p = urllib.parse.urlsplit(url, scheme=scheme)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.geturl(), "https://www.python.org/")
+
def test_attributes_bad_port(self):
"""Check handling of invalid ports."""
for bytes in (False, True):
@@ -656,7 +715,7 @@ def test_attributes_bad_port(self):
for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
with self.subTest(bytes=bytes, parse=parse, port=port):
netloc = "www.example.net:" + port
- url = "http://" + netloc
+ url = "http://" + netloc + "/"
if bytes:
if netloc.isascii() and port.isascii():
netloc = netloc.encode("ascii")
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 69631cbb81..4f06fd509e 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -25,6 +25,10 @@
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained. The testcases in
test_urlparse.py provides a good indicator of parsing behavior.
+
+The WHATWG URL Parser spec should also be considered. We are not compliant with
+it either due to existing user code API behavior expectations (Hyrum's Law).
+It serves as a useful guide when making changes.
"""
from collections import namedtuple
@@ -79,6 +83,10 @@
'0123456789'
'+-.')
+# Leading and trailing C0 control and space to be stripped per WHATWG spec.
+# == "".join([chr(i) for i in range(0, 0x20 + 1)])
+_WHATWG_C0_CONTROL_OR_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
+
# Unsafe bytes to be removed per WHATWG spec
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
@@ -452,6 +460,10 @@ def urlsplit(url, scheme='', allow_fragments=True):
"""
url, scheme, _coerce_result = _coerce_args(url, scheme)
+ # Only lstrip url as some applications rely on preserving trailing space.
+ # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
+ url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE)
+ scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
url = url.replace(b, "")
diff --git a/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
new file mode 100644
index 0000000000..e57ac4ed3a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
@@ -0,0 +1,3 @@
+:func:`urllib.parse.urlsplit` now strips leading C0 control and space
+characters following the specification for URLs defined by WHATWG in
+response to CVE-2023-24329. Patch by Illia Volochii.
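
Both copies of the 00399 patch are dropped by this update, presumably because the stripping behaviour is already in upstream 3.11.4 and later (per the versionchanged note above), so 3.11.8 carries it natively. A short illustration of that behaviour:

from urllib.parse import urlsplit

# Leading C0 control and space characters are stripped; trailing space in the
# query is preserved, since some applications depend on it.
p = urlsplit("\x00\x1f  http://www.python.org/doc/?query= ")
print(p.scheme)   # http
print(p.query)    # 'query= '
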

View File

@ -0,0 +1,107 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 11 Feb 2024 12:08:39 +0200
Subject: [PATCH] 00422: gh-115133: Fix tests for XMLPullParser with Expat
2.6.0
Feeding the parser by too small chunks defers parsing to prevent
CVE-2023-52425. Future versions of Expat may be more reactive.
(cherry picked from commit 4a08e7b3431cd32a0daf22a33421cd3035343dc4)
---
Lib/test/test_xml_etree.py | 58 ++++++++++++-------
...-02-08-14-21-28.gh-issue-115133.ycl4ko.rst | 2 +
2 files changed, 38 insertions(+), 22 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 267982a823..fa03f381fa 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -13,6 +13,7 @@
import operator
import os
import pickle
+import pyexpat
import sys
import textwrap
import types
@@ -120,6 +121,10 @@
</foo>
"""
+fails_with_expat_2_6_0 = (unittest.expectedFailure
+ if pyexpat.version_info >= (2, 6, 0) else
+ lambda test: test)
+
def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
@@ -1400,28 +1405,37 @@ def assert_event_tags(self, parser, expected, max_events=None):
self.assertEqual([(action, elem.tag) for action, elem in events],
expected)
- def test_simple_xml(self):
- for chunk_size in (None, 1, 5):
- with self.subTest(chunk_size=chunk_size):
- parser = ET.XMLPullParser()
- self.assert_event_tags(parser, [])
- self._feed(parser, "<!-- comment -->\n", chunk_size)
- self.assert_event_tags(parser, [])
- self._feed(parser,
- "<root>\n <element key='value'>text</element",
- chunk_size)
- self.assert_event_tags(parser, [])
- self._feed(parser, ">\n", chunk_size)
- self.assert_event_tags(parser, [('end', 'element')])
- self._feed(parser, "<element>text</element>tail\n", chunk_size)
- self._feed(parser, "<empty-element/>\n", chunk_size)
- self.assert_event_tags(parser, [
- ('end', 'element'),
- ('end', 'empty-element'),
- ])
- self._feed(parser, "</root>\n", chunk_size)
- self.assert_event_tags(parser, [('end', 'root')])
- self.assertIsNone(parser.close())
+ def test_simple_xml(self, chunk_size=None):
+ parser = ET.XMLPullParser()
+ self.assert_event_tags(parser, [])
+ self._feed(parser, "<!-- comment -->\n", chunk_size)
+ self.assert_event_tags(parser, [])
+ self._feed(parser,
+ "<root>\n <element key='value'>text</element",
+ chunk_size)
+ self.assert_event_tags(parser, [])
+ self._feed(parser, ">\n", chunk_size)
+ self.assert_event_tags(parser, [('end', 'element')])
+ self._feed(parser, "<element>text</element>tail\n", chunk_size)
+ self._feed(parser, "<empty-element/>\n", chunk_size)
+ self.assert_event_tags(parser, [
+ ('end', 'element'),
+ ('end', 'empty-element'),
+ ])
+ self._feed(parser, "</root>\n", chunk_size)
+ self.assert_event_tags(parser, [('end', 'root')])
+ self.assertIsNone(parser.close())
+
+ @fails_with_expat_2_6_0
+ def test_simple_xml_chunk_1(self):
+ self.test_simple_xml(chunk_size=1)
+
+ @fails_with_expat_2_6_0
+ def test_simple_xml_chunk_5(self):
+ self.test_simple_xml(chunk_size=5)
+
+ def test_simple_xml_chunk_22(self):
+ self.test_simple_xml(chunk_size=22)
def test_feed_while_iterating(self):
parser = ET.XMLPullParser()
diff --git a/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst b/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst
new file mode 100644
index 0000000000..6f1015235c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst
@@ -0,0 +1,2 @@
+Fix tests for :class:`~xml.etree.ElementTree.XMLPullParser` with Expat
+2.6.0.
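
The test rework above is needed because Expat 2.6.0 may buffer very small chunks and only deliver events once more input arrives or the parser is closed. The chunked-feed pattern the test exercises, as a standalone sketch:

import xml.etree.ElementTree as ET

def feed_in_chunks(parser, text, chunk_size):
    for start in range(0, len(text), chunk_size):
        parser.feed(text[start:start + chunk_size])

parser = ET.XMLPullParser(events=("start", "end"))
feed_in_chunks(parser, "<root><item key='value'>text</item></root>", chunk_size=5)
parser.close()    # flushes any input Expat was still buffering
print([(event, element.tag) for event, element in parser.read_events()])
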

View File

@ -18,7 +18,7 @@ URL: https://www.python.org/
# WARNING When rebasing to a new Python version,
# remember to update the python3-docs package as well
%global general_version %{pybasever}.4
%global general_version %{pybasever}.8
#global prerel ...
%global upstream_version %{general_version}%{?prerel}
Version: %{general_version}%{?prerel:~%{prerel}}
@ -65,7 +65,7 @@ License: Python
# If the rpmwheels condition is disabled, we use the bundled wheel packages
# from Python with the versions below.
# This needs to be manually updated when we update Python.
%global pip_version 23.1.2
%global pip_version 24.0
%global setuptools_version 65.5.0
# Expensive optimizations (mainly, profile-guided optimizations)
@ -345,31 +345,22 @@ Patch329: 00329-fips.patch
# https://github.com/GrahamDumpleton/mod_wsgi/issues/730
Patch371: 00371-revert-bpo-1596321-fix-threading-_shutdown-for-the-main-thread-gh-28549-gh-28589.patch
# 00397 #
# Filters for tarfile extraction (CVE-2007-4559, PEP-706)
# First patch fixes determination of symlink targets, which were treated
# as relative to the root of the archive,
# rather than the directory containing the symlink.
# Not yet upstream as of this writing.
# The second patch is Red Hat configuration, see KB for documentation:
# - https://access.redhat.com/articles/7004769
Patch397: 00397-tarfile-filter.patch
# 00378 #
# Support expat 2.4.5
# 00422 # a353cebef737c41420dc7ae2469dd657371b8881
# gh-115133: Fix tests for XMLPullParser with Expat 2.6.0
#
# Curly brackets were never allowed in namespace URIs
# according to RFC 3986, and so-called namespace-validating
# XML parsers have the right to reject them a invalid URIs.
#
# libexpat >=2.4.5 has become strcter in that regard due to
# related security issues; with ET.XML instantiating a
# namespace-aware parser under the hood, this test has no
# future in CPython.
#
# References:
# - https://datatracker.ietf.org/doc/html/rfc3968
# - https://www.w3.org/TR/xml-names/
#
# Also, test_minidom.py: Support Expat >=2.4.5
#
# The patch has diverged from upstream as the python test
# suite was relying on checking the expat version, whereas
# in RHEL fixes get backported instead of rebasing packages.
#
# Upstream: https://bugs.python.org/issue46811
Patch378: 00378-support-expat-2-4-5.patch
# Feeding the parser by too small chunks defers parsing to prevent
# CVE-2023-52425. Future versions of Expat may be more reactive.
Patch422: 00422-gh-115133-fix-tests-for-xmlpullparser-with-expat-2-6-0.patch
# (New patches go here ^^^)
#