From 3ccc7d583e325ceb0ebdf8fc295bbb7fc8cd404d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= Date: Sat, 2 Mar 2024 06:51:01 +0100 Subject: [PATCH] Fix test_elementtree with Expat 2.6.0 (GH-407) Feeding the parser by too small chunks defers parsing to prevent CVE-2023-52425. Future versions of Expat may be more reactive. Heavily inspired by https://github.com/python/cpython/commit/4a08e7b3431cd32a0daf22a33421cd3035343dc4 We cannot use a @fails_with_expat_2_6_0 decorator because the test passes in ETreePullTestCase. See https://github.com/python/cpython/issues/115133 See https://github.com/advisories/GHSA-gh68-jm46-84rf Co-authored-by: Serhiy Storchaka --- src/lxml/tests/test_elementtree.py | 62 +++++++++++++++++++----------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/src/lxml/tests/test_elementtree.py b/src/lxml/tests/test_elementtree.py index 8ccf4442a..ef923c5ce 100644 --- a/src/lxml/tests/test_elementtree.py +++ b/src/lxml/tests/test_elementtree.py @@ -10,6 +10,7 @@ import io import operator import os +import pyexpat import re import sys import textwrap @@ -4383,29 +4384,44 @@ def assert_event_tags(self, parser, expected, max_events=None): self.assertEqual([(action, elem.tag) for action, elem in events], expected) - def test_simple_xml(self): - for chunk_size in (None, 1, 5): - #with self.subTest(chunk_size=chunk_size): - parser = self.etree.XMLPullParser() - self.assert_event_tags(parser, []) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, []) - self._feed(parser, - "\n text\n", chunk_size) - self.assert_event_tags(parser, [('end', 'element')]) - self._feed(parser, "texttail\n", chunk_size) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [ - ('end', 'element'), - ('end', 'empty-element'), - ]) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [('end', 'root')]) - root = self._close_and_return_root(parser) - self.assertEqual(root.tag, 'root') + def test_simple_xml(self, chunk_size=None): + parser = self.etree.XMLPullParser() + self.assert_event_tags(parser, []) + self._feed(parser, "\n", chunk_size) + self.assert_event_tags(parser, []) + self._feed(parser, + "\n text\n", chunk_size) + self.assert_event_tags(parser, [('end', 'element')]) + self._feed(parser, "texttail\n", chunk_size) + self._feed(parser, "\n", chunk_size) + self.assert_event_tags(parser, [ + ('end', 'element'), + ('end', 'empty-element'), + ]) + self._feed(parser, "\n", chunk_size) + self.assert_event_tags(parser, [('end', 'root')]) + root = self._close_and_return_root(parser) + self.assertEqual(root.tag, 'root') + + def test_simple_xml_chunk_1(self): + if self.etree is not etree and pyexpat.version_info >= (2, 6, 0): + raise unittest.SkipTest( + "Feeding the parser by too small chunks defers parsing" + ) + self.test_simple_xml(chunk_size=1) + + def test_simple_xml_chunk_5(self): + if self.etree is not etree and pyexpat.version_info >= (2, 6, 0): + raise unittest.SkipTest( + "Feeding the parser by too small chunks defers parsing" + ) + self.test_simple_xml(chunk_size=5) + + def test_simple_xml_chunk_22(self): + self.test_simple_xml(chunk_size=22) def test_feed_while_iterating(self): parser = self.etree.XMLPullParser()