Skip to content

Commit 4748bb1

Browse files
junitxml: fix supplementary plane characters incorrectly escaped (#14484)
Closes #14483
1 parent ea98f72 commit 4748bb1

3 files changed

Lines changed: 4 additions & 9 deletions

File tree

changelog/14483.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed the JUnit XML report incorrectly escaping valid supplementary-plane Unicode characters (code points U+10000 to U+10FFFF, such as emoji) in test failure messages. -- by :user:`EternalRights`

src/_pytest/junitxml.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,7 @@ def repl(matchobj: re.Match[str]) -> str:
5555
# The spec range of valid chars is:
5656
# Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
5757
# For an unknown(?) reason, we disallow #x7F (DEL) as well.
58-
illegal_xml_re = (
59-
"[^\u0009\u000a\u000d\u0020-\u007e\u0080-\ud7ff\ue000-\ufffd\u10000-\u10ffff]"
60-
)
58+
illegal_xml_re = "[^\u0009\u000a\u000d\u0020-\u007e\u0080-\ud7ff\ue000-\ufffd\U00010000-\U0010ffff]"
6159
return re.sub(illegal_xml_re, repl, str(arg))
6260

6361

testing/test_junitxml.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,9 +1114,6 @@ def test_invalid_xml_escape() -> None:
11141114
# Test some more invalid xml chars, the full range should be
11151115
# tested really but let's just test the edges of the ranges
11161116
# instead.
1117-
# XXX This only tests low unicode character points for now as
1118-
# there are some issues with the testing infrastructure for
1119-
# the higher ones.
11201117
# XXX Testing 0xD (\r) is tricky as it overwrites the just written
11211118
# line in the output; it is nevertheless included in ``valid`` below.
11221119
invalid = (
@@ -1131,9 +1128,8 @@ def test_invalid_xml_escape() -> None:
11311128
0xDFFF,
11321129
0xFFFE,
11331130
0x0FFFF,
1134-
) # , 0x110000)
1135-
valid = (0x9, 0xA, 0x20)
1136-
# 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF)
1131+
)
1132+
valid = (0x9, 0xA, 0x20, 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF)
11371133

11381134
for i in invalid:
11391135
got = bin_xml_escape(chr(i))

0 commit comments

Comments
 (0)