Skip to content

Commit 5bc9faf

Browse files
authored
MAINT: Conform ASCIIHexDecode implementation to specification (#3274)
If the filter encounters the EOD marker after reading an odd number of hexadecimal digits, it shall behave as if a 0 (zero) followed the last digit [PDF 2.0, §7.4.2].
1 parent d9ba201 commit 5bc9faf

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

pypdf/filters.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def decode(
294294
logger_warning(
295295
"missing EOD in ASCIIHexDecode, check if output is OK", __name__
296296
)
297-
break # Reached end of string even if no EOD
297+
break # Reached end of string without an EOD
298298
char = data[index : index + 1]
299299
if char == b">":
300300
break
@@ -306,7 +306,13 @@ def decode(
306306
retval += bytes((int(hex_pair, base=16),))
307307
hex_pair = b""
308308
index += 1
309-
assert hex_pair == b""
309+
# If the filter encounters the EOD marker after reading
310+
# an odd number of hexadecimal digits,
311+
# it shall behave as if a 0 (zero) followed the last digit.
312+
# For every even number of hexadecimal digits, hex_pair is reset to b"".
313+
if hex_pair != b"":
314+
hex_pair += b"0"
315+
retval += bytes((int(hex_pair, base=16),))
310316
return retval
311317

312318

@@ -351,7 +357,7 @@ def decode(
351357
logger_warning(
352358
"missing EOD in RunLengthDecode, check if output is OK", __name__
353359
)
354-
break # reach End Of String even if no EOD
360+
break # Reached end of string without an EOD
355361
length = data[index]
356362
index += 1
357363
if length == 128:

tests/test_filters.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ def test_flate_decode_decompress_with_array_params(params):
104104
), # Same as previous, but whitespaced
105105
("30313233343536373839616263646566414243444546>", string.hexdigits.encode()),
106106
("20090a0d0b0c>", string.whitespace.encode()),
107+
# Odd number of hexadecimal digits behaves as if a 0 (zero) followed the last digit
108+
("3938373635343332313>", string.digits[::-1].encode()),
107109
],
108110
ids=[
109111
"empty",
@@ -114,16 +116,13 @@ def test_flate_decode_decompress_with_array_params(params):
114116
"digits_whitespace",
115117
"hexdigits",
116118
"whitespace",
119+
"odd_number",
117120
],
118121
)
119122
def test_ascii_hex_decode_method(data, expected):
120123
"""
121124
Feeds a bunch of values to ASCIIHexDecode.decode() and ensures the
122125
correct output is returned.
123-
124-
TODO What is decode() supposed to do for such inputs as ">>", ">>>" or
125-
any other not terminated by ">"? (For the latter case, an exception
126-
is currently raised.)
127126
"""
128127
assert ASCIIHexDecode.decode(data) == expected
129128

0 commit comments

Comments
 (0)