From 679e46fbcbbc29eb76841638aeb09c0242db8835 Mon Sep 17 00:00:00 2001 From: apeshkov Date: Mon, 29 Nov 2021 14:40:11 +0300 Subject: [PATCH 1/5] Added the ability to control the start of error handling manually --- pdfkit/api.py | 20 ++++++++++---------- pdfkit/pdfkit.py | 9 ++++++--- tests/pdfkit-tests.py | 10 ++++++++++ 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/pdfkit/api.py b/pdfkit/api.py index 86d9656..c250452 100644 --- a/pdfkit/api.py +++ b/pdfkit/api.py @@ -4,8 +4,8 @@ from .pdfkit import Configuration -def from_url(url, output_path=None, options=None, toc=None, cover=None, - configuration=None, cover_first=False, verbose=False): +def from_url(url, output_path=None, options=None, toc=None, cover=None, configuration=None, cover_first=False, + verbose=False, raise_exceptions=True): """ Convert file of files from URLs to PDF document @@ -21,14 +21,14 @@ def from_url(url, output_path=None, options=None, toc=None, cover=None, Returns: True on success """ - r = PDFKit(url, 'url', options=options, toc=toc, cover=cover, - configuration=configuration, cover_first=cover_first, verbose=verbose) + r = PDFKit(url, 'url', options=options, toc=toc, cover=cover, configuration=configuration, cover_first=cover_first, + verbose=verbose, raise_exceptions=raise_exceptions) return r.to_pdf(output_path) def from_file(input, output_path=None, options=None, toc=None, cover=None, css=None, - configuration=None, cover_first=False, verbose=False): + configuration=None, cover_first=False, verbose=False, raise_exceptions=True): """ Convert HTML file or files to PDF document @@ -45,14 +45,14 @@ def from_file(input, output_path=None, options=None, toc=None, cover=None, css=N Returns: True on success """ - r = PDFKit(input, 'file', options=options, toc=toc, cover=cover, css=css, - configuration=configuration, cover_first=cover_first, verbose=verbose) + r = PDFKit(input, 'file', options=options, toc=toc, cover=cover, css=css, configuration=configuration, + 
cover_first=cover_first, verbose=verbose, raise_exceptions=raise_exceptions) return r.to_pdf(output_path) def from_string(input, output_path=None, options=None, toc=None, cover=None, css=None, - configuration=None, cover_first=False, verbose=False): + configuration=None, cover_first=False, verbose=False, raise_exceptions=True): """ Convert given string or strings to PDF document @@ -69,8 +69,8 @@ def from_string(input, output_path=None, options=None, toc=None, cover=None, css Returns: True on success """ - r = PDFKit(input, 'string', options=options, toc=toc, cover=cover, css=css, - configuration=configuration, cover_first=cover_first, verbose=verbose) + r = PDFKit(input, 'string', options=options, toc=toc, cover=cover, css=css, configuration=configuration, + cover_first=cover_first, verbose=verbose, raise_exceptions=raise_exceptions) return r.to_pdf(output_path) diff --git a/pdfkit/pdfkit.py b/pdfkit/pdfkit.py index 784aad0..83236ed 100644 --- a/pdfkit/pdfkit.py +++ b/pdfkit/pdfkit.py @@ -38,8 +38,8 @@ def __init__(self, msg): def __str__(self): return self.msg - def __init__(self, url_or_file, type_, options=None, toc=None, cover=None, - css=None, configuration=None, cover_first=False, verbose=False): + def __init__(self, url_or_file, type_, options=None, toc=None, cover=None, css=None, configuration=None, + cover_first=False, verbose=False, raise_exceptions=True): self.source = Source(url_or_file, type_) self.configuration = (Configuration() if configuration is None @@ -64,6 +64,7 @@ def __init__(self, url_or_file, type_, options=None, toc=None, cover=None, self.verbose = verbose self.css = css self.stylesheets = [] + self.raise_exceptions = raise_exceptions def _genargs(self, opts): """ @@ -198,7 +199,9 @@ def to_pdf(self, path=None): stderr = stderr or stdout or b"" stderr = stderr.decode('utf-8', errors='replace') exit_code = result.returncode - self.handle_error(exit_code, stderr) + # don't raise errors if we want to get clean wkhtmltopdf output + if 
self.raise_exceptions: + self.handle_error(exit_code, stderr) # Since wkhtmltopdf sends its output to stderr we will capture it # and properly send to stdout diff --git a/tests/pdfkit-tests.py b/tests/pdfkit-tests.py index 969afbd..2432d31 100644 --- a/tests/pdfkit-tests.py +++ b/tests/pdfkit-tests.py @@ -492,5 +492,15 @@ def test_issue_169_quiet_boolean_True(self): output = r.to_pdf() self.assertEqual(output[:4].decode('utf-8'), '%PDF') + def test_raise_exceptions_kwarg(self): + r = pdfkit.PDFKit('clearlywrongurl.asdf', 'url', raise_exceptions=True) + with self.assertRaises(IOError): + r.to_pdf() + + r = pdfkit.PDFKit('clearlywrongurl.asdf', 'url', raise_exceptions=False) + with not self.assertRaises(IOError): + r.to_pdf() + + if __name__ == "__main__": unittest.main() From 1c9b86c54bd24e59fd209b64835ccd6df8afc8a8 Mon Sep 17 00:00:00 2001 From: apeshkov Date: Mon, 29 Nov 2021 14:46:41 +0300 Subject: [PATCH 2/5] tests fix --- tests/pdfkit-tests.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/pdfkit-tests.py b/tests/pdfkit-tests.py index 2432d31..24a526f 100644 --- a/tests/pdfkit-tests.py +++ b/tests/pdfkit-tests.py @@ -493,14 +493,17 @@ def test_issue_169_quiet_boolean_True(self): self.assertEqual(output[:4].decode('utf-8'), '%PDF') def test_raise_exceptions_kwarg(self): + # exception raised r = pdfkit.PDFKit('clearlywrongurl.asdf', 'url', raise_exceptions=True) with self.assertRaises(IOError): r.to_pdf() + # exception not raised r = pdfkit.PDFKit('clearlywrongurl.asdf', 'url', raise_exceptions=False) - with not self.assertRaises(IOError): + try: r.to_pdf() - + except IOError: + self.fail("r.to_pdf() raised an IOError exception despite 'raise_exceptions=False' kwarg") if __name__ == "__main__": unittest.main() From dbfb5cb212767b890beeb86eaa6d5ce881b3a085 Mon Sep 17 00:00:00 2001 From: apeshkov Date: Mon, 29 Nov 2021 14:48:11 +0300 Subject: [PATCH 3/5] tests fix --- tests/pdfkit-tests.py | 1 + 1 file changed, 1 insertion(+) diff 
--git a/tests/pdfkit-tests.py b/tests/pdfkit-tests.py index 24a526f..d1e3517 100644 --- a/tests/pdfkit-tests.py +++ b/tests/pdfkit-tests.py @@ -505,5 +505,6 @@ def test_raise_exceptions_kwarg(self): except IOError: self.fail("r.to_pdf() raised an IOError exception despite 'raise_exceptions=False' kwarg") + if __name__ == "__main__": unittest.main() From 624d291cfad99583128e913e5afd8cefccf62bc3 Mon Sep 17 00:00:00 2001 From: Aleksandr Date: Tue, 30 Nov 2021 18:48:24 +0300 Subject: [PATCH 4/5] review fix, tests update, docs add --- .gitignore | 3 +++ HISTORY.rst | 2 ++ README.rst | 6 ++++++ pdfkit/__init__.py | 2 +- pdfkit/pdfkit.py | 6 ++++-- tests/pdfkit-tests.py | 30 ++++++++++++++++++++---------- 6 files changed, 36 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 3faf7f2..2ea88d5 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ pdfkit.egg-info # Tests .tox .python-version + +# ENV +venv diff --git a/HISTORY.rst b/HISTORY.rst index 7e1809d..cccd071 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,5 +1,7 @@ Changelog --------- +* `1.0.1` + * By default PDFKit handles errors from wkhtmltopdf. Now if you need to get the output that wkhtmltopdf produced despite errors, if there is any, you should pass "raise_exceptions=False" to API calls * `1.0.0` * By default PDFKit now passes "quiet" option to wkhtmltopdf. Now if you need to get output you should pass "verbose=True" to API calls * Fix different issues with searching for wkhtmltopdf binary diff --git a/README.rst b/README.rst index 9119ea3..d99f1eb 100644 --- a/README.rst +++ b/README.rst @@ -146,6 +146,12 @@ You can also pass any options through meta tags in your HTML: pdfkit.from_string(body, 'out.pdf') #with --page-size=Legal and --orientation=Landscape +By default, PDFKit will handle errors from ``wkhtmltopdf``, but in some cases ``wkhtmltopdf`` returns PDF data in ``stdout`` despite errors. 
To tell PDFKit not to handle errors from ``wkhtmltopdf``, you should pass ``raise_exceptions=False`` to API calls, but keep in mind that if ``stdout`` is empty, error handling will be performed anyway: + +.. code-block:: python + + pdfkit.from_url('google.com', 'out.pdf', raise_exceptions=False) + Configuration ------------- diff --git a/pdfkit/__init__.py b/pdfkit/__init__.py index a54653a..31c052e 100644 --- a/pdfkit/__init__.py +++ b/pdfkit/__init__.py @@ -4,7 +4,7 @@ """ __author__ = 'Golovanov Stanislav' -__version__ = '1.0.0' +__version__ = '1.0.1' __license__ = 'MIT' from .pdfkit import PDFKit diff --git a/pdfkit/pdfkit.py b/pdfkit/pdfkit.py index 83236ed..a4b3fa2 100644 --- a/pdfkit/pdfkit.py +++ b/pdfkit/pdfkit.py @@ -199,8 +199,10 @@ def to_pdf(self, path=None): stderr = stderr or stdout or b"" stderr = stderr.decode('utf-8', errors='replace') exit_code = result.returncode - # don't raise errors if we want to get clean wkhtmltopdf output - if self.raise_exceptions: + + # In some cases we don't want to handle errors if we want clean wkhtmltopdf output, + # but if we don't have stdout, we have to do it anyway + if not stdout or self.raise_exceptions: self.handle_error(exit_code, stderr) # Since wkhtmltopdf sends its output to stderr we will capture it # and properly send to stdout diff --git a/tests/pdfkit-tests.py b/tests/pdfkit-tests.py index d1e3517..2df3aaf 100644 --- a/tests/pdfkit-tests.py +++ b/tests/pdfkit-tests.py @@ -493,17 +493,27 @@ def test_issue_169_quiet_boolean_True(self): self.assertEqual(output[:4].decode('utf-8'), '%PDF') def test_raise_exceptions_kwarg(self): - # exception raised - r = pdfkit.PDFKit('clearlywrongurl.asdf', 'url', raise_exceptions=True) - with self.assertRaises(IOError): - r.to_pdf() - # exception not raised - r = pdfkit.PDFKit('clearlywrongurl.asdf', 'url', raise_exceptions=False) - try: - r.to_pdf() - except IOError: - self.fail("r.to_pdf() raised an IOError exception despite 'raise_exceptions=False' kwarg") + with self.subTest('exception 
raised with stdout and raise_exceptions=True'): + + r = pdfkit.PDFKit('Hai!', 'string', options={'bad-option': None}, + raise_exceptions=True) + with self.assertRaises(IOError): + r.to_pdf() + + with self.subTest('exception raised despite raise_exceptions=False because no stdout'): + + r = pdfkit.PDFKit('clearlywrongurl.asdf', 'url', raise_exceptions=False) + with self.assertRaises(IOError): + r.to_pdf() + + with self.subTest('exception not raised with stdout and raise_exceptions=False'): + r = pdfkit.PDFKit('Hai!', 'string', options={'bad-option': None}, + raise_exceptions=False) + try: + r.to_pdf() + except IOError: + self.fail("r.to_pdf() raised an IOError exception despite 'raise_exceptions=False' kwarg") if __name__ == "__main__": From 2679a28c787535cb525b40dd5d6452093eb1523b Mon Sep 17 00:00:00 2001 From: Aleksandr Date: Thu, 2 Dec 2021 19:58:34 +0300 Subject: [PATCH 5/5] fixed the test for python2 compatibility --- tests/pdfkit-tests.py | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/tests/pdfkit-tests.py b/tests/pdfkit-tests.py index 2df3aaf..6451569 100644 --- a/tests/pdfkit-tests.py +++ b/tests/pdfkit-tests.py @@ -494,26 +494,24 @@ def test_issue_169_quiet_boolean_True(self): def test_raise_exceptions_kwarg(self): - with self.subTest('exception raised with stdout and raise_exceptions=True'): - - r = pdfkit.PDFKit('Hai!', 'string', options={'bad-option': None}, - raise_exceptions=True) - with self.assertRaises(IOError): - r.to_pdf() - - with self.subTest('exception raised despite raise_exceptions=False because no stdout'): - - r = pdfkit.PDFKit('clearlywrongurl.asdf', 'url', raise_exceptions=False) - with self.assertRaises(IOError): - r.to_pdf() - - with self.subTest('exception not raised with stdout and raise_exceptions=False'): - r = pdfkit.PDFKit('Hai!', 'string', options={'bad-option': None}, - raise_exceptions=False) - try: - r.to_pdf() - except IOError: - self.fail("r.to_pdf() raised an 
IOError exception despite 'raise_exceptions=False' kwarg") + # exception raised with stdout and raise_exceptions=True + r = pdfkit.PDFKit('Hai!', 'string', options={'bad-option': None}, + raise_exceptions=True) + with self.assertRaises(IOError): + r.to_pdf() + + # exception raised despite raise_exceptions=False because no stdout + r = pdfkit.PDFKit('clearlywrongurl.asdf', 'url', raise_exceptions=False) + with self.assertRaises(IOError): + r.to_pdf() + + # exception not raised with stdout and raise_exceptions=False + r = pdfkit.PDFKit('Hai!', 'string', options={'bad-option': None}, + raise_exceptions=False) + try: + r.to_pdf() + except IOError: + self.fail("r.to_pdf() raised an IOError exception despite 'raise_exceptions=False' kwarg") if __name__ == "__main__":