From 7496113a237456311d89e60a4abe5944c1d26b54 Mon Sep 17 00:00:00 2001 From: Eddy Comyn-Platt <53045993+EddyCMWF@users.noreply.github.com> Date: Mon, 6 Nov 2023 10:19:22 +0000 Subject: [PATCH] 60 handle errors from non http url sources (#61) * generic error hadnling * generic error hadnling * #60 * #60 * backup downloader added * add tests for ftp * fix urls * fix test * cleanup * wget only * ftp test added * multiurl updated to handle ftp properly * qa * unit tests * qa * removing unused test * clean up and extra download test * qa * Mattia comments addressed --------- Co-authored-by: Mattia Almansi --- cads_adaptors/tools/url_tools.py | 20 ++++++------- ci/environment-ci.yml | 1 + environment.yml | 3 +- pyproject.toml | 5 ++-- tests/test_20_url_tools.py | 50 ++++++++++++++++++++++++++++++++ 5 files changed, 65 insertions(+), 14 deletions(-) create mode 100644 tests/test_20_url_tools.py diff --git a/cads_adaptors/tools/url_tools.py b/cads_adaptors/tools/url_tools.py index fefe5492..9bd02fa3 100644 --- a/cads_adaptors/tools/url_tools.py +++ b/cads_adaptors/tools/url_tools.py @@ -8,6 +8,7 @@ import jinja2 import multiurl import requests +import yaml from . import hcube_tools @@ -31,25 +32,22 @@ def requests_to_urls( def try_download(urls: List[str]) -> List[str]: paths = [] - excs = [] for url in urls: path = urllib.parse.urlparse(url).path.lstrip("/") dir = os.path.dirname(path) - os.makedirs(dir, exist_ok=True) + if dir: + os.makedirs(dir, exist_ok=True) try: multiurl.download(url, path) + except Exception as exc: + logger.warning(f"Failed download for URL: {url}\nTraceback: {exc}") + else: paths.append(path) - except requests.exceptions.HTTPError as exc: - if exc.response is not None and exc.response.status_code == 404: - logger.warning(exc) - excs.append(exc) - else: - raise exc + if len(paths) == 0: raise RuntimeError( - f"Request empty. At least one of the following {urls} " - "must be a valid url from which to download the data " - f"download errors: {[str(exc) for exc in excs]}" + f"Request empty. At least one of the following:\n{yaml.safe_dump(urls, indent=2)} " + "must be a valid url from which to download the data. " ) return paths diff --git a/ci/environment-ci.yml b/ci/environment-ci.yml index 0b7e42c9..bec6f628 100644 --- a/ci/environment-ci.yml +++ b/ci/environment-ci.yml @@ -17,5 +17,6 @@ dependencies: - types-python-dateutil - types-pyyaml - types-requests +- pytest-localftpserver - pip: - git+https://github.com/ecmwf-projects/cacholote.git diff --git a/environment.yml b/environment.yml index bd8c8b0b..f8af0dcf 100644 --- a/environment.yml +++ b/environment.yml @@ -15,6 +15,7 @@ dependencies: - cfgrib - h5netcdf - wget +- multiurl>=0.2.3.2 +- pyyaml - pip: - rooki - - multiurl diff --git a/pyproject.toml b/pyproject.toml index 3aa950b1..43935cd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,9 +17,10 @@ dependencies = [ "ecmwflibs", "cfgrib", "cacholote", - "multiurl", + "multiurl>=0.2.3.2", "rooki", - "python-dateutil" + "python-dateutil", + "pyyaml" ] description = "CADS data retrieve utilities to be used by adaptors" dynamic = ["version"] diff --git a/tests/test_20_url_tools.py b/tests/test_20_url_tools.py new file mode 100644 index 00000000..966e2606 --- /dev/null +++ b/tests/test_20_url_tools.py @@ -0,0 +1,50 @@ +import os + +import pytest + +from cads_adaptors.tools import url_tools + + +@pytest.mark.parametrize( + "urls,expected_nfiles", + ( + ( + [ + "https://get.ecmwf.int/repository/test-data/earthkit-data/test-data/test_single.nc" + ], + 1, + ), + ( + [ + "https://get.ecmwf.int/repository/test-data/earthkit-data/test-data/test_single.nc", + "https://get.ecmwf.int/repository/test-data/earthkit-data/test-data/test_single.grib", + ], + 2, + ), + ), +) +def test_downloaders(tmp_path, monkeypatch, urls, expected_nfiles): + monkeypatch.chdir(tmp_path) # try_download generates files in the working dir + paths = url_tools.try_download(urls) + assert len(paths) == expected_nfiles + + +@pytest.mark.parametrize( + "anon", + ( + True, + False, + ), +) +def test_ftp_download(tmp_path, ftpserver, anon): + local_test_file = os.path.join(tmp_path, "testfile.txt") + with open(local_test_file, "w") as f: + f.write("This is a test file") + + ftp_url = ftpserver.put_files(local_test_file, style="url", anon=anon) + work_dir = os.path.join(tmp_path, "work_dir") + os.makedirs(work_dir) + os.chdir(work_dir) + local_test_download = url_tools.try_download(ftp_url)[0] + with open(local_test_file) as original, open(local_test_download) as downloaded: + assert original.read() == downloaded.read()