Merge branch 'master' into prepare_v033
Zeitsperre authored Jan 28, 2022
2 parents a2c9c78 + 422a44c commit 3292351
Showing 12 changed files with 208 additions and 140 deletions.
1 change: 1 addition & 0 deletions AUTHORS.rst
@@ -31,3 +31,4 @@ Contributors
* Jamie Quinn <jamiejquinn@jamiejquinn.com> `@JamieJQuinn <https://github.com/JamieJQuinn>`_
* Tom Keel <thomas.keel.18@ucl.ac.uk> `@Thomasjkeel <https://github.com/Thomasjkeel>`_
* Maliko Tanguy <malngu@ceh.ac.uk> `@malngu <https://github.com/malngu>`_
* Yannick Rousseau `@yrouranos <https://github.com/yrouranos>`_
4 changes: 2 additions & 2 deletions HISTORY.rst
@@ -4,7 +4,7 @@ History

0.33.0 (2022-01-28)
-------------------
Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Tom Keel (:user:`Thomasjkeel`), Jeremy Fyke (:user:`JeremyFyke`), David Huard (:user:`huard`), Abel Aoun (:user:`bzah`), Juliette Lavoie (:user:`juliettelavoie`).
Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Tom Keel (:user:`Thomasjkeel`), Jeremy Fyke (:user:`JeremyFyke`), David Huard (:user:`huard`), Abel Aoun (:user:`bzah`), Juliette Lavoie (:user:`juliettelavoie`), Yannick Rousseau (:user:`yrouranos`).

Announcements
^^^^^^^^^^^^^
@@ -28,6 +28,7 @@ New features and enhancements
- ``xclim.sdba.adjustment.PrincipalComponent`` was modified to have a simpler signature. The "full" method for finding the best PC orientation was added. (:issue:`697`).
* New ``xclim.indices.stats.parametric_cdf`` function to facilitate the computation of return periods over DataArrays of statistical distribution parameters (:issue:`876`, :pull:`984`).
* Add ``copy`` parameter to ``percentile_doy`` to control if the array input can be dumped after computing percentiles (:issue:`932`, :pull:`985`).
* New improved algorithm for ``dry_spell_total_length``, performing the temporal indexing at the right moment and with control on the aggregation operator (``op``) for determining the dry spells.
* Added ``properties.py`` and ``measures.py`` in order to perform diagnostic tests of sdba (:issue:`424`, :pull:`967`).
* Update how ``percentile_doy`` rechunk the input data to preserve the initial chunk size. This should make the computation memory footprint more predictable (:issue:`932`, :pull:`987`).

@@ -46,7 +47,6 @@ Internal changes
* Some slow tests were marked `slow` to help speed up the standard test ensemble. (:pull:`969`).
- Tox testing ensemble now also reports slowest tests using the ``--durations`` flag.
* `pint` no longer emits warnings about redefined units when the `logging` module is loaded. (:issue:`990`, :pull:`991`).
* `pandas` is now temporarily pinned below version 1.4.0. (related: :issue:`992`).
* Added a CI step for cancelling running workflows in pull requests that receive multiple pushes. (:pull:`988`).

0.32.1 (2021-12-17)
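One of the entries above, the new ``copy`` argument of ``percentile_doy``, lends itself to a short usage sketch. Everything in the snippet except the ``copy`` keyword itself (the synthetic data, the window and the percentile) is an illustrative assumption, not taken from the commit:

```python
# Minimal sketch of the new ``copy`` argument to percentile_doy (assumed usage; the
# synthetic data, the window and the percentile are illustrative, not from the commit).
import numpy as np
import pandas as pd
import xarray as xr
from xclim.core.calendar import percentile_doy

# Two years of hypothetical daily near-surface temperature.
time = pd.date_range("2000-01-01", periods=730, freq="D")
tas = xr.DataArray(
    15 + 10 * np.sin(2 * np.pi * time.dayofyear.values / 365.25) + np.random.rand(time.size),
    dims="time",
    coords={"time": time},
    attrs={"units": "degC"},
)

# copy=False signals that the input array may be dumped after the percentiles are
# computed, which can shrink the memory footprint of the calculation.
p90 = percentile_doy(tas, window=5, per=90, copy=False)
```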
2 changes: 1 addition & 1 deletion environment.yml
@@ -13,7 +13,7 @@ dependencies:
- jsonpickle
- numba
- numpy>=1.16
- pandas>=0.23,<1.4
- pandas>=0.23
- pint>=0.9
- poppler>=0.67
- pyyaml
2 changes: 1 addition & 1 deletion setup.py
@@ -40,7 +40,7 @@
"numba",
"numpy>=1.16",
"packaging>=20.0",
"pandas>=0.23,<1.4",
"pandas>=0.23",
"pint>=0.10",
"pyyaml",
"scikit-learn>=0.21.3",
4 changes: 2 additions & 2 deletions xclim/data/fr.json
@@ -947,8 +947,8 @@
},
"DRY_SPELL_TOTAL_LENGTH": {
"title": "Durée totale en jours des périodes séches",
"abstract": "Durée totale en jours des périodes séches de n jours et plus, pendant lesquelles les précipitations accumulées sur une fenêtre de n jours sont inférieures à un seuil donné.",
"description": "Durée totale {freq:f} en jours des périodes séches de {window} jours et plus, pendant lesquelles les précipitations accumulées sur une fenêtre de {window} jours sont inférieures à {thresh}.",
"abstract": "Durée totale en jours des périodes séches de n jours et plus, pendant lesquelles les précipitations accumulées ou maximales sur une fenêtre de n jours sont inférieures à un seuil donné.",
"description": "Durée totale {freq:f} en jours des périodes séches de {window} jours et plus, pendant lesquelles les précipitations {op:fpl} sur une fenêtre de {window} jours sont inférieures à {thresh}.",
"long_name": "Durée totale {freq:f} en jours des périodes sèches de {window} jours et plus"
},
"COLD_AND_DRY_DAYS": {
4 changes: 2 additions & 2 deletions xclim/indicators/atmos/_precip.py
@@ -370,8 +370,8 @@ class HrPrecip(Hourly):

dry_spell_total_length = Precip(
identifier="dry_spell_total_length",
description="The {freq} number of days in dry periods of {window} days and more, during which the accumulated "
"precipitation on a window of {window} days is under {thresh}.",
description="The {freq} number of days in dry periods of {window} days and more, during which the {op}"
"precipitation within windows of {window} days is under {thresh}.",
units="days",
cell_methods="",
compute=indices.dry_spell_total_length,
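Since the description template now carries the ``op`` placeholder, here is a hedged sketch of how the indicator might be invoked; the synthetic precipitation, the ``op="max"`` choice and the attribute lookup at the end are assumptions, not part of the commit:

```python
# Hedged sketch of calling the updated indicator; the synthetic precipitation and the
# op="max" choice are assumptions made only to show the new ``op`` argument in use.
import numpy as np
import pandas as pd
import xarray as xr
from xclim import atmos

# One hypothetical (leap) year of daily precipitation flux: a single wet month, dry otherwise.
time = pd.date_range("2000-01-01", periods=366, freq="D")
wet = (time.dayofyear >= 150) & (time.dayofyear < 180)
pr = xr.DataArray(
    np.where(wet, 5e-5, 0.0),  # roughly 4.3 mm/day during the wet stretch, 0 elsewhere
    dims="time",
    coords={"time": time},
    attrs={"units": "kg m-2 s-1"},
)

# ``op`` is forwarded to the underlying index: "sum" keeps the old accumulated-precipitation
# criterion, while "max" marks a window as dry only if no single day reaches the threshold.
dsl = atmos.dry_spell_total_length(pr=pr, thresh="1.0 mm", window=3, op="max", freq="YS")

# The formatted ``description`` attribute of the output should now mention the operator,
# following the template edited above.
print(dsl.attrs.get("description"))
```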
40 changes: 33 additions & 7 deletions xclim/indices/_agro.py
@@ -1,12 +1,12 @@
# noqa: D100

from typing import Optional

import numpy as np
import xarray

import xclim.indices as xci
import xclim.indices.run_length as rl
from xclim.core.calendar import select_time
from xclim.core.units import convert_units_to, declare_units, rate2amount, to_agg_units
from xclim.core.utils import DayOfYearStr
from xclim.indices._threshold import first_day_above, first_day_below, freshet_start
@@ -647,11 +647,18 @@ def dry_spell_frequency(

@declare_units(pr="[precipitation]", thresh="[length]")
def dry_spell_total_length(
pr: xarray.DataArray, thresh: str = "1.0 mm", window: int = 3, freq: str = "YS"
pr: xarray.DataArray,
thresh: str = "1.0 mm",
window: int = 3,
op: str = "sum",
freq: str = "YS",
**indexer,
) -> xarray.DataArray:
"""
Return the total number of days in dry periods of n days and more, during which the accumulated precipitation
on a window of n days is under the threshold.
Total length of dry spells
Total number of days in dry periods of a minimum length, during which the maximum or
accumulated precipitation within a window of the same length is under a threshold.
Parameters
----------
@@ -660,21 +667,40 @@ def dry_spell_total_length(
thresh : str
Accumulated precipitation value under which a period is considered dry.
window : int
Number of days where the accumulated precipitation is under threshold.
Number of days where the maximum or accumulated precipitation is under threshold.
op : {"max", "sum"}
Reduce operation.
freq : str
Resampling frequency.
indexer :
Indexing parameters to compute the indicator on a temporal subset of the data.
It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`.
Indexing is done after finding the dry days, but before finding the spells.
Returns
-------
xarray.DataArray
The {freq} total number of days in dry periods of minimum {window} days.
Notes
-----
The algorithm assumes days before and after the timeseries are "wet", meaning that
the condition for being considered part of a dry spell is stricter on the edges. For
example, with `window=3` and `op='sum'`, the first day of the series is considered
part of a dry spell only if the accumulated precipitation within the first 3 days is
under the threshold. In comparison, a day in the middle of the series is considered
part of a dry spell if any of the three 3-day periods of which it is part are
considered dry (so a total of five days are included in the computation, compared to only 3.)
"""
pram = rate2amount(pr, out_units="mm")
thresh = convert_units_to(thresh, pram)

mask = pram.rolling(time=window, center=True).sum() < thresh
out = (mask.rolling(time=window, center=True).sum() >= 1).resample(time=freq).sum()
pram_pad = pram.pad(time=(0, window))
mask = getattr(pram_pad.rolling(time=window), op)() < thresh
dry = (mask.rolling(time=window).sum() >= 1).shift(time=-(window - 1))
dry = dry.isel(time=slice(0, pram.time.size)).astype(float)

out = select_time(dry, **indexer).resample(time=freq).sum("time")
return to_agg_units(out, pram, "count")


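To make the reworked index and its Notes concrete, here is a small hedged example at the ``xclim.indices`` level; the toy data and the February selection are invented for illustration only:

```python
# Hedged sketch of the reworked index itself; the toy data and the month selection are
# illustrative assumptions, not values taken from the commit or its tests.
import numpy as np
import pandas as pd
import xarray as xr
from xclim.indices import dry_spell_total_length

# Sixty hypothetical days: a wet first ten days, then no precipitation at all.
time = pd.date_range("2000-01-01", periods=60, freq="D")
values = np.zeros(60)
values[:10] = 5e-5  # roughly 4.3 mm/day, comfortably above a 1 mm threshold
pr = xr.DataArray(
    values, dims="time", coords={"time": time}, attrs={"units": "kg m-2 s-1"}
)

# With op="sum", a day counts toward a dry spell when any 3-day window containing it
# accumulates less than 1 mm; edge days only "see" the windows that actually exist,
# which is the stricter boundary behaviour described in the Notes.
total = dry_spell_total_length(pr, thresh="1.0 mm", window=3, op="sum", freq="MS")

# The new ``**indexer`` pass-through restricts the computation to a temporal subset
# after the dry days are found; here only February days are kept (hypothetical choice).
feb_only = dry_spell_total_length(
    pr, thresh="1.0 mm", window=3, op="sum", freq="MS", month=[2]
)
```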
36 changes: 17 additions & 19 deletions xclim/testing/tests/conftest.py
@@ -19,7 +19,7 @@ def tmp_netcdf_filename(tmpdir):
@pytest.fixture
def tas_series():
def _tas_series(values, start="7/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -38,7 +38,7 @@ def _tas_series(values, start="7/1/2000"):
@pytest.fixture
def tasmax_series():
def _tasmax_series(values, start="7/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -57,7 +57,7 @@ def _tasmax_series(values, start="7/1/2000"):
@pytest.fixture
def tasmin_series():
def _tasmin_series(values, start="7/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -76,7 +76,7 @@ def _tasmin_series(values, start="7/1/2000"):
@pytest.fixture
def pr_series():
def _pr_series(values, start="7/1/2000", units="kg m-2 s-1"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -95,7 +95,7 @@ def _pr_series(values, start="7/1/2000", units="kg m-2 s-1"):
@pytest.fixture
def prc_series():
def _prc_series(values, start="7/1/2000", units="kg m-2 s-1"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -117,9 +117,7 @@ def _bootstrap_series(values, start="7/1/2000", units="kg m-2 s-1", cf_time=Fals
if cf_time:
coords = xr.cftime_range(start, periods=len(values), freq="D")
else:
coords = pd.date_range(
start, periods=len(values), freq=pd.DateOffset(days=1)
)
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -138,7 +136,7 @@ def _bootstrap_series(values, start="7/1/2000", units="kg m-2 s-1", cf_time=Fals
@pytest.fixture
def prsn_series():
def _prsn_series(values, start="7/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -159,7 +157,7 @@ def pr_hr_series():
"""Return hourly time series."""

def _pr_hr_series(values, start="1/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(hours=1))
coords = pd.date_range(start, periods=len(values), freq="1H")
return xr.DataArray(
values,
coords=[coords],
@@ -179,7 +177,7 @@ def _pr_hr_series(values, start="1/1/2000"):
def pr_ndseries():
def _pr_series(values, start="1/1/2000"):
nt, nx, ny = np.atleast_3d(values).shape
time = pd.date_range(start, periods=nt, freq=pd.DateOffset(days=1))
time = pd.date_range(start, periods=nt, freq="D")
x = np.arange(nx)
y = np.arange(ny)
return xr.DataArray(
@@ -200,7 +198,7 @@ def _pr_series(values, start="1/1/2000"):
@pytest.fixture
def q_series():
def _q_series(values, start="1/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -223,7 +221,7 @@ def ndq_series():

cx = xr.IndexVariable("x", x)
cy = xr.IndexVariable("y", y)
dates = pd.date_range("1900-01-01", periods=nt, freq=pd.DateOffset(days=1))
dates = pd.date_range("1900-01-01", periods=nt, freq="D")

time = xr.IndexVariable(
"time", dates, attrs={"units": "days since 1900-01-01", "calendar": "standard"}
@@ -287,7 +285,7 @@ def areacella():
@pytest.fixture
def hurs_series():
def _hurs_series(values, start="7/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -305,7 +303,7 @@ def _hurs_series(values, start="7/1/2000"):
@pytest.fixture
def sfcWind_series():
def _sfcWind_series(values, start="7/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -323,7 +321,7 @@ def _sfcWind_series(values, start="7/1/2000"):
@pytest.fixture
def huss_series():
def _huss_series(values, start="7/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -341,7 +339,7 @@ def _huss_series(values, start="7/1/2000"):
@pytest.fixture
def snd_series():
def _snd_series(values, start="7/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -359,7 +357,7 @@ def _snd_series(values, start="7/1/2000"):
@pytest.fixture
def snw_series():
def _snw_series(values, start="7/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
@@ -377,7 +375,7 @@ def _snw_series(values, start="7/1/2000"):
@pytest.fixture
def ps_series():
def _ps_series(values, start="7/1/2000"):
coords = pd.date_range(start, periods=len(values), freq=pd.DateOffset(days=1))
coords = pd.date_range(start, periods=len(values), freq="D")
return xr.DataArray(
values,
coords=[coords],
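The fixture change repeated throughout this file swaps ``pd.DateOffset(days=1)`` for the plain ``"D"`` alias (and ``DateOffset(hours=1)`` for ``"1H"``). A minimal check of the equivalence, assuming the goal is simply a daily or hourly ``DatetimeIndex`` that also works with the now-unpinned pandas:

```python
# Quick check of the fixture change: the "D" alias and DateOffset(days=1) build the
# same daily index, so the simpler spelling can replace the old one everywhere.
import pandas as pd

a = pd.date_range("7/1/2000", periods=5, freq="D")
b = pd.date_range("7/1/2000", periods=5, freq=pd.DateOffset(days=1))
assert a.equals(b)

# The hourly fixture follows the same pattern, with "1H" replacing DateOffset(hours=1).
h = pd.date_range("1/1/2000", periods=3, freq="1H")
```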
