Skip to content

Commit 804ae35

Browse files
committed
Improve peak search for finding integration limits in RDF
1 parent ec5c7dd commit 804ae35

File tree

4 files changed

+142
-29
lines changed

4 files changed

+142
-29
lines changed

LiquidDiffract/core/data_utils.py

+105-16
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,22 @@ def smooth_data(data, method='savitzky-golay', window_length=31, poly_order=3):
117117
raise NotImplementedError('Smooth method not implemented, please check keyword argument')
118118

119119

120-
def find_integration_limits(r, rdf, rho=None, peak_search_limit=10.0, search_method='first'):
120+
def clean_base_indices(lefts, rights):
121+
"""
122+
Clean peak base limits
123+
See https://github.com/scipy/scipy/issues/19232
124+
"""
125+
_lefts = np.copy(lefts)
126+
_rights = np.copy(rights)
127+
for i in range(len(lefts)-1):
128+
if lefts[i] == lefts[i+1]:
129+
_lefts[i+1] = rights[i]
130+
if rights[i] == rights[i+1]:
131+
_rights[i] = lefts[i+1]
132+
return _lefts, _rights
133+
134+
135+
def find_integration_limits(r, rdf, rho=None, peak_search_limit=10.0, search_method=None):
121136
'''
122137
Computes the integration limits used in the calculation of the
123138
first coordination number for a monatomic system.
@@ -173,48 +188,122 @@ def find_integration_limits(r, rdf, rho=None, peak_search_limit=10.0, search_met
173188
# avoid the extra import may be less expensive
174189
peak_list, _ = scipy.signal.find_peaks(rdf[r<peak_search_limit])
175190

176-
# Select 1st peak after r0 as 1st coordination sphere
191+
# Get index of first peak after r0 (last RDF = 0)
177192
peak_idx_first = np.argmax(peak_list>brent_a)
178193

179-
# Select most prominent peak as 1st coordination sphere and get positions
180-
# at the base either side
194+
# Get peak prominences and positions at the bases either side
181195
peak_prom, left_bases, right_bases = scipy.signal.peak_prominences(rdf, peak_list)
196+
# Check if left bases merge peaks and fix if necessary
197+
if len(np.unique(left_bases)) < len(left_bases):
198+
left_bases, right_bases = clean_base_indices(left_bases, right_bases)
199+
# Get index of most prominent peak
182200
peak_idx_prom = np.argmax(peak_prom)
183-
184-
if search_method == 'first':
185-
peak_idx = peak_idx_first
186-
elif search_method == 'prominent':
201+
# Could check relative prominence here? e.g. np.diff(peak_prom)/peak_prom[0:-1]
202+
# Currently, simply assume most prominent peak is the right target. It should
203+
# be the first peak with significant prominence.
204+
205+
# If the most prominent peak is not the first after last RDF = 0 it
206+
# is usually because small oscillations at RDF ~ 0 need to be discounted
207+
if peak_idx_prom > peak_idx_first:
208+
# Take most prominent peak as position to use
209+
peak_idx = peak_idx_prom
210+
# By definition peak_idx here must be >= 1
211+
# Re-define r_0 as minimum to left of peak to ignore prior oscillations
212+
# Find minimum in Tr as sharper
213+
r_0 = scipy.optimize.minimize_scalar(Tr_interp,
214+
method='bounded',
215+
bounds=(r[peak_list[peak_idx-1]],
216+
r[peak_list[peak_idx]])).x
217+
218+
# If the most prominent peak occurs before the 'first' peak after the last
219+
# RDF = 0 it is usually because the right base of the prominent peak dips
220+
# below RDF = 0. The left base may also be at RDF < 0.
221+
elif peak_idx_prom < peak_idx_first:
222+
# Take the most prominent peak as position to use
187223
peak_idx = peak_idx_prom
224+
# Re-define r_0
225+
# Find minimum preceding peak
226+
# Set upper bound as peak position
227+
# (-1 r-step to account for exact peak position)
228+
r_0_ub = r[peak_list[peak_idx]-1]
229+
if peak_idx > 0:
230+
# If preceding peaks have been found take the max of the next one as
231+
# lower bound to find r_0 (+1 r-step to account for exact position)
232+
r_0_lb = r[peak_list[peak_idx-1]+1]
233+
else:
234+
# If no previous peaks found take the last change in sign of gradient
235+
# (-1 r-step again)
236+
r_0_lb = r[np.argwhere(np.sign(np.diff(Tr[np.where(r <= r_0_ub)])) == -1)[-2]]
237+
# Find minimum
238+
preceding_minimum = scipy.optimize.minimize_scalar(Tr_interp,
239+
method='bounded',
240+
bounds=(r_0_lb, r_0_ub)).x
241+
# Check if minimum at RDF < 0
242+
if rdf_interp(preceding_minimum) < 0:
243+
# Redefine r_0 as last RDF = 0 crossing before peak
244+
# First find last sign-changing interval in discrete data from
245+
# preceding minimum to peak position
246+
check_interval = np.ravel(np.where((r >= preceding_minimum) & (r <= r_0_ub)))
247+
# Pad check_interval if len < 3
248+
if len(check_interval) < 3:
249+
check_interval = np.pad(check_interval, 1, 'edge')
250+
check_interval[0] -= 1
251+
check_interval[-1] += 1
252+
brent_a = check_interval[np.argwhere(np.sign(rdf[check_interval]) == -1)[-1]]
253+
# Find root using scipy.optimize.brentq
254+
r_0 = scipy.optimize.brentq(rdf_interp, r[brent_a], r[brent_a+1])
255+
else:
256+
r_0 = preceding_minimum
257+
188258
else:
189-
raise AttributeError('\'method\' must be \'first\' or \'prominent\'')
259+
# If peak idx match use located r_0
260+
peak_idx = peak_idx_first
190261

262+
# Get left and right base of peak
191263
peak_bases = (r[left_bases[peak_idx]], r[right_bases[peak_idx]])
264+
# Refine rp_max (peak centre in r g(r))
265+
rp_max = scipy.optimize.minimize_scalar(lambda x: -Tr_interp(x), method='bounded', bounds=peak_bases).x
266+
# Refine r_max (peak centre in r^2 g(r))
267+
r_max = scipy.optimize.minimize_scalar(lambda x: -rdf_interp(x), method='bounded', bounds=peak_bases).x
192268

193269
# Get approximate position of next peak to provide upper bound on r_min
194270
try:
195271
next_peak = r[peak_list[peak_idx+1]]
196272
# Handle IndexError if chosen peak is last in peak_list
197273
except IndexError:
198274
# Re-do peak search in g(r) if density (rho) available
275+
# This seems useful in rare cases, but usually the search_limit needs to be increased
199276
if rho != None:
200277
with np.errstate(divide='ignore', invalid='ignore'):
201278
gr = rdf/(4*np.pi*rho*r**2)
202279
gr_peak_list, _ = scipy.signal.find_peaks(gr[r<peak_search_limit])
203-
# Take the first peak after r0
204-
next_peak = r[gr_peak_list[np.argmax(gr_peak_list>brent_a)+1]]
280+
# Take the next peak after peak_list[peak_idx]
281+
try:
282+
next_peak = r[np.where((r[gr_peak_list] > r_max) & (r[gr_peak_list] > rp_max))][0]
283+
except IndexError:
284+
# Using peak_search_limit is non-ideal
285+
# Consider warning here to suggest increasing peak_search_limit
286+
next_peak = peak_search_limit
205287
# If rho not available use
206288
else:
289+
# Using peak_search_limit is non-ideal
290+
# Consider warning here to suggest increasing peak_search_limit
291+
# Programmatically increasing psl may lead to recursion issues
207292
next_peak = peak_search_limit
208293

209-
# Refine rp_max (peak centre in r g(r))
210-
rp_max = scipy.optimize.minimize_scalar(lambda x: -Tr_interp(x), method='bounded', bounds=peak_bases).x
211-
# Refine r_max (peak centre in r^2 g(r))
212-
r_max = scipy.optimize.minimize_scalar(lambda x: -rdf_interp(x), method='bounded', bounds=peak_bases).x
213-
214294
# Refine r_min (position of 1st minimum after 1st peak)
215295
# Note: r_min should be global minimum but optimisation may return local min
216296
r_min = scipy.optimize.minimize_scalar(rdf_interp, method='bounded', bounds=(r_max, next_peak)).x
217297

298+
# Check r_min is not at RDF < 0 in case where peak precedes final crossing of RDF = 0
299+
if (peak_idx_prom < peak_idx_first) and (rdf_interp(r_min) < 0):
300+
# Re-define r_min to preceding crossing of RDF = 0
301+
# Find first sign changing interval between peak and r_min
302+
check_interval = np.where((r >= rp_max) & (r <= r_min))
303+
brent_b = np.ravel(check_interval)[np.argwhere(np.sign(rdf[check_interval]) == -1)[0]]
304+
# Find root using scipy.optimize.brentq
305+
r_min = scipy.optimize.brentq(rdf_interp, r[brent_b-1], r[brent_b])
306+
218307
return r_0, rp_max, r_max, r_min
219308

220309

tests/core/test_data_utils.py

+37-13
Original file line numberDiff line numberDiff line change
@@ -77,19 +77,43 @@ def test_bkg_scaling_residual(self):
7777
class TestFindIntegrationLimits(unittest.TestCase, CustomAssertions):
7878

7979
def test_find_integration_limits(self):
80-
rdf_data = np.load(os.path.join(data_path, 'rdf_peaks.npy'))
81-
rdf_peaks = (2.239933089082285, 2.9162691080482155,
82-
2.9444851167454846, 3.4521306936532428)
83-
limits_a = data_utils.find_integration_limits(*rdf_data.T)
84-
limits_b = data_utils.find_integration_limits(*rdf_data.T, rho=0.05, peak_search_limit=10, search_method='first')
85-
limits_c = data_utils.find_integration_limits(*rdf_data.T, rho=0.05, peak_search_limit=20, search_method='first')
86-
limits_d = data_utils.find_integration_limits(*rdf_data.T, rho=0.05, peak_search_limit=20, search_method='prominent')
87-
limits_e = data_utils.find_integration_limits(*rdf_data.T, peak_search_limit=20, search_method='prominent')
88-
self.assertFloatArrayEqual(limits_a, rdf_peaks)
89-
self.assertFloatArrayEqual(limits_b, rdf_peaks)
90-
self.assertFloatArrayEqual(limits_c, rdf_peaks)
91-
self.assertFloatArrayEqual(limits_d, rdf_peaks)
92-
self.assertFloatArrayEqual(limits_e, rdf_peaks)
80+
# Load test RDF data
81+
rdf_data_a, rdf_data_b, rdf_data_c = np.load(os.path.join(data_path, 'rdf_peak_test_data.npy'), allow_pickle=True)
82+
# State expected limit positions
83+
expected_limits_a = (2.239933089082285, 2.9162691080482155,
84+
2.9444851167454846, 3.4521306936532428)
85+
expected_limits_b = (2.1482738727778155, 2.906024136949478,
86+
2.9433519729572653, 3.5421030348918663)
87+
expected_limits_c = (1.4413920965789064, 1.6158820019375424,
88+
1.621174657388145, 1.7887945926039175)
89+
90+
# Test regular case where peak_idx_prom == peak_idx_first
91+
limits_a = data_utils.find_integration_limits(*rdf_data_a.T)
92+
# Test kwargs, test higher peak search etc.
93+
limits_a_kw = data_utils.find_integration_limits(*rdf_data_a.T, rho=0.05, peak_search_limit=10)
94+
limits_a_psl = data_utils.find_integration_limits(*rdf_data_a.T, rho=0.05, peak_search_limit=50)
95+
96+
# Test case where no next peak, for rho and no rho
97+
limits_a_np = data_utils.find_integration_limits(*rdf_data_a.T, peak_search_limit=3.5)
98+
limits_a_np_rho = data_utils.find_integration_limits(*rdf_data_a.T, rho=0.05, peak_search_limit=3.5)
99+
100+
# Test case where peak_idx_prom > peak_idx_first (oscillations at RDF ~ 0 need to be discounted)
101+
limits_b = data_utils.find_integration_limits(*rdf_data_b.T)
102+
103+
# Test case where peak_idx_prom < peak_idx_first (right base at RDF < 0)
104+
limits_c = data_utils.find_integration_limits(*rdf_data_c.T)
105+
106+
self.assertFloatArrayEqual(limits_a, expected_limits_a)
107+
self.assertFloatArrayEqual(limits_a_kw, expected_limits_a)
108+
self.assertFloatArrayEqual(limits_a_kw, limits_a_psl)
109+
110+
self.assertFloatArrayEqual(limits_a_np[:-1], expected_limits_a[:-1])
111+
self.assertFloatArrayEqual(limits_a_np_rho[:-1], expected_limits_a[:-1])
112+
self.assertFloatArrayEqual(limits_a_np, expected_limits_a, atol=1.e-6)
113+
self.assertFloatArrayEqual(limits_a_np_rho, expected_limits_a, atol=1.e-6)
114+
115+
self.assertFloatArrayEqual(limits_b, expected_limits_b)
116+
self.assertFloatArrayEqual(limits_c, expected_limits_c)
93117

94118

95119
class TestRebinData(unittest.TestCase, CustomAssertions):

tests/data/rdf_peak_test_data.npy

48.4 KB
Binary file not shown.

tests/data/rdf_peaks.npy

-7.94 KB
Binary file not shown.

0 commit comments

Comments
 (0)