Skip to content

Commit 804ae35

Browse files
committed
Improve peak search for finding integration limits in RDF
1 parent ec5c7dd commit 804ae35

File tree

4 files changed

+142
-29
lines changed

4 files changed

+142
-29
lines changed

LiquidDiffract/core/data_utils.py

+105-16
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,22 @@ def smooth_data(data, method='savitzky-golay', window_length=31, poly_order=3):
117117
raise NotImplementedError('Smooth method not implemented, please check keyword argument')
118118

119119

120-
def find_integration_limits(r, rdf, rho=None, peak_search_limit=10.0, search_method='first'):
120+
def clean_base_indices(lefts, rights):
121+
"""
122+
Clean peak base limits
123+
See https://github.com/scipy/scipy/issues/19232
124+
"""
125+
_lefts = np.copy(lefts)
126+
_rights = np.copy(rights)
127+
for i in range(len(lefts)-1):
128+
if lefts[i] == lefts[i+1]:
129+
_lefts[i+1] = rights[i]
130+
if rights[i] == rights[i+1]:
131+
_rights[i] = lefts[i+1]
132+
return _lefts, _rights
133+
134+
135+
def find_integration_limits(r, rdf, rho=None, peak_search_limit=10.0, search_method=None):
121136
'''
122137
Computes the integration limits used in the calculation of the
123138
first coordination number for a monatomic system.
@@ -173,48 +188,122 @@ def find_integration_limits(r, rdf, rho=None, peak_search_limit=10.0, search_met
173188
# avoid the extra import may be less expensive
174189
peak_list, _ = scipy.signal.find_peaks(rdf[r<peak_search_limit])
175190

176-
# Select 1st peak after r0 as 1st coordination sphere
191+
# Get index of first peak after r0 (last RDF = 0)
177192
peak_idx_first = np.argmax(peak_list>brent_a)
178193

179-
# Select most prominent peak as 1st coordination sphere and get positions
180-
# at the base either side
194+
# Get peak prominences and positions at the bases either side
181195
peak_prom, left_bases, right_bases = scipy.signal.peak_prominences(rdf, peak_list)
196+
# Check if left bases merge peaks and fix if necessary
197+
if len(np.unique(left_bases)) < len(left_bases):
198+
left_bases, right_bases = clean_base_indices(left_bases, right_bases)
199+
# Get index of most prominent peak
182200
peak_idx_prom = np.argmax(peak_prom)
183-
184-
if search_method == 'first':
185-
peak_idx = peak_idx_first
186-
elif search_method == 'prominent':
201+
# Could check relative prominence here? e.g. np.diff(peak_prom)/peak_prom[0:-1]
202+
# Currently, simply assume most prominent peak is the right target. It should
203+
# be the first peak with significant prominence.
204+
205+
# If the most prominent peak is not the first after last RDF = 0 it
206+
# is usually because small oscillations at RDF ~ 0 need to be discounted
207+
if peak_idx_prom > peak_idx_first:
208+
# Take most prominent peak as position to use
209+
peak_idx = peak_idx_prom
210+
# By definition peak_idx here must be >= 1
211+
# Re-define r_0 as minimum to left of peak to ignore prior oscillations
212+
# Find minimum in Tr as sharper
213+
r_0 = scipy.optimize.minimize_scalar(Tr_interp,
214+
method='bounded',
215+
bounds=(r[peak_list[peak_idx-1]],
216+
r[peak_list[peak_idx]])).x
217+
218+
# If the most prominent peak occurs before the 'first' peak after the last
219+
# RDF = 0 it is usually because the right base of the prominent peak dips
220+
# below RDF = 0. The left base may also be at RDF < 0.
221+
elif peak_idx_prom < peak_idx_first:
222+
# Take the most prominent peak as position to use
187223
peak_idx = peak_idx_prom
224+
# Re-define r_0
225+
# Find minimum preceding peak
226+
# Set upper bound as peak position
227+
# (-1 r-step to account for exact peak position)
228+
r_0_ub = r[peak_list[peak_idx]-1]
229+
if peak_idx > 0:
230+
# If preceding peaks have been found take the max of the next one as
231+
# lower bound to find r_0 (+1 r-step to account for exact position)
232+
r_0_lb = r[peak_list[peak_idx-1]+1]
233+
else:
234+
# If no previous peaks found take the last change in sign of gradient
235+
# (-1 r-step again)
236+
r_0_lb = r[np.argwhere(np.sign(np.diff(Tr[np.where(r <= r_0_ub)])) == -1)[-2]]
237+
# Find minimum
238+
preceding_minimum = scipy.optimize.minimize_scalar(Tr_interp,
239+
method='bounded',
240+
bounds=(r_0_lb, r_0_ub)).x
241+
# Check if minimum at RDF < 0
242+
if rdf_interp(preceding_minimum) < 0:
243+
# Redefine r_0 as last RDF = 0 crossing before peak
244+
# First find last sign-changing interval in discrete data from
245+
# preceding minimum to peak position
246+
check_interval = np.ravel(np.where((r >= preceding_minimum) & (r <= r_0_ub)))
247+
# Pad check_interval if len < 3
248+
if len(check_interval) < 3:
249+
check_interval = np.pad(check_interval, 1, 'edge')
250+
check_interval[0] -= 1
251+
check_interval[-1] += 1
252+
brent_a = check_interval[np.argwhere(np.sign(rdf[check_interval]) == -1)[-1]]
253+
# Find root using scipy.optimize.brentq
254+
r_0 = scipy.optimize.brentq(rdf_interp, r[brent_a], r[brent_a+1])
255+
else:
256+
r_0 = preceding_minimum
257+
188258
else:
189-
raise AttributeError('\'method\' must be \'first\' or \'prominent\'')
259+
# If peak idx match use located r_0
260+
peak_idx = peak_idx_first
190261

262+
# Get left and right base of peak
191263
peak_bases = (r[left_bases[peak_idx]], r[right_bases[peak_idx]])
264+
# Refine rp_max (peak centre in r g(r))
265+
rp_max = scipy.optimize.minimize_scalar(lambda x: -Tr_interp(x), method='bounded', bounds=peak_bases).x
266+
# Refine r_max (peak centre in r^2 g(r))
267+
r_max = scipy.optimize.minimize_scalar(lambda x: -rdf_interp(x), method='bounded', bounds=peak_bases).x
192268

193269
# Get approximate position of next peak to provide upper bound on r_min
194270
try:
195271
next_peak = r[peak_list[peak_idx+1]]
196272
# Handle IndexError if chosen peak is last in peak_list
197273
except IndexError:
198274
# Re-do peak search in g(r) if density (rho) available
275+
# This seems useful in rare cases, but usually the search_limit needs to be increased
199276
if rho != None:
200277
with np.errstate(divide='ignore', invalid='ignore'):
201278
gr = rdf/(4*np.pi*rho*r**2)
202279
gr_peak_list, _ = scipy.signal.find_peaks(gr[r<peak_search_limit])
203-
# Take the first peak after r0
204-
next_peak = r[gr_peak_list[np.argmax(gr_peak_list>brent_a)+1]]
280+
# Take the next peak after peak_list[peak_idx]
281+
try:
282+
next_peak = r[np.where((r[gr_peak_list] > r_max) & (r[gr_peak_list] > rp_max))][0]
283+
except IndexError:
284+
# Using peak_search_limit is non-ideal
285+
# Consider warning here to suggest increasing peak_search_limit
286+
next_peak = peak_search_limit
205287
# If rho not available use
206288
else:
289+
# Using peak_search_limit is non-ideal
290+
# Consider warning here to suggest increasing peak_search_limit
291+
# Programmatically increasing psl may lead to recursion issues
207292
next_peak = peak_search_limit
208293

209-
# Refine rp_max (peak centre in r g(r))
210-
rp_max = scipy.optimize.minimize_scalar(lambda x: -Tr_interp(x), method='bounded', bounds=peak_bases).x
211-
# Refine r_max (peak centre in r^2 g(r))
212-
r_max = scipy.optimize.minimize_scalar(lambda x: -rdf_interp(x), method='bounded', bounds=peak_bases).x
213-
214294
# Refine r_min (position of 1st minimum after 1st peak)
215295
# Note: r_min should be global minimum but optimisation may return local min
216296
r_min = scipy.optimize.minimize_scalar(rdf_interp, method='bounded', bounds=(r_max, next_peak)).x
217297

298+
# Check r_min is not at RDF < 0 in case where peak precedes final crossing of RDF = 0
299+
if (peak_idx_prom < peak_idx_first) and (rdf_interp(r_min) < 0):
300+
# Re-define r_min to preceding crossing of RDF = 0
301+
# Find first sign changing interval between peak and r_min
302+
check_interval = np.where((r >= rp_max) & (r <= r_min))
303+
brent_b = np.ravel(check_interval)[np.argwhere(np.sign(rdf[check_interval]) == -1)[0]]
304+
# Find root using scipy.optimize.brentq
305+
r_min = scipy.optimize.brentq(rdf_interp, r[brent_b-1], r[brent_b])
306+
218307
return r_0, rp_max, r_max, r_min
219308

220309

tests/core/test_data_utils.py

+37-13
Original file line numberDiff line numberDiff line change
@@ -77,19 +77,43 @@ def test_bkg_scaling_residual(self):
7777
class TestFindIntegrationLimits(unittest.TestCase, CustomAssertions):
7878

7979
def test_find_integration_limits(self):
80-
rdf_data = np.load(os.path.join(data_path, 'rdf_peaks.npy'))
81-
rdf_peaks = (2.239933089082285, 2.9162691080482155,
82-
2.9444851167454846, 3.4521306936532428)
83-
limits_a = data_utils.find_integration_limits(*rdf_data.T)
84-
limits_b = data_utils.find_integration_limits(*rdf_data.T, rho=0.05, peak_search_limit=10, search_method='first')
85-
limits_c = data_utils.find_integration_limits(*rdf_data.T, rho=0.05, peak_search_limit=20, search_method='first')
86-
limits_d = data_utils.find_integration_limits(*rdf_data.T, rho=0.05, peak_search_limit=20, search_method='prominent')
87-
limits_e = data_utils.find_integration_limits(*rdf_data.T, peak_search_limit=20, search_method='prominent')
88-
self.assertFloatArrayEqual(limits_a, rdf_peaks)
89-
self.assertFloatArrayEqual(limits_b, rdf_peaks)
90-
self.assertFloatArrayEqual(limits_c, rdf_peaks)
91-
self.assertFloatArrayEqual(limits_d, rdf_peaks)
92-
self.assertFloatArrayEqual(limits_e, rdf_peaks)
80+
# Load test RDF data
81+
rdf_data_a, rdf_data_b, rdf_data_c = np.load(os.path.join(data_path, 'rdf_peak_test_data.npy'), allow_pickle=True)
82+
# State expected limit positions
83+
expected_limits_a = (2.239933089082285, 2.9162691080482155,
84+
2.9444851167454846, 3.4521306936532428)
85+
expected_limits_b = (2.1482738727778155, 2.906024136949478,
86+
2.9433519729572653, 3.5421030348918663)
87+
expected_limits_c = (1.4413920965789064, 1.6158820019375424,
88+
1.621174657388145, 1.7887945926039175)
89+
90+
# Test regular case where peak_idx_prom == peak_idx_first
91+
limits_a = data_utils.find_integration_limits(*rdf_data_a.T)
92+
# Test kwargs, test higher peak search etc.
93+
limits_a_kw = data_utils.find_integration_limits(*rdf_data_a.T, rho=0.05, peak_search_limit=10)
94+
limits_a_psl = data_utils.find_integration_limits(*rdf_data_a.T, rho=0.05, peak_search_limit=50)
95+
96+
# Test case where no next peak, for rho and no rho
97+
limits_a_np = data_utils.find_integration_limits(*rdf_data_a.T, peak_search_limit=3.5)
98+
limits_a_np_rho = data_utils.find_integration_limits(*rdf_data_a.T, rho=0.05, peak_search_limit=3.5)
99+
100+
# Test case where peak_idx_prom > peak_idx_first (oscillations at RDF ~ 0 need to be discounted)
101+
limits_b = data_utils.find_integration_limits(*rdf_data_b.T)
102+
103+
# Test case where peak_idx_prom < peak_idx_first (right base at RDF < 0)
104+
limits_c = data_utils.find_integration_limits(*rdf_data_c.T)
105+
106+
self.assertFloatArrayEqual(limits_a, expected_limits_a)
107+
self.assertFloatArrayEqual(limits_a_kw, expected_limits_a)
108+
self.assertFloatArrayEqual(limits_a_kw, limits_a_psl)
109+
110+
self.assertFloatArrayEqual(limits_a_np[:-1], expected_limits_a[:-1])
111+
self.assertFloatArrayEqual(limits_a_np_rho[:-1], expected_limits_a[:-1])
112+
self.assertFloatArrayEqual(limits_a_np, expected_limits_a, atol=1.e-6)
113+
self.assertFloatArrayEqual(limits_a_np_rho, expected_limits_a, atol=1.e-6)
114+
115+
self.assertFloatArrayEqual(limits_b, expected_limits_b)
116+
self.assertFloatArrayEqual(limits_c, expected_limits_c)
93117

94118

95119
class TestRebinData(unittest.TestCase, CustomAssertions):

tests/data/rdf_peak_test_data.npy

48.4 KB
Binary file not shown.

tests/data/rdf_peaks.npy

-7.94 KB
Binary file not shown.

0 commit comments

Comments
 (0)