Merge pull request #53 from mathematicalmichael/hotfix/testing-domain

mathematicalmichael · web-flow · commit 97f35e1c396c · 2021-11-13T14:33:20.000-07:00
harden testing
diff --git a/docs/conf.py b/docs/conf.py
@@ -147,9 +147,9 @@
 
 # -- Options for HTML output -------------------------------------------------
 
-#html_theme = "alabaster"
+# html_theme = "alabaster"
 
-#html_theme_options = {"sidebar_width": "300px", "page_width": "1200px"}
+# html_theme_options = {"sidebar_width": "300px", "page_width": "1200px"}
 
 
 html_theme = "furo"
@@ -203,8 +203,8 @@
 
 
 html_css_files = [
-     "custom.css",
- ]
+    "custom.css",
+]
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -5,7 +5,7 @@ appdirs
 configupdater
 packaging
 furo
-setuptools>=38.3
+setuptools>=58.3
 setuptools_scm
 sphinx>=3.2.1
 sphinx-copybutton
diff --git a/src/mud/__init__.py b/src/mud/__init__.py
@@ -2,9 +2,11 @@
 
 if sys.version_info[:2] >= (3, 8):
     # TODO: Import directly (no need for conditional) when `python_requires = >= 3.8`
-    from importlib.metadata import PackageNotFoundError, version  # pragma: no cover
+    from importlib.metadata import (PackageNotFoundError,  # pragma: no cover
+                                    version)
 else:
-    from importlib_metadata import PackageNotFoundError, version  # pragma: no cover
+    from importlib_metadata import (PackageNotFoundError,  # pragma: no cover
+                                    version)
 
 try:
     # Change here if project is renamed and does not equal the package name
diff --git a/src/mud/base.py b/src/mud/base.py
@@ -1,3 +1,5 @@
+from typing import List, Union
+
 import numpy as np
 from scipy.stats import distributions as dist
 from scipy.stats import gaussian_kde as gkde
@@ -19,7 +21,7 @@ class DensityProblem(object):
     >>> Y = np.repeat(X, num_obs, 1)
     >>> y = np.ones(num_obs)*0.5 + np.random.randn(num_obs)*0.05
     >>> W = wme(Y, y)
-    >>> B = DensityProblem(X, W, np.array([[0,1], [0,1]]))
+    >>> B = DensityProblem(X, W, np.array([[0,1]]))
     >>> np.round(B.mud_point()[0],1)
     0.5
 
@@ -46,6 +48,15 @@ def _n_features(self):
     def _n_samples(self):
         return self.y.shape[0]
 
+    def set_weights(self, weights: Union[np.ndarray, List]):
+        if weights is not None:
+            assert (
+                len(weights) == self._n_samples
+            ), f"`weights` must size {self._n_samples}"
+            if isinstance(weights, list):
+                weights = np.array(weights)
+            self._weights = weights  # / weights.sum()
+
     def set_observed(self, distribution=dist.norm()):
         self._ob = distribution.pdf(self.y).prod(axis=1)
 
@@ -63,20 +74,30 @@ def set_initial(self, distribution=None):
         self._up = None
         self._pr = None
 
-    def set_predicted(self, distribution=None, **kwargs):
-        if "weights" not in kwargs:
-            kwargs["weights"] = self._weights
-        else:
-            self._weights = kwargs["weights"]
+    def set_predicted(self, distribution=None, bw_method=None, weights=None, **kwargs):
+        """
+        Set the predicted density values (stored in `self._pr`).
+
+        If no distribution is passed, `scipy.stats.gaussian_kde` is used and is
+        given `bw_method` and `weights`; otherwise `**kwargs` go to `distribution.pdf`.
+        If `weights` is specified, it is saved as `self._weights`; if omitted,
+        the existing `self._weights` is used in its place.
+        Note: `distribution` should be a frozen distribution if using `scipy`.
+        """
+        if weights is None:
+            weights = self._weights
+        else:  # TODO: log this to the user as INFO
+            self.set_weights(weights)
+        weights = self._weights
 
         if distribution is None:
             # Reweight kde of predicted by weights from previous iteration if present
-            distribution = gkde(self.y.T, **kwargs)
-            pred_pdf = distribution.pdf(self.y.T).T
+            distribution = gkde(self.y.T, bw_method=bw_method, weights=weights)
+            pred_pdf_values = distribution.pdf(self.y.T).T
         else:
-            pred_pdf = distribution.pdf(self.y, **kwargs)
+            pred_pdf_values = distribution.pdf(self.y, **kwargs)
 
-        self._pr = pred_pdf
+        self._pr = pred_pdf_values
         self._up = None
 
     def fit(self, **kwargs):
@@ -125,7 +146,7 @@ class BayesProblem(object):
     >>> num_obs = 50
     >>> Y = np.repeat(X, num_obs, 1)
     >>> y = np.ones(num_obs)*0.5 + np.random.randn(num_obs)*0.05
-    >>> B = BayesProblem(X, Y, np.array([[0,1], [0,1]]))
+    >>> B = BayesProblem(X, Y, np.array([[0,1]]))
     >>> B.set_likelihood(ds.norm(loc=y, scale=0.05))
     >>> np.round(B.map_point()[0],1)
     0.5
diff --git a/src/mud/funs.py b/src/mud/funs.py
@@ -5,13 +5,14 @@
 """
 
 import argparse
-import sys
 import logging
+import sys
+
 import numpy as np
+from scipy.stats import distributions as dists
 
 from mud import __version__
-from mud.base import DensityProblem, BayesProblem
-from scipy.stats import distributions as dists
+from mud.base import BayesProblem, DensityProblem
 
 __author__ = "Mathematical Michael"
 __copyright__ = "Mathematical Michael"
diff --git a/src/mud/plot.py b/src/mud/plot.py
@@ -1,5 +1,6 @@
-from matplotlib import pyplot as plt
 import numpy as np
+from matplotlib import pyplot as plt
+
 from mud.util import null_space
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -7,31 +7,57 @@
     https://pytest.org/latest/plugins.html
 """
 
+import numpy as np
 import pytest
-from mud.base import DensityProblem, BayesProblem
-from mud.funs import wme
 from scipy.stats import distributions as ds
-import numpy as np
+
+from mud.base import BayesProblem, DensityProblem
+from mud.funs import wme
 
 
 @pytest.fixture
-def identity_1D_50_wme():
-    X = np.random.rand(100, 1)
-    num_observations = 50
-    y_pred = np.repeat(X, num_observations, 1)
-    y_true = 0.5
-    noise = 0.05
-    y_observed = y_true * np.ones(num_observations) + noise * np.random.randn(
-        num_observations
-    )
-    Y = wme(y_pred, y_observed, sd=noise)
-    return (X, Y)
+def dist_wo_weights():
+    class Dist:
+        @classmethod
+        def pdf(self, x, **kwargs):
+            return []
+
+    return Dist
+
+
+@pytest.fixture
+def problem_generator_identity_1D():
+    def identity_uniform_1D(
+        num_samples=2000, num_obs=20, y_true=0.5, noise=0.05, weights=None
+    ):
+        """
+        Sets up an inverse problem using the unit domain and uniform distribution
+        under an identity map. This is equivalent to studying a
+        "steady state" signal over time, or taking repeated measurements
+        of the same quantity to reduce variance in the uncertainty.
+        """
+        dist = ds.uniform(loc=0, scale=1)
+        X = dist.rvs(size=(num_samples, 1))
+        y_pred = np.repeat(X, num_obs, 1)
+        # data is truth + noise
+        y_observed = y_true * np.ones(num_obs) + noise * np.random.randn(num_obs)
+        Y = wme(y_pred, y_observed, sd=noise)
+        # analytical construction of predicted domain under identity map.
+        y_domain = np.repeat(np.array([[0], [1]]), num_obs, 1)
+        mn, mx = wme(y_domain, y_observed, sd=noise)
+        loc, scale = mn, mx - mn
+        dist = ds.uniform(loc=loc, scale=scale)
+
+        D = DensityProblem(X, Y, np.array([[0, 1]]), weights=weights)
+        D.set_predicted(dist)
+        return D
+
+    return identity_uniform_1D
 
 
 @pytest.fixture
-def identity_problem_mud_1D(identity_1D_50_wme):
-    X, Y = identity_1D_50_wme
-    return DensityProblem(X, Y, np.array([[0, 1], [0, 1]]))
+def identity_problem_mud_1D(problem_generator_identity_1D):
+    return problem_generator_identity_1D()
 
 
 @pytest.fixture
@@ -44,22 +70,29 @@ def identity_problem_map_1D():
     y_observed = y_true * np.ones(num_observations) + noise * np.random.randn(
         num_observations
     )
-    B = BayesProblem(X, y_pred, np.array([[0, 1], [0, 1]]))
+    B = BayesProblem(X, y_pred, np.array([[0, 1]]))
     B.set_likelihood(ds.norm(loc=y_observed, scale=noise))
     return B
 
 
 @pytest.fixture
-def identity_problem_mud_1D_equal_weights(identity_1D_50_wme):
-    X, Y = identity_1D_50_wme
-    weights = np.ones(X.shape[0])
-    return DensityProblem(X, Y, np.array([[0, 1], [0, 1]]), weights=weights)
+def identity_problem_mud_1D_equal_weights(problem_generator_identity_1D):
+    num_samples = 5000
+    return problem_generator_identity_1D(
+        num_samples=num_samples,
+        weights=np.ones(num_samples),
+    )
 
 
 @pytest.fixture
-def identity_problem_mud_1D_bias_weights(identity_1D_50_wme):
-    X, Y = identity_1D_50_wme
-    weights = np.ones(X.shape[0])
-    weights[X[:, 0] < 0.2] = 0.1
-    weights[X[:, 0] > 0.8] = 0.1
-    return DensityProblem(X, Y, np.array([[0, 1], [0, 1]]), weights=weights)
+def identity_problem_mud_1D_bias_weights(problem_generator_identity_1D):
+    num_samples = 5000
+    weights = np.ones(num_samples)
+    D = problem_generator_identity_1D(
+        num_samples=num_samples,
+        weights=np.ones(num_samples),
+    )
+    weights[D.X[:, 0] < 0.2] = 0.1
+    weights[D.X[:, 0] > 0.8] = 0.1
+    D.set_weights(weights)
+    return D
diff --git a/tests/test_base.py b/tests/test_base.py
@@ -13,36 +13,11 @@ def test_identity_mud_problem_1D(identity_problem_mud_1D):
 
     # Act
     mud_point = D.estimate()
-    updated_density = D._up
-    ratio = D._r
-
-    # Assert
-    assert np.round(mud_point, 1) == 0.5
-    assert np.sum(updated_density) > 0
-    assert np.mean(ratio) > 0
-
-
-def test_we_can_set_weights_in_predicted(identity_problem_mud_1D_equal_weights):
-    """Mimicks existing usage in mud-examples"""
-    # Arrange
-    # weights were used for initialization
-    D = identity_problem_mud_1D_equal_weights
-    D.set_initial()  # domain has been set -> uniform as default
-    # want to make sure we can set weights on predicted
-    weights = np.random.rand(D._n_samples)
-    D.set_predicted(weights=weights)
-
-    # Act
-    mud_point = D.estimate()
-    updated_density = D._up
     ratio = D._r
 
     # Assert
-    # ensure weights were set correctly
-    assert np.linalg.norm(weights - D._weights) == 0
     assert np.round(mud_point, 1) == 0.5
-    assert np.sum(updated_density) > 0
-    assert np.mean(ratio) > 0
+    assert np.abs(np.mean(ratio) - 1) < 0.2
 
 
 def test_identity_mud_1D_with_equal_weights(identity_problem_mud_1D_equal_weights):
@@ -51,13 +26,11 @@ def test_identity_mud_1D_with_equal_weights(identity_problem_mud_1D_equal_weight
 
     # Act
     mud_point = D.estimate()
-    updated_density = D._up
     ratio = D._r
 
     # Assert
     assert np.round(mud_point, 1) == 0.5
-    assert np.sum(updated_density) > 0
-    assert np.mean(ratio) > 0
+    assert np.abs(np.mean(ratio) - 1) < 0.2
 
 
 def test_identity_mud_1D_with_biased_weights(identity_problem_mud_1D_bias_weights):
diff --git a/tests/test_funs.py b/tests/test_funs.py
@@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 
 import unittest
-import mud.funs as mdf
+
 import numpy as np
 
+import mud.funs as mdf
+
 __author__ = "Mathematical Michael"
 __copyright__ = "Mathematical Michael"
 __license__ = "mit"
@@ -35,13 +37,14 @@ def test_solutions_with_orthogonal_map(self):
         err_map = sol_map - t
 
         # Assert
-        assert np.linalg.norm(err_mud) < 1e-8
-        assert np.linalg.norm(err_alt) < 1e-8
+        assert np.linalg.norm(err_mud) < 1e-6
+        assert np.linalg.norm(err_alt) < 1e-6
         assert np.linalg.norm(err_mud) < np.linalg.norm(err_map)
 
     def test_updated_cov_has_R_equal_zero_for_full_rank_A(self):
         up_cov = mdf.updated_cov(self.A, self.id, self.id)
-        assert np.linalg.norm(up_cov - np.linalg.inv(self.A.T @ self.A)) < 1e-6
+        absolute_error = np.linalg.norm(up_cov - np.linalg.inv(self.A.T @ self.A))
+        assert absolute_error / len(up_cov) < 1e-12
 
 
 class TestWME(unittest.TestCase):
diff --git a/tests/test_norm.py b/tests/test_norm.py
@@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 
 import unittest
-import mud.norm as mdn
+
 import numpy as np
 
+import mud.norm as mdn
+
 __author__ = "Mathematical Michael"
 __copyright__ = "Mathematical Michael"
 __license__ = "mit"
diff --git a/tests/test_setting_predicted.py b/tests/test_setting_predicted.py
diff --git a/tests/test_util.py b/tests/test_util.py