From 60da0e0cf2ab94fcb11837f33ed904ef64b9ec5c Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Mon, 15 Jul 2024 14:38:51 -0400
Subject: [PATCH 01/25] added verbose flag to reduce print statements

---
 eks/command_line_args.py     |  6 ++++++
 eks/singlecam_smoother.py    | 11 ++++++-----
 scripts/singlecam_example.py |  4 +++-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/eks/command_line_args.py b/eks/command_line_args.py
index 9a46a04..090b9df 100644
--- a/eks/command_line_args.py
+++ b/eks/command_line_args.py
@@ -67,6 +67,12 @@ def handle_parse_args(script_type):
         default=[],
         type=parse_blocks,
     )
+    parser.add_argument(
+        '--verbose',
+        help='if set to true, displays smoothing parameter optimization iterations',
+        default='',
+        type=str,
+    )
     if script_type == 'singlecam':
         add_bodyparts(parser)
         add_s(parser)
diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index a66aa8b..28f5f86 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -23,7 +23,8 @@
 def ensemble_kalman_smoother_singlecam(
         markers_3d_array, bodypart_list, smooth_param, s_frames, blocks=[],
         ensembling_mode='median',
-        zscore_threshold=2):
+        zscore_threshold=2,
+        verbose=False):
     """
     Perform Ensemble Kalman Smoothing on 3D marker data from a single camera.
 
@@ -59,7 +60,7 @@ def ensemble_kalman_smoother_singlecam(
     # Main smoothing function
     s_finals, ms, Vs = singlecam_optimize_smooth(
         cov_mats, ys, m0s, S0s, Cs, As, Rs, ensemble_vars,
-        s_frames, smooth_param, blocks)
+        s_frames, smooth_param, blocks, verbose)
 
     y_m_smooths = np.zeros((n_keypoints, T, n_coords))
     y_v_smooths = np.zeros((n_keypoints, T, n_coords, n_coords))
@@ -205,7 +206,7 @@ def init_kalman(i, adjusted_x_obs, adjusted_y_obs):
 
 def singlecam_optimize_smooth(
         cov_mats, ys, m0s, S0s, Cs, As, Rs, ensemble_vars,
-        s_frames, smooth_param, blocks=[], maxiter=1000):
+        s_frames, smooth_param, blocks=[], maxiter=1000, verbose=False):
     """
     Optimize smoothing parameter, and use the result to run the kalman filter-smoother
 
@@ -301,8 +302,8 @@ def step(s, opt_state):
                 start_time = time.time()
                 s_init, opt_state, loss = step(s_init, opt_state)
 
-                # if iteration % 10 == 0 or iteration == maxiter - 1:
-                #     print(f'Iteration {iteration}, Current loss: {loss}, Current s: {s_init}')
+                if verbose and iteration % 10 == 0 or iteration == maxiter - 1:
+                     print(f'Iteration {iteration}, Current loss: {loss}, Current s: {s_init}')
 
                 tol = 0.001 * jnp.abs(jnp.log(prev_loss))
                 if jnp.linalg.norm(loss - prev_loss) < tol + 1e-6:
diff --git a/scripts/singlecam_example.py b/scripts/singlecam_example.py
index 7c5ec4f..ff9277f 100644
--- a/scripts/singlecam_example.py
+++ b/scripts/singlecam_example.py
@@ -18,6 +18,7 @@
 s = args.s  # defaults to automatic optimization
 s_frames = args.s_frames  # frames to be used for automatic optimization (only if no --s flag)
 blocks = args.blocks
+verbose = True if args.verbose == 'True' else False
 
 
 # Load and format input files and prepare an empty DataFrame for output.
@@ -48,7 +49,8 @@
     bodypart_list,
     s,
     s_frames,
-    blocks
+    blocks,
+    verbose
 )
 
 keypoint_i = -1  # keypoint to be plotted

From 55b52c7b5f3a80ffbc56b2216376e909a7abb0db Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Mon, 15 Jul 2024 18:39:49 +0000
Subject: [PATCH 02/25] merging

---
 eks/utils.py | 2 +-
 setup.py     | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/eks/utils.py b/eks/utils.py
index 8fdb484..8eb4976 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -267,4 +267,4 @@ def crop_frames(y, s_frames):
         result.append(y[start:end])
 
     # Concatenate all slices into a single numpy array
-    return np.concatenate(result)
\ No newline at end of file
+    return np.concatenate(result)
diff --git a/setup.py b/setup.py
index d4917b8..0096fc9 100644
--- a/setup.py
+++ b/setup.py
@@ -33,9 +33,9 @@ def get_version(rel_path):
     'scipy>=1.2.0',
     'tqdm',
     'typing',
-    'sleap_io'
-    'jax'
-    'jaxlib'
+    'sleap_io',
+    'jax',
+    'jaxlib',
 ]
 
 # additional requirements

From 40ab165275e15be343117f06d733676e7858db4a Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Mon, 15 Jul 2024 17:03:43 -0400
Subject: [PATCH 03/25] more verbose print removal

---
 eks/singlecam_smoother.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 28f5f86..4990c41 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -315,8 +315,10 @@ def step(s, opt_state):
                 prev_loss = loss
 
             s_final = jnp.exp(s_init)  # Convert back from log-space
+
             for b in block:
-                print(f's={s_final} for keypoint {b}')
+                if verbose:
+                    print(f's={s_final} for keypoint {b}')
                 s_finals.append(s_final)
 
     s_finals = np.array(s_finals)

From ac25be801adee69f3901979acdf91431802d9621 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Tue, 16 Jul 2024 15:32:05 -0400
Subject: [PATCH 04/25] more, more verbose print removal

---
 eks/singlecam_smoother.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 4990c41..2201385 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -233,12 +233,14 @@ def singlecam_optimize_smooth(
     if blocks == []:
         for n in range(n_keypoints):
             blocks.append([n])
-    print(f'Correlated keypoint blocks: {blocks}')
+    if verbose:
+        print(f'Correlated keypoint blocks: {blocks}')
 
     # Depending on whether we use GPU, choose parallel or sequential smoothing param optimization
     try:
         _ = jax.device_put(jax.numpy.ones(1), device=jax.devices('gpu')[0])
-        print("Using GPU")
+        if verbose:
+            print("Using GPU")
 
         @partial(jit)
         def nll_loss_parallel_scan(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs):
@@ -248,7 +250,8 @@ def nll_loss_parallel_scan(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs):
 
         loss_function = nll_loss_parallel_scan
     except:
-        print("Using CPU")
+        if verbose:
+            print("Using CPU")
 
         @partial(jit)
         def nll_loss_sequential_scan(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs):

From d3e80c911fdcdee8764384cacbacbeddd63c573c Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Wed, 21 Aug 2024 17:01:46 +0000
Subject: [PATCH 05/25] outputs nll values

---
 eks/core.py               | 61 +++++++++++++++++++++++++++++++++++++++
 eks/singlecam_smoother.py | 18 ++++++++----
 eks/utils.py              |  2 +-
 3 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/eks/core.py b/eks/core.py
index 72f9622..ea70390 100644
--- a/eks/core.py
+++ b/eks/core.py
@@ -352,6 +352,36 @@ def kalman_filter_step(carry, curr_y):
     return (m_t, V_t, A, Q, C, R, nll_net), (m_t, V_t, nll_current)
 
 
+def kalman_filter_step_nlls(carry, curr_y):
+    m_prev, V_prev, A, Q, C, R, nll_net, nll_array, t = carry
+
+    # Predict
+    m_pred = jnp.dot(A, m_prev)
+    V_pred = jnp.dot(A, jnp.dot(V_prev, A.T)) + Q
+
+    # Update
+    innovation = curr_y - jnp.dot(C, m_pred)
+    innovation_cov = jnp.dot(C, jnp.dot(V_pred, C.T)) + R
+    K = jnp.dot(V_pred, jnp.dot(C.T, jnp.linalg.inv(innovation_cov)))
+    m_t = m_pred + jnp.dot(K, innovation)
+    V_t = V_pred - jnp.dot(K, jnp.dot(C, V_pred))
+
+    # Compute the negative log-likelihood for the current time step
+    nll_current = single_timestep_nll(innovation, innovation_cov)
+
+    # Accumulate the negative log-likelihood
+    nll_net = nll_net + nll_current
+
+    # Save the current NLL to the preallocated array
+    nll_array = nll_array.at[t].set(nll_current)
+
+    # Increment the time step
+    t = t + 1
+
+    # Return the updated state and outputs
+    return (m_t, V_t, A, Q, C, R, nll_net, nll_array, t), (m_t, V_t, nll_current)
+
+
 # Always run the sequential filter on CPU.
 # GPU will deploy individual kernels for each scan iteration, very slow.
 @partial(jit, backend='cpu')
@@ -381,6 +411,37 @@ def jax_forward_pass(y, m0, cov0, A, Q, C, R):
     return mfs, Vfs, nll_net
 
 
+def jax_forward_pass_nlls(y, m0, cov0, A, Q, C, R):
+    """
+    Kalman Filter for a single keypoint
+    (can be vectorized using vmap for handling multiple keypoints in parallel)
+    Parameters:
+        y: Shape (num_timepoints, observation_dimension).
+        m0: Shape (state_dim,). Initial state of system.
+        cov0: Shape (state_dim, state_dim). Initial covariance of state variable.
+        A: Shape (state_dim, state_dim). Process transition matrix.
+        Q: Shape (state_dim, state_dim). Process noise covariance matrix.
+        C: Shape (observation_dim, state_dim). Observation coefficient matrix.
+        R: Shape (observation_dim, observation_dim). Observation noise covar matrix.
+
+    Returns:
+        mfs: Shape (timepoints, state_dim). Mean filter state at each timepoint.
+        Vfs: Shape (timepoints, state_dim, state_dim). Covar for each filtered estimate.
+        nll_net: Shape (1,). Negative log likelihood observations -log (p(y_1, ..., y_T))
+        nll_array: Shape (num_timepoints,). Incremental negative log-likelihood at each timepoint.
+    """
+    # Initialize carry
+    num_timepoints = y.shape[0]
+    nll_array_init = jnp.zeros(num_timepoints)  # Preallocate an array with zeros
+    t_init = 0  # Initialize the time step counter
+    carry = (m0, cov0, A, Q, C, R, 0, nll_array_init, t_init)
+    carry, outputs = jax.lax.scan(kalman_filter_step_nlls, carry, y)
+    mfs, Vfs, _ = outputs
+    nll_net = carry[-3]  # Total NLL
+    nll_array = carry[-2]  # Array of incremental NLL values
+    return mfs, Vfs, nll_net, nll_array
+
+
 def kalman_smoother_step(carry, X):
     m_ahead_smooth, v_ahead_smooth, A, Q = carry
     m_curr_filter, v_curr_filter = X[0], X[1]
diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 2201385..564827a 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -15,6 +15,7 @@
     jax_backward_pass,
     jax_ensemble,
     jax_forward_pass,
+    jax_forward_pass_nlls,
     pkf_and_loss,
 )
 from eks.utils import crop_frames, make_dlc_pandas_index
@@ -58,7 +59,7 @@ def ensemble_kalman_smoother_singlecam(
         scaled_ensemble_preds, adjusted_obs_dict, n_keypoints)
 
     # Main smoothing function
-    s_finals, ms, Vs = singlecam_optimize_smooth(
+    s_finals, ms, Vs, nlls = singlecam_optimize_smooth(
         cov_mats, ys, m0s, S0s, Cs, As, Rs, ensemble_vars,
         s_frames, smooth_param, blocks, verbose)
 
@@ -83,11 +84,12 @@ def ensemble_kalman_smoother_singlecam(
                             ensemble_preds[:, k, :],
                             ensemble_vars[:, k, :],
                             min_ensemble_std=zscore_threshold)
+        nll = nlls[k]
 
         # Final Cleanup
         pdindex = make_dlc_pandas_index([bodypart_list[k]],
                                         labels=["x", "y", "likelihood", "x_var", "y_var",
-                                                "zscore"])
+                                                "zscore", "nll"])
         var = np.empty(y_m_smooths[k].T[0].shape)
         var[:] = np.nan
         pred_arr = np.vstack([
@@ -97,6 +99,7 @@ def ensemble_kalman_smoother_singlecam(
             y_v_smooths[k][:, 0, 0],
             y_v_smooths[k][:, 1, 1],
             zscore,
+            nll
         ]).T
         df = pd.DataFrame(pred_arr, columns=pdindex)
         dfs.append(df)
@@ -326,11 +329,11 @@ def step(s, opt_state):
 
     s_finals = np.array(s_finals)
     # Final smooth with optimized s
-    ms, Vs = final_forwards_backwards_pass(
+    ms, Vs, nlls = final_forwards_backwards_pass(
         cov_mats, s_finals,
         ys, m0s, S0s, Cs, As, Rs)
 
-    return s_finals, ms, Vs
+    return s_finals, ms, Vs, nlls
 
 
 ######
@@ -442,16 +445,19 @@ def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs):
     n_keypoints = ys.shape[0]
     ms_array = []
     Vs_array = []
+    nlls_array = []
     Qs = s[:, None, None] * process_cov
 
     # Run forward and backward pass for each keypoint
     for k in range(n_keypoints):
-        mf, Vf, nll = jax_forward_pass(ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k])
+        mf, Vf, nll, nll_array = jax_forward_pass_nlls(ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k])
         ms, Vs = jax_backward_pass(mf, Vf, As[k], Qs[k])
         ms_array.append(np.array(ms))
         Vs_array.append(np.array(Vs))
+        nlls_array.append(np.array(nll_array))
 
     smoothed_means = np.stack(ms_array, axis=0)
     smoothed_covariances = np.stack(Vs_array, axis=0)
+    nlls_final = np.stack(nlls_array, axis=0)
 
-    return smoothed_means, smoothed_covariances
+    return smoothed_means, smoothed_covariances, nlls_array
diff --git a/eks/utils.py b/eks/utils.py
index 46b0012..67530bc 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -179,7 +179,7 @@ def dataframe_to_csv(df, filename):
 
 def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df,
                               key_suffix=''):  # key_suffix only required for multi-camera setups
-    for coord in ['x', 'y', 'zscore']:
+    for coord in ['x', 'y', 'zscore', 'nll']:
         src_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}', coord)
         dst_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}' + key_suffix, coord)
         output_df.loc[:, dst_cols] = keypoint_df.loc[:, src_cols]

From cf99cacb721da1127b4cfb84bc58cf8a5f14ff49 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Wed, 21 Aug 2024 17:38:35 +0000
Subject: [PATCH 06/25] zscore threshold set

---
 eks/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/eks/core.py b/eks/core.py
index ea70390..7890eb5 100644
--- a/eks/core.py
+++ b/eks/core.py
@@ -673,7 +673,7 @@ def pkf_and_loss(y, m0, cov0, A, Q, C, R):
 # -------------------------------------------------------------------------------------
 
 
-def eks_zscore(eks_predictions, ensemble_means, ensemble_vars, min_ensemble_std=2):
+def eks_zscore(eks_predictions, ensemble_means, ensemble_vars, min_ensemble_std=1e-5):
     """Computes zscore between eks prediction and the ensemble for a single keypoint.
     Args:
         eks_predictions: list
@@ -683,7 +683,7 @@ def eks_zscore(eks_predictions, ensemble_means, ensemble_vars, min_ensemble_std=
         ensemble_vars: string
             Ensemble var for each coordinate (x and ys) for as single keypoint - (samples, 2)
         min_ensemble_std:
-            Minimum std threshold to reduce the effect of low ensemble std (default 2).
+            Minimum std threshold to reduce the effect of low ensemble std (default 1e-5).
     Returns:
         z_score
             z_score for each time point - (samples, 1)

From 8af094bf0686292942bf1cac2990da55be25b180 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Wed, 23 Oct 2024 19:16:40 -0400
Subject: [PATCH 07/25] eks scalar covariance inflation, initial pytest setup

---
 eks/singlecam_smoother.py        |   7 +-
 eks/utils.py                     | 119 +++++++++++++-
 eks/utils_saved.py               | 270 +++++++++++++++++++++++++++++++
 scripts/plotting_aeks.py         | 158 ++++++++++++++++++
 scripts/singlecam_example.py     |   2 +-
 tests/test_singlecam_smoother.py |  60 ++++++-
 6 files changed, 609 insertions(+), 7 deletions(-)
 create mode 100644 eks/utils_saved.py
 create mode 100644 scripts/plotting_aeks.py

diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 564827a..2886faa 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -209,7 +209,7 @@ def init_kalman(i, adjusted_x_obs, adjusted_y_obs):
 
 def singlecam_optimize_smooth(
         cov_mats, ys, m0s, S0s, Cs, As, Rs, ensemble_vars,
-        s_frames, smooth_param, blocks=[], maxiter=1000, verbose=False):
+        s_frames, smooth_param, blocks=[], maxiter=1000, verbose=False, inflation_factor=1.1):
     """
     Optimize smoothing parameter, and use the result to run the kalman filter-smoother
 
@@ -225,6 +225,7 @@ def singlecam_optimize_smooth(
     s_frames (list): List of frames.
     smooth_param (float): Smoothing parameter.
     blocks (list): List of blocks.
+    inflation_factor (float): Inflation factor for the covariances (default = 1.1).
 
     Returns:
     tuple: Final smoothing parameters, smoothed means, smoothed covariances,
@@ -239,6 +240,10 @@ def singlecam_optimize_smooth(
     if verbose:
         print(f'Correlated keypoint blocks: {blocks}')
 
+    # Inflate the initial state covariance and process noise covariance matrices
+    S0s *= inflation_factor  # Inflating the initial state covariance
+    cov_mats *= inflation_factor  # Inflating the process noise covariance matrices
+
     # Depending on whether we use GPU, choose parallel or sequential smoothing param optimization
     try:
         _ = jax.device_put(jax.numpy.ones(1), device=jax.devices('gpu')[0])
diff --git a/eks/utils.py b/eks/utils.py
index 67530bc..0e34465 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -38,6 +38,40 @@ def convert_lp_dlc(df_lp, keypoint_names, model_name=None):
     return df_dlc
 
 
+def assign_identity_by_total_length(dfrow: pd.Series, fish_numbers: list) -> list:
+    """Assign fish identity based on total length, keeping numeric fish labels and returning them in sorted order."""
+    segments = ["mouth-head", "head-middle", "middle-tail"]
+
+    # Calculate total length for each fish
+    fish_lengths = {}
+    for fish_num in fish_numbers:
+        total_length = 0
+        for seg_key in segments:
+            try:
+                mouth_x = dfrow[f"{fish_num}_mouth_x"]
+                mouth_y = dfrow[f"{fish_num}_mouth_y"]
+                head_x = dfrow[f"{fish_num}_head_x"]
+                head_y = dfrow[f"{fish_num}_head_y"]
+                middle_x = dfrow[f"{fish_num}_middle_x"]
+                middle_y = dfrow[f"{fish_num}_middle_y"]
+                tail_x = dfrow[f"{fish_num}_tail_x"]
+                tail_y = dfrow[f"{fish_num}_tail_y"]
+
+                # Add segment lengths (mouth-head, head-middle, middle-tail)
+                total_length += np.sqrt((mouth_x - head_x) ** 2 + (mouth_y - head_y) ** 2)
+                total_length += np.sqrt((head_x - middle_x) ** 2 + (head_y - middle_y) ** 2)
+                total_length += np.sqrt((middle_x - tail_x) ** 2 + (middle_y - tail_y) ** 2)
+            except KeyError:
+                continue  # Skip fish if data for any segment is missing
+
+        fish_lengths[fish_num] = total_length
+
+    # Sort fish by total length in ascending order
+    sorted_fish = sorted(fish_lengths, key=fish_lengths.get)
+
+    return sorted_fish  # Return sorted fish numbers by size
+
+
 def convert_slp_dlc(base_dir, slp_file):
     # Read data from .slp file
     filepath = os.path.join(base_dir, slp_file)
@@ -46,7 +80,6 @@ def convert_slp_dlc(base_dir, slp_file):
     # Determine the maximum number of instances and keypoints
     max_instances = len(labels[0].instances)
     keypoint_names = [node.name for node in labels[0].instances[0].points.keys()]
-    print(keypoint_names)
     num_keypoints = len(keypoint_names)
 
     # Initialize a NumPy array to store the data
@@ -62,7 +95,7 @@ def convert_slp_dlc(base_dir, slp_file):
                 point = instance.points[keypoint_node]
                 data[i, j, k, 0] = point.x if not np.isnan(point.x) else 0
                 data[i, j, k, 1] = point.y if not np.isnan(point.y) else 0
-                data[i, j, k, 2] = point.score + 1e-6
+                data[i, j, k, 2] = getattr(point, 'score', 0) + 1e-6
 
     # Reshape data to 2D array for DataFrame creation
     reshaped_data = data.reshape(num_frames, -1)
@@ -75,11 +108,89 @@ def convert_slp_dlc(base_dir, slp_file):
 
     # Create DataFrame from the reshaped data
     df = pd.DataFrame(reshaped_data, columns=columns)
-    df.to_csv(f'{slp_file}.csv', index=False)
-    print(f"File read. See read-in data at {slp_file}.csv")
+
+    # Debug tracker to count frames with reassignment
+    reassignment_count = 0
+
+    # Process each row to assign fish identity by length and adjust both data and column names
+    fish_numbers = [1, 2, 3, 4]  # Assuming 4 fish
+    for idx, row in df.iterrows():
+        # Get identity mapping based on total length per frame
+        sorted_fish = assign_identity_by_total_length(row, fish_numbers)
+
+        # Check if reassignment is needed
+        if sorted_fish != fish_numbers:
+            reassignment_count += 1
+
+        # Prepare to store the reordered data
+        new_row = row.copy()
+
+        # For each fish (now sorted by size), move their data to the correct columns
+        for target_fish_num, actual_fish_num in enumerate(sorted_fish, start=1):
+            for keypoint_name in keypoint_names:
+                # Move x, y, and likelihood to the correct "target" fish position
+                new_row[f"{target_fish_num}_{keypoint_name}_x"] = row[
+                    f"{actual_fish_num}_{keypoint_name}_x"]
+                new_row[f"{target_fish_num}_{keypoint_name}_y"] = row[
+                    f"{actual_fish_num}_{keypoint_name}_y"]
+                new_row[f"{target_fish_num}_{keypoint_name}_likelihood"] = row[
+                    f"{actual_fish_num}_{keypoint_name}_likelihood"]
+
+        # Update the DataFrame with the reordered data for this frame
+        df.iloc[idx] = new_row
+
+    # Print total number of frames where reassignment took place
+    print(f"Total number of frames with reassignment: {reassignment_count}")
+
+    # Save the updated DataFrame to a CSV
+    df.to_csv(f'./data/fish-slp-new/{slp_file}_reassigned.csv', index=False)
+    print(f"File processed and saved as {slp_file}_reassigned.csv")
     return df
 
 
+# def convert_slp_dlc(base_dir, slp_file):
+#     # Read data from .slp file
+#     filepath = os.path.join(base_dir, slp_file)
+#     labels = read_labels(filepath)
+#
+#     # Determine the maximum number of instances and keypoints
+#     max_instances = len(labels[0].instances)
+#     keypoint_names = [node.name for node in labels[0].instances[0].points.keys()]
+#     print(keypoint_names)
+#     num_keypoints = len(keypoint_names)
+#
+#     # Initialize a NumPy array to store the data
+#     num_frames = len(labels.labeled_frames)
+#     data = np.zeros((num_frames, max_instances, num_keypoints, 3))  # 3 for x, y, likelihood
+#
+#     # Fill the NumPy array with data
+#     for i, labeled_frame in enumerate(labels.labeled_frames):
+#         for j, instance in enumerate(labeled_frame.instances):
+#             if j >= max_instances:
+#                 break
+#             for k, keypoint_node in enumerate(instance.points.keys()):
+#                 point = instance.points[keypoint_node]
+#                 data[i, j, k, 0] = point.x if not np.isnan(point.x) else 0
+#                 data[i, j, k, 1] = point.y if not np.isnan(point.y) else 0
+#                 # Check if 'score' exists, otherwise leave as 0
+#                 data[i, j, k, 2] = getattr(point, 'score', 0) + 1e-6
+#
+#     # Reshape data to 2D array for DataFrame creation
+#     reshaped_data = data.reshape(num_frames, -1)
+#     columns = []
+#     for j in range(max_instances):
+#         for keypoint_name in keypoint_names:
+#             columns.append(f"{j + 1}_{keypoint_name}_x")
+#             columns.append(f"{j + 1}_{keypoint_name}_y")
+#             columns.append(f"{j + 1}_{keypoint_name}_likelihood")
+#
+#     # Create DataFrame from the reshaped data
+#     df = pd.DataFrame(reshaped_data, columns=columns)
+#     df.to_csv(f'{slp_file}.csv', index=False)
+#     print(f"File read. See read-in data at {slp_file}.csv")
+#     return df
+
+
 def format_data(input_dir, data_type):
     input_files = os.listdir(input_dir)
     input_dfs_list = []
diff --git a/eks/utils_saved.py b/eks/utils_saved.py
new file mode 100644
index 0000000..19a023b
--- /dev/null
+++ b/eks/utils_saved.py
@@ -0,0 +1,270 @@
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from sleap_io.io.slp import read_labels
+
+
+def make_dlc_pandas_index(keypoint_names, labels=["x", "y", "likelihood"]):
+    pdindex = pd.MultiIndex.from_product(
+        [["ensemble-kalman_tracker"], keypoint_names, labels],
+        names=["scorer", "bodyparts", "coords"],
+    )
+    return pdindex
+
+
+def convert_lp_dlc(df_lp, keypoint_names, model_name=None):
+    df_dlc = {}
+    for feat in keypoint_names:
+        for feat2 in ['x', 'y', 'likelihood']:
+            try:
+                if model_name is None:
+                    col_tuple = (feat, feat2)
+                else:
+                    col_tuple = (model_name, feat, feat2)
+
+                # Skip columns with any unnamed level
+                if any(level.startswith('Unnamed') for level in col_tuple if
+                       isinstance(level, str)):
+                    continue
+
+                df_dlc[f'{feat}_{feat2}'] = df_lp.loc[:, col_tuple]
+            except KeyError:
+                # If the specified column does not exist, skip it
+                continue
+
+    df_dlc = pd.DataFrame(df_dlc, index=df_lp.index)
+    return df_dlc
+
+
+def convert_slp_dlc(base_dir, slp_file):
+    # Read data from .slp file
+    filepath = os.path.join(base_dir, slp_file)
+    labels = read_labels(filepath)
+
+    # Determine the maximum number of instances and keypoints
+    max_instances = len(labels[0].instances)
+    keypoint_names = [node.name for node in labels[0].instances[0].points.keys()]
+    print(keypoint_names)
+    num_keypoints = len(keypoint_names)
+
+    # Initialize a NumPy array to store the data
+    num_frames = len(labels.labeled_frames)
+    data = np.zeros((num_frames, max_instances, num_keypoints, 3))  # 3 for x, y, likelihood
+
+    # Fill the NumPy array with data
+    for i, labeled_frame in enumerate(labels.labeled_frames):
+        for j, instance in enumerate(labeled_frame.instances):
+            if j >= max_instances:
+                break
+            for k, keypoint_node in enumerate(instance.points.keys()):
+                point = instance.points[keypoint_node]
+                data[i, j, k, 0] = point.x if not np.isnan(point.x) else 0
+                data[i, j, k, 1] = point.y if not np.isnan(point.y) else 0
+                # Check if 'score' exists, otherwise leave as 0
+                data[i, j, k, 2] = getattr(point, 'score', 0) + 1e-6
+
+    # Reshape data to 2D array for DataFrame creation
+    reshaped_data = data.reshape(num_frames, -1)
+    columns = []
+    for j in range(max_instances):
+        for keypoint_name in keypoint_names:
+            columns.append(f"{j + 1}_{keypoint_name}_x")
+            columns.append(f"{j + 1}_{keypoint_name}_y")
+            columns.append(f"{j + 1}_{keypoint_name}_likelihood")
+
+    # Create DataFrame from the reshaped data
+    df = pd.DataFrame(reshaped_data, columns=columns)
+    df.to_csv(f'{slp_file}.csv', index=False)
+    print(f"File read. See read-in data at {slp_file}.csv")
+    return df
+
+
+def format_data(input_dir, data_type):
+    input_files = os.listdir(input_dir)
+    input_dfs_list = []
+    # Extracting markers from data
+    # Applies correct format conversion and stores each file's markers in a list
+    for input_file in input_files:
+
+        if data_type == 'slp':
+            if not input_file.endswith('.slp'):
+                continue
+            markers_curr = convert_slp_dlc(input_dir, input_file)
+            keypoint_names = [c[1] for c in markers_curr.columns[::3]]
+            markers_curr_fmt = markers_curr
+        elif data_type == 'lp' or 'dlc':
+            if not input_file.endswith('csv'):
+                continue
+            markers_curr = pd.read_csv(
+                os.path.join(input_dir, input_file), header=[0, 1, 2], index_col=0)
+            keypoint_names = [c[1] for c in markers_curr.columns[::3]]
+            model_name = markers_curr.columns[0][0]
+            if data_type == 'lp':
+                markers_curr_fmt = convert_lp_dlc(
+                    markers_curr, keypoint_names, model_name=model_name)
+            else:
+                markers_curr_fmt = markers_curr
+
+        # markers_curr_fmt.to_csv('fmt_input.csv', index=False)
+        input_dfs_list.append(markers_curr_fmt)
+
+    if len(input_dfs_list) == 0:
+        raise FileNotFoundError(f'No marker input files found in {input_dir}')
+
+    output_df = make_output_dataframe(markers_curr)
+    # returns both the formatted marker data and the empty dataframe for EKS output
+    return input_dfs_list, output_df, keypoint_names
+
+
+def make_output_dataframe(markers_curr):
+    ''' Makes empty DataFrame for EKS output '''
+    markers_eks = markers_curr.copy()
+
+    # Check if the columns Index is a MultiIndex
+    if isinstance(markers_eks.columns, pd.MultiIndex):
+        # Set the first level of the MultiIndex to 'ensemble-kalman_tracker'
+        markers_eks.columns = markers_eks.columns.set_levels(['ensemble-kalman_tracker'], level=0)
+    else:
+        # Convert the columns Index to a MultiIndex with three levels
+        new_columns = []
+
+        for col in markers_eks.columns:
+            # Extract instance number, keypoint name, and feature from the column name
+            parts = col.split('_')
+            instance_num = parts[0]
+            keypoint_name = '_'.join(parts[1:-1])  # Combine parts for keypoint name
+            feature = parts[-1]
+
+            # Construct new column names with desired MultiIndex structure
+            new_columns.append(
+                ('ensemble-kalman_tracker', f'{instance_num}_{keypoint_name}', feature))
+
+        # Convert the columns Index to a MultiIndex with three levels
+        markers_eks.columns = pd.MultiIndex.from_tuples(new_columns,
+                                                        names=['scorer', 'bodyparts', 'coords'])
+
+    # Iterate over columns and set values
+    for col in markers_eks.columns:
+        if col[-1] == 'likelihood':
+            # Set likelihood values to 1.0
+            markers_eks[col].values[:] = 1.0
+        else:
+            # Set other values to NaN
+            markers_eks[col].values[:] = np.nan
+
+    # Write DataFrame to CSV
+    # output_csv = 'output_dataframe.csv'
+    # dataframe_to_csv(markers_eks, output_csv)
+
+    return markers_eks
+
+
+def dataframe_to_csv(df, filename):
+    """
+    Converts a DataFrame to a CSV file.
+
+    Parameters:
+        df (pandas.DataFrame): The DataFrame to be converted.
+        filename (str): The name of the CSV file to be created.
+
+    Returns:
+        None
+    """
+    try:
+        df.to_csv(filename, index=False)
+    except Exception as e:
+        print("Error:", e)
+
+
+def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df,
+                              key_suffix=''):  # key_suffix only required for multi-camera setups
+    for coord in ['x', 'y', 'zscore', 'nll']:
+        src_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}', coord)
+        dst_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}' + key_suffix, coord)
+        output_df.loc[:, dst_cols] = keypoint_df.loc[:, src_cols]
+
+    return output_df
+
+
+def plot_results(output_df, input_dfs_list,
+                 key, s_final, nll_values, idxs, save_dir, smoother_type):
+    if nll_values is None:
+        fig, axes = plt.subplots(4, 1, figsize=(9, 10))
+    else:
+        fig, axes = plt.subplots(5, 1)
+
+    for ax, coord in zip(axes, ['x', 'y', 'likelihood', 'zscore']):
+        # Rename axes label for likelihood and zscore coordinates
+        if coord == 'likelihood':
+            ylabel = 'model likelihoods'
+        elif coord == 'zscore':
+            ylabel = 'EKS disagreement'
+        else:
+            ylabel = coord
+
+        # plot individual models
+        ax.set_ylabel(ylabel, fontsize=12)
+        if coord == 'zscore':
+            ax.plot(output_df.loc[slice(*idxs), ('ensemble-kalman_tracker', key, coord)],
+                    color='k', linewidth=2)
+            ax.set_xlabel('Time (frames)', fontsize=12)
+            continue
+        for m, markers_curr in enumerate(input_dfs_list):
+            ax.plot(
+                markers_curr.loc[slice(*idxs), key + f'_{coord}'], color=[0.5, 0.5, 0.5],
+                label='Individual models' if m == 0 else None,
+            )
+        # plot eks
+        if coord == 'likelihood':
+            continue
+        ax.plot(
+            output_df.loc[slice(*idxs), ('ensemble-kalman_tracker', key, coord)],
+            color='k', linewidth=2, label='EKS',
+        )
+        if coord == 'x':
+            ax.legend()
+
+        # Plot nll_values against the time axis
+        if nll_values is not None:
+            nll_values_subset = nll_values[idxs[0]:idxs[1]]
+            axes[-1].plot(range(*idxs), nll_values_subset, color='k', linewidth=2)
+            axes[-1].set_ylabel('EKS NLL', fontsize=12)
+
+    plt.suptitle(f'EKS results for {key}, smoothing = {s_final}', fontsize=14)
+    plt.tight_layout()
+    save_file = os.path.join(save_dir,
+                             f'{smoother_type}_{key}.pdf')
+    plt.savefig(save_file)
+    plt.close()
+    print(f'see example EKS output at {save_file}')
+
+
+def crop_frames(y, s_frames):
+    """ Crops frames as specified by s_frames to be used for auto-tuning s."""
+    # Create an empty list to store arrays
+    result = []
+
+    for frame in s_frames:
+        # Unpack the frame, setting defaults for empty start or end
+        start, end = frame
+        # Default start to 0 if not specified (and adjust for zero indexing)
+        start = start - 1 if start is not None else 0
+        # Default end to the length of ys if not specified
+        end = end if end is not None else len(y)
+
+        # Cap the indices within valid range
+        start = max(0, start)
+        end = min(len(y), end)
+
+        # Validate the keys
+        if start >= end:
+            raise ValueError(f"Index range ({start + 1}, {end}) "
+                             f"is out of bounds for the list of length {len(y)}.")
+
+        # Use numpy slicing to preserve the data structure
+        result.append(y[start:end])
+
+    # Concatenate all slices into a single numpy array
+    return np.concatenate(result)
diff --git a/scripts/plotting_aeks.py b/scripts/plotting_aeks.py
new file mode 100644
index 0000000..f22eff9
--- /dev/null
+++ b/scripts/plotting_aeks.py
@@ -0,0 +1,158 @@
+import copy
+import os
+import sys
+
+import cv2
+import matplotlib.patches as mpatches
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+
+sys.path.append(
+    os.path.abspath(os.path.join(os.path.dirname(__file__), '../../tracking-diagnostics')))
+
+from diagnostics.video import get_frames_from_idxs
+
+from eks.utils import convert_lp_dlc, format_data
+
+
+def format_data(ensemble_dir):
+    input_files = os.listdir(ensemble_dir)
+    markers_list = []
+    for input_file in input_files:
+        markers_curr = pd.read_csv(
+            os.path.join(ensemble_dir, input_file), header=[0, 1, 2], index_col=0)
+        keypoint_names = [c[1] for c in markers_curr.columns[::3]]
+        model_name = markers_curr.columns[0][0]
+        markers_curr_fmt = convert_lp_dlc(
+            markers_curr, keypoint_names, model_name=model_name)
+        markers_curr_fmt.to_csv('fmt_input.csv', index=False)
+        markers_list.append(markers_curr_fmt)
+    return markers_list
+
+
+import os
+import subprocess
+
+
+def save_video(save_file, tmp_dir, framerate, frame_pattern='frame_%06d.jpeg'):
+    call_str = f'ffmpeg -r {framerate} -i {os.path.join(tmp_dir, frame_pattern)} -c:v libx264 -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" {save_file}'
+
+    if os.name == 'nt':  # If the OS is Windows
+        subprocess.run(['ffmpeg', '-r', str(framerate), '-i', f'{tmp_dir}/frame_%06d.jpeg',
+                        '-c:v', 'libx264', '-vf', "pad=ceil(iw/2)*2:ceil(ih/2)*2",
+                        save_file],
+                       check=True)
+    else:  # If the OS is Unix/Linux
+        subprocess.run(['/bin/bash', '-c', call_str], check=True)
+
+
+# load eks
+eks_path = f'/eks/outputs/eks_test_vid.csv'
+markers_curr = pd.read_csv(eks_path, header=[0, 1, 2], index_col=0)
+keypoint_names = [c[1] for c in markers_curr.columns[::3]]
+model_name = markers_curr.columns[0][0]
+eks_pd = convert_lp_dlc(markers_curr, keypoint_names, model_name)
+
+# load aeks
+eks_path = f'/eks/outputs/aeks_test_vid.csv'
+markers_curr = pd.read_csv(eks_path, header=[0, 1, 2], index_col=0)
+keypoint_names = [c[1] for c in markers_curr.columns[::3]]
+model_name = markers_curr.columns[0][0]
+eks_pd2 = convert_lp_dlc(markers_curr, keypoint_names, model_name)
+
+# load ensembles
+ensemble_dir = f'/eks/data/mirror-mouse-aeks/expanded-networks'
+ensemble_pd_list = format_data(ensemble_dir)
+animal_ids = [1]
+body_parts = ['paw1LH_top', 'paw2LF_top', 'paw3RF_top', 'paw4RH_top', 'tailBase_top',
+              'tailMid_top', 'nose_top', 'obs_top',
+              'paw1LH_bot', 'paw2LF_bot', 'paw3RF_bot', 'paw4RH_bot', 'tailBase_bot',
+              'tailMid_bot', 'nose_bot', 'obsHigh_bot', 'obsLow_bot'
+              ]
+to_plot = []
+for animal_id in animal_ids:
+    for body_part in body_parts:
+        to_plot.append(body_part)
+
+save_path = '/eks/videos'
+video_name = 'test_vid.mp4'
+video_path = f'/eks/videos/{video_name}'
+cap = cv2.VideoCapture(video_path)
+
+start_frame = 0
+frame_idxs = None
+n_frames = 993
+idxs = np.arange(start_frame, start_frame + n_frames)
+framerate = 20
+
+
+def plot_video_markers(markers_pd, ax, n, body_part, color, alphas, markers, model_id=0,
+                       markersize=8):
+    x_key = body_part + '_x'
+    y_key = body_part + '_y'
+    markers_x = markers_pd[x_key][n]
+    markers_y = markers_pd[y_key][n]
+    ax.scatter(markers_x, markers_y, alpha=alphas[model_id], marker="o", color=color)
+
+
+colors = ['cyan', 'pink', 'purple']
+alphas = [.8] * len(ensemble_pd_list) + [1.0]
+markers = ['.'] * len(ensemble_pd_list) + ['x']
+model_labels = ['expanded-network rng0', 'eks', 'aeks']
+model_colors = colors
+fr = 60
+
+for body_part in to_plot:
+    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
+    tmp_dir = os.path.join(save_path, f'tmp_{body_part}')
+    if not os.path.exists(tmp_dir):
+        os.makedirs(tmp_dir)
+    save_file = os.path.join(save_path, f'test_vid_{body_part}.mp4')
+
+    txt_fr_kwargs = {
+        'fontsize': 14, 'color': [1, 1, 1], 'horizontalalignment': 'left',
+        'verticalalignment': 'top', 'fontname': 'monospace',
+        'bbox': dict(facecolor='k', alpha=0.25, edgecolor='none'),
+        'transform': ax.transAxes
+    }
+    save_imgs = True
+    if save_imgs:
+        markersize = 18
+    else:
+        markersize = 12
+    for idx in tqdm(range(len(idxs))):
+        n = idxs[idx]
+        ax.clear()
+        frame = get_frames_from_idxs(cap, [n])
+        ax.imshow(frame[0, 0], vmin=0, vmax=255, cmap='gray')
+        ax.set_xticks([])
+        ax.set_yticks([])
+        patches = []
+        # ensemble
+        for model_id, markers_pd in enumerate(ensemble_pd_list):
+            markers_pd_copy = copy.deepcopy(markers_pd)
+            plot_video_markers(markers_pd_copy, ax, n, body_part, colors[0], alphas, markers,
+                               model_id=model_id, markersize=markersize)
+        # eks_ind
+        for model_id, markers_pd in enumerate([eks_pd]):
+            markers_pd_copy = copy.deepcopy(markers_pd)
+            plot_video_markers(markers_pd_copy, ax, n, body_part, colors[1], alphas, markers,
+                               model_id=model_id, markersize=markersize)
+        # eks_cdnm
+        for model_id, markers_pd in enumerate([eks_pd2]):
+            markers_pd_copy = copy.deepcopy(markers_pd)
+            plot_video_markers(markers_pd_copy, ax, n, body_part, colors[2], alphas, markers,
+                               model_id=model_id, markersize=markersize)
+        # legend
+        for i, model_label in enumerate(model_labels):
+            patches.append(mpatches.Patch(color=model_colors[i], label=model_label))
+        ax.legend(handles=patches, prop={'size': 12}, loc='upper right')
+        im = ax.text(0.02, 0.98, f'frame {n}', **txt_fr_kwargs)
+        plt.savefig(os.path.join(tmp_dir, 'frame_%06d.jpeg' % idx))
+    save_video(save_file, tmp_dir, framerate, frame_pattern='frame_%06d.jpeg')
+    # Clean up temporary directory
+    for file in os.listdir(tmp_dir):
+        os.remove(os.path.join(tmp_dir, file))
+    os.rmdir(tmp_dir)
diff --git a/scripts/singlecam_example.py b/scripts/singlecam_example.py
index ff9277f..e94d3a4 100644
--- a/scripts/singlecam_example.py
+++ b/scripts/singlecam_example.py
@@ -50,7 +50,7 @@
     s,
     s_frames,
     blocks,
-    verbose
+    verbose=verbose
 )
 
 keypoint_i = -1  # keypoint to be plotted
diff --git a/tests/test_singlecam_smoother.py b/tests/test_singlecam_smoother.py
index c720ec8..9753858 100644
--- a/tests/test_singlecam_smoother.py
+++ b/tests/test_singlecam_smoother.py
@@ -1,5 +1,63 @@
+import pytest
+import numpy as np
+import pandas as pd
+from eks.singlecam_smoother import ensemble_kalman_smoother_singlecam
 
 
+# Function to generate simulated data
+def simulate_marker_data():
+    np.random.seed(0)
+    num_frames = 100
+    num_keypoints = 5
+    markers_3d_array = np.random.randn(num_frames, num_frames,
+                                       num_keypoints * 3)  # Simulating 3D data for keypoints
+    bodypart_list = [f'bodypart_{i}' for i in range(num_keypoints)]
+    smooth_param = 0.1
+    s_frames = list(range(num_frames))
+    blocks = []
+    ensembling_mode = 'median'
+    zscore_threshold = 2
+    return markers_3d_array, bodypart_list, smooth_param, s_frames, blocks, ensembling_mode, zscore_threshold
+
+
+# Function to generate random likelihoods
+def generate_random_likelihoods(num_frames, num_keypoints):
+    return np.random.rand(num_frames, num_keypoints)
+
+
+# Test function for the ensemble Kalman smoother
 def test_ensemble_kalman_smoother_singlecam():
+    markers_3d_array, bodypart_list, smooth_param, s_frames, blocks, ensembling_mode, zscore_threshold = simulate_marker_data()
+
+    # Add random likelihoods to the simulated data
+    likelihoods = generate_random_likelihoods(markers_3d_array.shape[0],
+                                              markers_3d_array.shape[2] // 3)
+
+    # Call the smoother function
+    df_dicts, s_finals = ensemble_kalman_smoother_singlecam(
+        markers_3d_array, bodypart_list, smooth_param, s_frames, blocks,
+        ensembling_mode, zscore_threshold)
+
+    # Basic checks to ensure the function runs and returns expected types
+    assert isinstance(df_dicts, list), "Expected df_dicts to be a list"
+    assert all(
+        isinstance(d, dict) for d in df_dicts), "Expected elements of df_dicts to be dictionaries"
+    assert isinstance(s_finals, (list, np.ndarray)), "Expected s_finals to be a list or an ndarray"
+
+    # Additional checks can include verifying contents of the dataframes
+    for df_dict in df_dicts:
+        for key, df in df_dict.items():
+            assert isinstance(df, pd.DataFrame), f"Expected {key} to be a pandas DataFrame"
+            #add more detailed checks here
+            assert 'likelihood' in df.columns.get_level_values(
+                1), "Expected 'likelihood' in DataFrame columns"
+            assert 'x_var' in df.columns.get_level_values(
+                1), "Expected 'x_var' in DataFrame columns"
+            assert 'y_var' in df.columns.get_level_values(
+                1), "Expected 'y_var' in DataFrame columns"
+            assert 'zscore' in df.columns.get_level_values(
+                1), "Expected 'zscore' in DataFrame columns"
+
 
-    from eks.singlecam_smoother import ensemble_kalman_smoother_singlecam
+if __name__ == "__main__":
+    pytest.main()

From aa94ac320911ad4fc6c2d1e89aae5f2d2a3645b5 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Wed, 23 Oct 2024 19:18:18 -0400
Subject: [PATCH 08/25] removed SLEAP fish workaround

---
 eks/utils.py       | 118 +-------------------
 eks/utils_saved.py | 270 ---------------------------------------------
 2 files changed, 4 insertions(+), 384 deletions(-)
 delete mode 100644 eks/utils_saved.py

diff --git a/eks/utils.py b/eks/utils.py
index 0e34465..19a023b 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -38,40 +38,6 @@ def convert_lp_dlc(df_lp, keypoint_names, model_name=None):
     return df_dlc
 
 
-def assign_identity_by_total_length(dfrow: pd.Series, fish_numbers: list) -> list:
-    """Assign fish identity based on total length, keeping numeric fish labels and returning them in sorted order."""
-    segments = ["mouth-head", "head-middle", "middle-tail"]
-
-    # Calculate total length for each fish
-    fish_lengths = {}
-    for fish_num in fish_numbers:
-        total_length = 0
-        for seg_key in segments:
-            try:
-                mouth_x = dfrow[f"{fish_num}_mouth_x"]
-                mouth_y = dfrow[f"{fish_num}_mouth_y"]
-                head_x = dfrow[f"{fish_num}_head_x"]
-                head_y = dfrow[f"{fish_num}_head_y"]
-                middle_x = dfrow[f"{fish_num}_middle_x"]
-                middle_y = dfrow[f"{fish_num}_middle_y"]
-                tail_x = dfrow[f"{fish_num}_tail_x"]
-                tail_y = dfrow[f"{fish_num}_tail_y"]
-
-                # Add segment lengths (mouth-head, head-middle, middle-tail)
-                total_length += np.sqrt((mouth_x - head_x) ** 2 + (mouth_y - head_y) ** 2)
-                total_length += np.sqrt((head_x - middle_x) ** 2 + (head_y - middle_y) ** 2)
-                total_length += np.sqrt((middle_x - tail_x) ** 2 + (middle_y - tail_y) ** 2)
-            except KeyError:
-                continue  # Skip fish if data for any segment is missing
-
-        fish_lengths[fish_num] = total_length
-
-    # Sort fish by total length in ascending order
-    sorted_fish = sorted(fish_lengths, key=fish_lengths.get)
-
-    return sorted_fish  # Return sorted fish numbers by size
-
-
 def convert_slp_dlc(base_dir, slp_file):
     # Read data from .slp file
     filepath = os.path.join(base_dir, slp_file)
@@ -80,6 +46,7 @@ def convert_slp_dlc(base_dir, slp_file):
     # Determine the maximum number of instances and keypoints
     max_instances = len(labels[0].instances)
     keypoint_names = [node.name for node in labels[0].instances[0].points.keys()]
+    print(keypoint_names)
     num_keypoints = len(keypoint_names)
 
     # Initialize a NumPy array to store the data
@@ -95,6 +62,7 @@ def convert_slp_dlc(base_dir, slp_file):
                 point = instance.points[keypoint_node]
                 data[i, j, k, 0] = point.x if not np.isnan(point.x) else 0
                 data[i, j, k, 1] = point.y if not np.isnan(point.y) else 0
+                # Check if 'score' exists, otherwise leave as 0
                 data[i, j, k, 2] = getattr(point, 'score', 0) + 1e-6
 
     # Reshape data to 2D array for DataFrame creation
@@ -108,89 +76,11 @@ def convert_slp_dlc(base_dir, slp_file):
 
     # Create DataFrame from the reshaped data
     df = pd.DataFrame(reshaped_data, columns=columns)
-
-    # Debug tracker to count frames with reassignment
-    reassignment_count = 0
-
-    # Process each row to assign fish identity by length and adjust both data and column names
-    fish_numbers = [1, 2, 3, 4]  # Assuming 4 fish
-    for idx, row in df.iterrows():
-        # Get identity mapping based on total length per frame
-        sorted_fish = assign_identity_by_total_length(row, fish_numbers)
-
-        # Check if reassignment is needed
-        if sorted_fish != fish_numbers:
-            reassignment_count += 1
-
-        # Prepare to store the reordered data
-        new_row = row.copy()
-
-        # For each fish (now sorted by size), move their data to the correct columns
-        for target_fish_num, actual_fish_num in enumerate(sorted_fish, start=1):
-            for keypoint_name in keypoint_names:
-                # Move x, y, and likelihood to the correct "target" fish position
-                new_row[f"{target_fish_num}_{keypoint_name}_x"] = row[
-                    f"{actual_fish_num}_{keypoint_name}_x"]
-                new_row[f"{target_fish_num}_{keypoint_name}_y"] = row[
-                    f"{actual_fish_num}_{keypoint_name}_y"]
-                new_row[f"{target_fish_num}_{keypoint_name}_likelihood"] = row[
-                    f"{actual_fish_num}_{keypoint_name}_likelihood"]
-
-        # Update the DataFrame with the reordered data for this frame
-        df.iloc[idx] = new_row
-
-    # Print total number of frames where reassignment took place
-    print(f"Total number of frames with reassignment: {reassignment_count}")
-
-    # Save the updated DataFrame to a CSV
-    df.to_csv(f'./data/fish-slp-new/{slp_file}_reassigned.csv', index=False)
-    print(f"File processed and saved as {slp_file}_reassigned.csv")
+    df.to_csv(f'{slp_file}.csv', index=False)
+    print(f"File read. See read-in data at {slp_file}.csv")
     return df
 
 
-# def convert_slp_dlc(base_dir, slp_file):
-#     # Read data from .slp file
-#     filepath = os.path.join(base_dir, slp_file)
-#     labels = read_labels(filepath)
-#
-#     # Determine the maximum number of instances and keypoints
-#     max_instances = len(labels[0].instances)
-#     keypoint_names = [node.name for node in labels[0].instances[0].points.keys()]
-#     print(keypoint_names)
-#     num_keypoints = len(keypoint_names)
-#
-#     # Initialize a NumPy array to store the data
-#     num_frames = len(labels.labeled_frames)
-#     data = np.zeros((num_frames, max_instances, num_keypoints, 3))  # 3 for x, y, likelihood
-#
-#     # Fill the NumPy array with data
-#     for i, labeled_frame in enumerate(labels.labeled_frames):
-#         for j, instance in enumerate(labeled_frame.instances):
-#             if j >= max_instances:
-#                 break
-#             for k, keypoint_node in enumerate(instance.points.keys()):
-#                 point = instance.points[keypoint_node]
-#                 data[i, j, k, 0] = point.x if not np.isnan(point.x) else 0
-#                 data[i, j, k, 1] = point.y if not np.isnan(point.y) else 0
-#                 # Check if 'score' exists, otherwise leave as 0
-#                 data[i, j, k, 2] = getattr(point, 'score', 0) + 1e-6
-#
-#     # Reshape data to 2D array for DataFrame creation
-#     reshaped_data = data.reshape(num_frames, -1)
-#     columns = []
-#     for j in range(max_instances):
-#         for keypoint_name in keypoint_names:
-#             columns.append(f"{j + 1}_{keypoint_name}_x")
-#             columns.append(f"{j + 1}_{keypoint_name}_y")
-#             columns.append(f"{j + 1}_{keypoint_name}_likelihood")
-#
-#     # Create DataFrame from the reshaped data
-#     df = pd.DataFrame(reshaped_data, columns=columns)
-#     df.to_csv(f'{slp_file}.csv', index=False)
-#     print(f"File read. See read-in data at {slp_file}.csv")
-#     return df
-
-
 def format_data(input_dir, data_type):
     input_files = os.listdir(input_dir)
     input_dfs_list = []
diff --git a/eks/utils_saved.py b/eks/utils_saved.py
deleted file mode 100644
index 19a023b..0000000
--- a/eks/utils_saved.py
+++ /dev/null
@@ -1,270 +0,0 @@
-import os
-
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-from sleap_io.io.slp import read_labels
-
-
-def make_dlc_pandas_index(keypoint_names, labels=["x", "y", "likelihood"]):
-    pdindex = pd.MultiIndex.from_product(
-        [["ensemble-kalman_tracker"], keypoint_names, labels],
-        names=["scorer", "bodyparts", "coords"],
-    )
-    return pdindex
-
-
-def convert_lp_dlc(df_lp, keypoint_names, model_name=None):
-    df_dlc = {}
-    for feat in keypoint_names:
-        for feat2 in ['x', 'y', 'likelihood']:
-            try:
-                if model_name is None:
-                    col_tuple = (feat, feat2)
-                else:
-                    col_tuple = (model_name, feat, feat2)
-
-                # Skip columns with any unnamed level
-                if any(level.startswith('Unnamed') for level in col_tuple if
-                       isinstance(level, str)):
-                    continue
-
-                df_dlc[f'{feat}_{feat2}'] = df_lp.loc[:, col_tuple]
-            except KeyError:
-                # If the specified column does not exist, skip it
-                continue
-
-    df_dlc = pd.DataFrame(df_dlc, index=df_lp.index)
-    return df_dlc
-
-
-def convert_slp_dlc(base_dir, slp_file):
-    # Read data from .slp file
-    filepath = os.path.join(base_dir, slp_file)
-    labels = read_labels(filepath)
-
-    # Determine the maximum number of instances and keypoints
-    max_instances = len(labels[0].instances)
-    keypoint_names = [node.name for node in labels[0].instances[0].points.keys()]
-    print(keypoint_names)
-    num_keypoints = len(keypoint_names)
-
-    # Initialize a NumPy array to store the data
-    num_frames = len(labels.labeled_frames)
-    data = np.zeros((num_frames, max_instances, num_keypoints, 3))  # 3 for x, y, likelihood
-
-    # Fill the NumPy array with data
-    for i, labeled_frame in enumerate(labels.labeled_frames):
-        for j, instance in enumerate(labeled_frame.instances):
-            if j >= max_instances:
-                break
-            for k, keypoint_node in enumerate(instance.points.keys()):
-                point = instance.points[keypoint_node]
-                data[i, j, k, 0] = point.x if not np.isnan(point.x) else 0
-                data[i, j, k, 1] = point.y if not np.isnan(point.y) else 0
-                # Check if 'score' exists, otherwise leave as 0
-                data[i, j, k, 2] = getattr(point, 'score', 0) + 1e-6
-
-    # Reshape data to 2D array for DataFrame creation
-    reshaped_data = data.reshape(num_frames, -1)
-    columns = []
-    for j in range(max_instances):
-        for keypoint_name in keypoint_names:
-            columns.append(f"{j + 1}_{keypoint_name}_x")
-            columns.append(f"{j + 1}_{keypoint_name}_y")
-            columns.append(f"{j + 1}_{keypoint_name}_likelihood")
-
-    # Create DataFrame from the reshaped data
-    df = pd.DataFrame(reshaped_data, columns=columns)
-    df.to_csv(f'{slp_file}.csv', index=False)
-    print(f"File read. See read-in data at {slp_file}.csv")
-    return df
-
-
-def format_data(input_dir, data_type):
-    input_files = os.listdir(input_dir)
-    input_dfs_list = []
-    # Extracting markers from data
-    # Applies correct format conversion and stores each file's markers in a list
-    for input_file in input_files:
-
-        if data_type == 'slp':
-            if not input_file.endswith('.slp'):
-                continue
-            markers_curr = convert_slp_dlc(input_dir, input_file)
-            keypoint_names = [c[1] for c in markers_curr.columns[::3]]
-            markers_curr_fmt = markers_curr
-        elif data_type == 'lp' or 'dlc':
-            if not input_file.endswith('csv'):
-                continue
-            markers_curr = pd.read_csv(
-                os.path.join(input_dir, input_file), header=[0, 1, 2], index_col=0)
-            keypoint_names = [c[1] for c in markers_curr.columns[::3]]
-            model_name = markers_curr.columns[0][0]
-            if data_type == 'lp':
-                markers_curr_fmt = convert_lp_dlc(
-                    markers_curr, keypoint_names, model_name=model_name)
-            else:
-                markers_curr_fmt = markers_curr
-
-        # markers_curr_fmt.to_csv('fmt_input.csv', index=False)
-        input_dfs_list.append(markers_curr_fmt)
-
-    if len(input_dfs_list) == 0:
-        raise FileNotFoundError(f'No marker input files found in {input_dir}')
-
-    output_df = make_output_dataframe(markers_curr)
-    # returns both the formatted marker data and the empty dataframe for EKS output
-    return input_dfs_list, output_df, keypoint_names
-
-
-def make_output_dataframe(markers_curr):
-    ''' Makes empty DataFrame for EKS output '''
-    markers_eks = markers_curr.copy()
-
-    # Check if the columns Index is a MultiIndex
-    if isinstance(markers_eks.columns, pd.MultiIndex):
-        # Set the first level of the MultiIndex to 'ensemble-kalman_tracker'
-        markers_eks.columns = markers_eks.columns.set_levels(['ensemble-kalman_tracker'], level=0)
-    else:
-        # Convert the columns Index to a MultiIndex with three levels
-        new_columns = []
-
-        for col in markers_eks.columns:
-            # Extract instance number, keypoint name, and feature from the column name
-            parts = col.split('_')
-            instance_num = parts[0]
-            keypoint_name = '_'.join(parts[1:-1])  # Combine parts for keypoint name
-            feature = parts[-1]
-
-            # Construct new column names with desired MultiIndex structure
-            new_columns.append(
-                ('ensemble-kalman_tracker', f'{instance_num}_{keypoint_name}', feature))
-
-        # Convert the columns Index to a MultiIndex with three levels
-        markers_eks.columns = pd.MultiIndex.from_tuples(new_columns,
-                                                        names=['scorer', 'bodyparts', 'coords'])
-
-    # Iterate over columns and set values
-    for col in markers_eks.columns:
-        if col[-1] == 'likelihood':
-            # Set likelihood values to 1.0
-            markers_eks[col].values[:] = 1.0
-        else:
-            # Set other values to NaN
-            markers_eks[col].values[:] = np.nan
-
-    # Write DataFrame to CSV
-    # output_csv = 'output_dataframe.csv'
-    # dataframe_to_csv(markers_eks, output_csv)
-
-    return markers_eks
-
-
-def dataframe_to_csv(df, filename):
-    """
-    Converts a DataFrame to a CSV file.
-
-    Parameters:
-        df (pandas.DataFrame): The DataFrame to be converted.
-        filename (str): The name of the CSV file to be created.
-
-    Returns:
-        None
-    """
-    try:
-        df.to_csv(filename, index=False)
-    except Exception as e:
-        print("Error:", e)
-
-
-def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df,
-                              key_suffix=''):  # key_suffix only required for multi-camera setups
-    for coord in ['x', 'y', 'zscore', 'nll']:
-        src_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}', coord)
-        dst_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}' + key_suffix, coord)
-        output_df.loc[:, dst_cols] = keypoint_df.loc[:, src_cols]
-
-    return output_df
-
-
-def plot_results(output_df, input_dfs_list,
-                 key, s_final, nll_values, idxs, save_dir, smoother_type):
-    if nll_values is None:
-        fig, axes = plt.subplots(4, 1, figsize=(9, 10))
-    else:
-        fig, axes = plt.subplots(5, 1)
-
-    for ax, coord in zip(axes, ['x', 'y', 'likelihood', 'zscore']):
-        # Rename axes label for likelihood and zscore coordinates
-        if coord == 'likelihood':
-            ylabel = 'model likelihoods'
-        elif coord == 'zscore':
-            ylabel = 'EKS disagreement'
-        else:
-            ylabel = coord
-
-        # plot individual models
-        ax.set_ylabel(ylabel, fontsize=12)
-        if coord == 'zscore':
-            ax.plot(output_df.loc[slice(*idxs), ('ensemble-kalman_tracker', key, coord)],
-                    color='k', linewidth=2)
-            ax.set_xlabel('Time (frames)', fontsize=12)
-            continue
-        for m, markers_curr in enumerate(input_dfs_list):
-            ax.plot(
-                markers_curr.loc[slice(*idxs), key + f'_{coord}'], color=[0.5, 0.5, 0.5],
-                label='Individual models' if m == 0 else None,
-            )
-        # plot eks
-        if coord == 'likelihood':
-            continue
-        ax.plot(
-            output_df.loc[slice(*idxs), ('ensemble-kalman_tracker', key, coord)],
-            color='k', linewidth=2, label='EKS',
-        )
-        if coord == 'x':
-            ax.legend()
-
-        # Plot nll_values against the time axis
-        if nll_values is not None:
-            nll_values_subset = nll_values[idxs[0]:idxs[1]]
-            axes[-1].plot(range(*idxs), nll_values_subset, color='k', linewidth=2)
-            axes[-1].set_ylabel('EKS NLL', fontsize=12)
-
-    plt.suptitle(f'EKS results for {key}, smoothing = {s_final}', fontsize=14)
-    plt.tight_layout()
-    save_file = os.path.join(save_dir,
-                             f'{smoother_type}_{key}.pdf')
-    plt.savefig(save_file)
-    plt.close()
-    print(f'see example EKS output at {save_file}')
-
-
-def crop_frames(y, s_frames):
-    """ Crops frames as specified by s_frames to be used for auto-tuning s."""
-    # Create an empty list to store arrays
-    result = []
-
-    for frame in s_frames:
-        # Unpack the frame, setting defaults for empty start or end
-        start, end = frame
-        # Default start to 0 if not specified (and adjust for zero indexing)
-        start = start - 1 if start is not None else 0
-        # Default end to the length of ys if not specified
-        end = end if end is not None else len(y)
-
-        # Cap the indices within valid range
-        start = max(0, start)
-        end = min(len(y), end)
-
-        # Validate the keys
-        if start >= end:
-            raise ValueError(f"Index range ({start + 1}, {end}) "
-                             f"is out of bounds for the list of length {len(y)}.")
-
-        # Use numpy slicing to preserve the data structure
-        result.append(y[start:end])
-
-    # Concatenate all slices into a single numpy array
-    return np.concatenate(result)

From 6900f65bd01054a008ac444180c7440b8407edc3 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Wed, 23 Oct 2024 23:57:52 +0000
Subject: [PATCH 09/25] merge

---
 eks/core.py               | 2 +-
 eks/singlecam_smoother.py | 7 ++++---
 eks/utils.py              | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/eks/core.py b/eks/core.py
index 7890eb5..91b1e9d 100644
--- a/eks/core.py
+++ b/eks/core.py
@@ -698,7 +698,7 @@ def eks_zscore(eks_predictions, ensemble_means, ensemble_vars, min_ensemble_std=
     thresh_ensemble_std = ensemble_std.copy()
     thresh_ensemble_std[thresh_ensemble_std < min_ensemble_std] = min_ensemble_std
     z_score = num / thresh_ensemble_std
-    return z_score
+    return z_score, ensemble_std
 
 
 def compute_covariance_matrix(ensemble_preds):
diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 564827a..2d228d5 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -80,7 +80,7 @@ def ensemble_kalman_smoother_singlecam(
         eks_preds_array[k] = y_m_smooths[k].copy()
         eks_preds_array[k] = np.asarray([eks_preds_array[k].T[0] + mean_x_obs,
                                          eks_preds_array[k].T[1] + mean_y_obs]).T
-        zscore = eks_zscore(eks_preds_array[k],
+        zscore, ensemble_std = eks_zscore(eks_preds_array[k],
                             ensemble_preds[:, k, :],
                             ensemble_vars[:, k, :],
                             min_ensemble_std=zscore_threshold)
@@ -89,7 +89,7 @@ def ensemble_kalman_smoother_singlecam(
         # Final Cleanup
         pdindex = make_dlc_pandas_index([bodypart_list[k]],
                                         labels=["x", "y", "likelihood", "x_var", "y_var",
-                                                "zscore", "nll"])
+                                                "zscore", "nll", "ensemble_std"])
         var = np.empty(y_m_smooths[k].T[0].shape)
         var[:] = np.nan
         pred_arr = np.vstack([
@@ -99,7 +99,8 @@ def ensemble_kalman_smoother_singlecam(
             y_v_smooths[k][:, 0, 0],
             y_v_smooths[k][:, 1, 1],
             zscore,
-            nll
+            nll,
+            ensemble_std
         ]).T
         df = pd.DataFrame(pred_arr, columns=pdindex)
         dfs.append(df)
diff --git a/eks/utils.py b/eks/utils.py
index 67530bc..681f5f7 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -179,7 +179,7 @@ def dataframe_to_csv(df, filename):
 
 def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df,
                               key_suffix=''):  # key_suffix only required for multi-camera setups
-    for coord in ['x', 'y', 'zscore', 'nll']:
+    for coord in ['x', 'y', 'zscore', 'nll', 'ensemble_std']:
         src_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}', coord)
         dst_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}' + key_suffix, coord)
         output_df.loc[:, dst_cols] = keypoint_df.loc[:, src_cols]

From 167a655566751a14a0711050886315cecf01623c Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Mon, 28 Oct 2024 12:57:06 -0400
Subject: [PATCH 10/25] added posterior var to eks output csvs

---
 eks/utils.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/eks/utils.py b/eks/utils.py
index 19a023b..35f6789 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -119,7 +119,7 @@ def format_data(input_dir, data_type):
 
 
 def make_output_dataframe(markers_curr):
-    ''' Makes empty DataFrame for EKS output '''
+    ''' Makes empty DataFrame for EKS output, including x_var and y_var '''
     markers_eks = markers_curr.copy()
 
     # Check if the columns Index is a MultiIndex
@@ -145,19 +145,18 @@ def make_output_dataframe(markers_curr):
         markers_eks.columns = pd.MultiIndex.from_tuples(new_columns,
                                                         names=['scorer', 'bodyparts', 'coords'])
 
-    # Iterate over columns and set values
+    # Iterate over columns and set initial values for likelihood and variance
     for col in markers_eks.columns:
         if col[-1] == 'likelihood':
             # Set likelihood values to 1.0
             markers_eks[col].values[:] = 1.0
+        elif col[-1] in ['x_var', 'y_var']:
+            # Set x_var and y_var to NaN to indicate that they need to be filled with variance values
+            markers_eks[col].values[:] = np.nan
         else:
             # Set other values to NaN
             markers_eks[col].values[:] = np.nan
 
-    # Write DataFrame to CSV
-    # output_csv = 'output_dataframe.csv'
-    # dataframe_to_csv(markers_eks, output_csv)
-
     return markers_eks
 
 
@@ -178,9 +177,9 @@ def dataframe_to_csv(df, filename):
         print("Error:", e)
 
 
-def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df,
-                              key_suffix=''):  # key_suffix only required for multi-camera setups
-    for coord in ['x', 'y', 'zscore', 'nll']:
+def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df, key_suffix=''):
+    # Include 'x', 'y', 'zscore', 'nll', 'x_var', and 'y_var' in the coordinates to transfer
+    for coord in ['x', 'y', 'zscore', 'nll', 'x_var', 'y_var']:
         src_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}', coord)
         dst_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}' + key_suffix, coord)
         output_df.loc[:, dst_cols] = keypoint_df.loc[:, src_cols]

From 754206f06b093d5827de45e256e8ad218cf24065 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Thu, 31 Oct 2024 10:13:13 -0400
Subject: [PATCH 11/25] ens var dynamic update fix

---
 eks/core.py               | 26 +++++++++++++++++++-------
 eks/singlecam_smoother.py | 28 ++++++++++++++++++----------
 2 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/eks/core.py b/eks/core.py
index 7890eb5..4cb55c4 100644
--- a/eks/core.py
+++ b/eks/core.py
@@ -352,8 +352,13 @@ def kalman_filter_step(carry, curr_y):
     return (m_t, V_t, A, Q, C, R, nll_net), (m_t, V_t, nll_current)
 
 
-def kalman_filter_step_nlls(carry, curr_y):
+def kalman_filter_step_nlls(carry, inputs):
+    # Unpack carry and inputs
     m_prev, V_prev, A, Q, C, R, nll_net, nll_array, t = carry
+    curr_y, curr_ensemble_var = inputs
+
+    # Update R with the current ensemble variance
+    R = jnp.diag(curr_ensemble_var)
 
     # Predict
     m_pred = jnp.dot(A, m_prev)
@@ -365,6 +370,7 @@ def kalman_filter_step_nlls(carry, curr_y):
     K = jnp.dot(V_pred, jnp.dot(C.T, jnp.linalg.inv(innovation_cov)))
     m_t = m_pred + jnp.dot(K, innovation)
     V_t = V_pred - jnp.dot(K, jnp.dot(C, V_pred))
+    #V_t = jnp.dot((jnp.eye(V_pred.shape[0]) - jnp.dot(K, C)), V_pred)
 
     # Compute the negative log-likelihood for the current time step
     nll_current = single_timestep_nll(innovation, innovation_cov)
@@ -385,7 +391,7 @@ def kalman_filter_step_nlls(carry, curr_y):
 # Always run the sequential filter on CPU.
 # GPU will deploy individual kernels for each scan iteration, very slow.
 @partial(jit, backend='cpu')
-def jax_forward_pass(y, m0, cov0, A, Q, C, R):
+def jax_forward_pass(y, m0, cov0, A, Q, C, R, ensemble_vars):
     """
     Kalman Filter for a single keypoint
     (can be vectorized using vmap for handling multiple keypoints in parallel)
@@ -397,6 +403,7 @@ def jax_forward_pass(y, m0, cov0, A, Q, C, R):
         Q: Shape (state_dim, state_dim). Process noise covariance matrix.
         C: Shape (observation_dim, state_dim). Observation coefficient matrix.
         R: Shape (observation_dim, observation_dim). Observation noise covar matrix.
+        ensemble_vars: Shape (num_timepoints, observation_dimension). Time-varying observation noise variances.
 
     Returns:
         mfs: Shape (timepoints, state_dim). Mean filter state at each timepoint.
@@ -405,13 +412,15 @@ def jax_forward_pass(y, m0, cov0, A, Q, C, R):
     """
     # Initialize carry
     carry = (m0, cov0, A, Q, C, R, 0)
-    carry, outputs = jax.lax.scan(kalman_filter_step, carry, y)
+
+    # Run the scan, passing y and ensemble_vars as inputs to kalman_filter_step
+    carry, outputs = jax.lax.scan(kalman_filter_step, carry, (y, ensemble_vars))
     mfs, Vfs, _ = outputs
     nll_net = carry[-1]
     return mfs, Vfs, nll_net
 
 
-def jax_forward_pass_nlls(y, m0, cov0, A, Q, C, R):
+def jax_forward_pass_nlls(y, m0, cov0, A, Q, C, R, ensemble_vars):
     """
     Kalman Filter for a single keypoint
     (can be vectorized using vmap for handling multiple keypoints in parallel)
@@ -435,10 +444,13 @@ def jax_forward_pass_nlls(y, m0, cov0, A, Q, C, R):
     nll_array_init = jnp.zeros(num_timepoints)  # Preallocate an array with zeros
     t_init = 0  # Initialize the time step counter
     carry = (m0, cov0, A, Q, C, R, 0, nll_array_init, t_init)
-    carry, outputs = jax.lax.scan(kalman_filter_step_nlls, carry, y)
+
+    # Run the scan, passing y and ensemble_vars
+    carry, outputs = jax.lax.scan(kalman_filter_step_nlls, carry, (y, ensemble_vars))
     mfs, Vfs, _ = outputs
     nll_net = carry[-3]  # Total NLL
     nll_array = carry[-2]  # Array of incremental NLL values
+
     return mfs, Vfs, nll_net, nll_array
 
 
@@ -451,7 +463,7 @@ def kalman_smoother_step(carry, X):
 
     smoothing_gain = jsc.linalg.solve(ahead_cov, jnp.dot(A, v_curr_filter.T)).T
     smoothed_state = m_curr_filter + jnp.dot(smoothing_gain, m_ahead_smooth - m_curr_filter)
-    smoothed_cov = v_curr_filter + jnp.dot(jnp.dot(smoothing_gain, m_ahead_smooth - ahead_cov),
+    smoothed_cov = v_curr_filter + jnp.dot(jnp.dot(smoothing_gain, v_ahead_smooth - ahead_cov),
                                            smoothing_gain.T)
 
     return (smoothed_state, smoothed_cov, A, Q), (smoothed_state, smoothed_cov)
@@ -728,7 +740,7 @@ def compute_covariance_matrix(ensemble_preds):
     cov_mats = []
     for i in range(n_keypoints):
         E_block = extract_submatrix(E, i)
-        cov_mats.append(E_block)
+        cov_mats.append([[1,0],[0,1]])
     cov_mats = jnp.array(cov_mats)
     return cov_mats
 
diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 2886faa..f02deaf 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -105,6 +105,11 @@ def ensemble_kalman_smoother_singlecam(
         dfs.append(df)
         df_dicts.append({bodypart_list[k] + '_df': df})
 
+        # Save each DataFrame to a CSV for debugging
+        output_csv_path = f"./{bodypart_list[k]}_smoothing_output.csv"
+        df.to_csv(output_csv_path, index=True)
+        print(f"Debug CSV saved for {bodypart_list[k]} at {output_csv_path}")
+
     return df_dicts, s_finals
 
 
@@ -209,7 +214,7 @@ def init_kalman(i, adjusted_x_obs, adjusted_y_obs):
 
 def singlecam_optimize_smooth(
         cov_mats, ys, m0s, S0s, Cs, As, Rs, ensemble_vars,
-        s_frames, smooth_param, blocks=[], maxiter=1000, verbose=False, inflation_factor=1.1):
+        s_frames, smooth_param, blocks=[], maxiter=1000, verbose=False, inflation_factor=1):
     """
     Optimize smoothing parameter, and use the result to run the kalman filter-smoother
 
@@ -262,9 +267,9 @@ def nll_loss_parallel_scan(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs):
             print("Using CPU")
 
         @partial(jit)
-        def nll_loss_sequential_scan(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs):
+        def nll_loss_sequential_scan(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs, ensemble_vars):
             s = jnp.exp(s)  # To ensure positivity
-            return singlecam_smooth_min(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs)
+            return singlecam_smooth_min(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs, ensemble_vars)
 
         loss_function = nll_loss_sequential_scan
 
@@ -336,7 +341,7 @@ def step(s, opt_state):
     # Final smooth with optimized s
     ms, Vs, nlls = final_forwards_backwards_pass(
         cov_mats, s_finals,
-        ys, m0s, S0s, Cs, As, Rs)
+        ys, m0s, S0s, Cs, As, Rs, ensemble_vars)
 
     return s_finals, ms, Vs, nlls
 
@@ -346,9 +351,9 @@ def step(s, opt_state):
 ## Note: this code is set up to always run on CPU.
 ######
 
-def inner_smooth_min_routine(y, m0, S0, A, Q, C, R):
+def inner_smooth_min_routine(y, m0, S0, A, Q, C, R, ensemble_vars):
     # Run filtering with the current smooth_param
-    _, _, nll = jax_forward_pass(y, m0, S0, A, Q, C, R)
+    _, _, nll = jax_forward_pass(y, m0, S0, A, Q, C, R, ensemble_vars)
     return nll
 
 
@@ -356,7 +361,7 @@ def inner_smooth_min_routine(y, m0, S0, A, Q, C, R):
 
 
 def singlecam_smooth_min(
-        smooth_param, cov_mats, ys, m0s, S0s, Cs, As, Rs):
+        smooth_param, cov_mats, ys, m0s, S0s, Cs, As, Rs, ensemble_vars):
     """
     Smooths once using the given smooth_param. Returns only the nll, which is the parameter to
     be minimized using the scipy.minimize() function.
@@ -426,7 +431,7 @@ def singlecam_smooth_min_parallel(
     return jnp.sum(values)
 
 
-def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs):
+def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs, ensemble_vars):
     """
     Perform final smoothing with the optimized smoothing parameters.
 
@@ -452,11 +457,14 @@ def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs):
     Vs_array = []
     nlls_array = []
     Qs = s[:, None, None] * process_cov
-
+    print(f'ys.shape: {ys.shape}')
+    print(f'ensemble_vars.shape: {ensemble_vars.shape}')
     # Run forward and backward pass for each keypoint
     for k in range(n_keypoints):
-        mf, Vf, nll, nll_array = jax_forward_pass_nlls(ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k])
+        mf, Vf, nll, nll_array = jax_forward_pass_nlls(ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k], ensemble_vars[:,k,:])
+        print(f'Vf: {Vf}')
         ms, Vs = jax_backward_pass(mf, Vf, As[k], Qs[k])
+        print(f'Vs: {Vs}')
         ms_array.append(np.array(ms))
         Vs_array.append(np.array(Vs))
         nlls_array.append(np.array(nll_array))

From 747e58f60131d7196cb34743a613c9d21ca4943b Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Thu, 31 Oct 2024 14:16:15 +0000
Subject: [PATCH 12/25] merge

---
 eks/core.py               | 24 +++++++++++++++++++-----
 eks/singlecam_smoother.py | 18 +++++++++++++-----
 eks/utils.py              |  6 ------
 3 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/eks/core.py b/eks/core.py
index 91b1e9d..cae5a58 100644
--- a/eks/core.py
+++ b/eks/core.py
@@ -344,7 +344,7 @@ def kalman_filter_step(carry, curr_y):
     innovation_cov = jnp.dot(C, jnp.dot(V_pred, C.T)) + R
     K = jnp.dot(V_pred, jnp.dot(C.T, jnp.linalg.inv(innovation_cov)))
     m_t = m_pred + jnp.dot(K, innovation)
-    V_t = V_pred - jnp.dot(K, jnp.dot(C, V_pred))
+    V_t = jnp.dot((jnp.eye(V_pred.shape[0]) - jnp.dot(K, C)), V_pred)
 
     nll_current = single_timestep_nll(innovation, innovation_cov)
     nll_net = nll_net + nll_current
@@ -352,8 +352,13 @@ def kalman_filter_step(carry, curr_y):
     return (m_t, V_t, A, Q, C, R, nll_net), (m_t, V_t, nll_current)
 
 
-def kalman_filter_step_nlls(carry, curr_y):
+def kalman_filter_step_nlls(carry, inputs):
+        # Unpack carry and inputs
     m_prev, V_prev, A, Q, C, R, nll_net, nll_array, t = carry
+    curr_y, curr_ensemble_var = inputs
+
+    # Update R with the current ensemble variance
+    R = jnp.diag(curr_ensemble_var)
 
     # Predict
     m_pred = jnp.dot(A, m_prev)
@@ -365,6 +370,8 @@ def kalman_filter_step_nlls(carry, curr_y):
     K = jnp.dot(V_pred, jnp.dot(C.T, jnp.linalg.inv(innovation_cov)))
     m_t = m_pred + jnp.dot(K, innovation)
     V_t = V_pred - jnp.dot(K, jnp.dot(C, V_pred))
+    # Alternatively, you could use the stable form:
+    # V_t = jnp.dot((jnp.eye(V_pred.shape[0]) - jnp.dot(K, C)), V_pred)
 
     # Compute the negative log-likelihood for the current time step
     nll_current = single_timestep_nll(innovation, innovation_cov)
@@ -411,7 +418,7 @@ def jax_forward_pass(y, m0, cov0, A, Q, C, R):
     return mfs, Vfs, nll_net
 
 
-def jax_forward_pass_nlls(y, m0, cov0, A, Q, C, R):
+def jax_forward_pass_nlls(y, m0, cov0, A, Q, C, R, ensemble_vars):
     """
     Kalman Filter for a single keypoint
     (can be vectorized using vmap for handling multiple keypoints in parallel)
@@ -430,15 +437,22 @@ def jax_forward_pass_nlls(y, m0, cov0, A, Q, C, R):
         nll_net: Shape (1,). Negative log likelihood observations -log (p(y_1, ..., y_T))
         nll_array: Shape (num_timepoints,). Incremental negative log-likelihood at each timepoint.
     """
+    # Ensure R is a (2, 2) matrix
+    if R.ndim == 1:
+        R = jnp.diag(R)
+    
     # Initialize carry
     num_timepoints = y.shape[0]
     nll_array_init = jnp.zeros(num_timepoints)  # Preallocate an array with zeros
     t_init = 0  # Initialize the time step counter
     carry = (m0, cov0, A, Q, C, R, 0, nll_array_init, t_init)
-    carry, outputs = jax.lax.scan(kalman_filter_step_nlls, carry, y)
+    
+    # Run the scan, passing y and ensemble_vars
+    carry, outputs = jax.lax.scan(kalman_filter_step_nlls, carry, (y, ensemble_vars))
     mfs, Vfs, _ = outputs
     nll_net = carry[-3]  # Total NLL
     nll_array = carry[-2]  # Array of incremental NLL values
+    
     return mfs, Vfs, nll_net, nll_array
 
 
@@ -728,7 +742,7 @@ def compute_covariance_matrix(ensemble_preds):
     cov_mats = []
     for i in range(n_keypoints):
         E_block = extract_submatrix(E, i)
-        cov_mats.append(E_block)
+        cov_mats.append([[1, 0], [0, 1]])
     cov_mats = jnp.array(cov_mats)
     return cov_mats
 
diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index a413d53..4136ebd 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -57,7 +57,6 @@ def ensemble_kalman_smoother_singlecam(
     # Initialize Kalman filter values
     m0s, S0s, As, cov_mats, Cs, Rs, ys = initialize_kalman_filter(
         scaled_ensemble_preds, adjusted_obs_dict, n_keypoints)
-
     # Main smoothing function
     s_finals, ms, Vs, nlls = singlecam_optimize_smooth(
         cov_mats, ys, m0s, S0s, Cs, As, Rs, ensemble_vars,
@@ -105,6 +104,11 @@ def ensemble_kalman_smoother_singlecam(
         df = pd.DataFrame(pred_arr, columns=pdindex)
         dfs.append(df)
         df_dicts.append({bodypart_list[k] + '_df': df})
+    
+        # Save each DataFrame to a CSV for debugging
+        output_csv_path = f"./{bodypart_list[k]}_smoothing_output.csv"
+        df.to_csv(output_csv_path, index=True)
+        print(f"Debug CSV saved for {bodypart_list[k]} at {output_csv_path}")
 
     return df_dicts, s_finals
 
@@ -210,7 +214,7 @@ def init_kalman(i, adjusted_x_obs, adjusted_y_obs):
 
 def singlecam_optimize_smooth(
         cov_mats, ys, m0s, S0s, Cs, As, Rs, ensemble_vars,
-        s_frames, smooth_param, blocks=[], maxiter=1000, verbose=False, inflation_factor=1.1):
+        s_frames, smooth_param, blocks=[], maxiter=1000, verbose=False, inflation_factor=1):
     """
     Optimize smoothing parameter, and use the result to run the kalman filter-smoother
 
@@ -242,6 +246,7 @@ def singlecam_optimize_smooth(
         print(f'Correlated keypoint blocks: {blocks}')
 
     # Inflate the initial state covariance and process noise covariance matrices
+    print(f'Multiplying covariance by a scale of {inflation_factor}')
     S0s *= inflation_factor  # Inflating the initial state covariance
     cov_mats *= inflation_factor  # Inflating the process noise covariance matrices
 
@@ -337,7 +342,7 @@ def step(s, opt_state):
     # Final smooth with optimized s
     ms, Vs, nlls = final_forwards_backwards_pass(
         cov_mats, s_finals,
-        ys, m0s, S0s, Cs, As, Rs)
+        ys, m0s, S0s, Cs, As, Rs, ensemble_vars)
 
     return s_finals, ms, Vs, nlls
 
@@ -427,7 +432,7 @@ def singlecam_smooth_min_parallel(
     return jnp.sum(values)
 
 
-def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs):
+def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs, ensemble_vars):
     """
     Perform final smoothing with the optimized smoothing parameters.
 
@@ -453,11 +458,14 @@ def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs):
     Vs_array = []
     nlls_array = []
     Qs = s[:, None, None] * process_cov
+    print(Qs)
 
     # Run forward and backward pass for each keypoint
     for k in range(n_keypoints):
-        mf, Vf, nll, nll_array = jax_forward_pass_nlls(ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k])
+        mf, Vf, nll, nll_array = jax_forward_pass_nlls(ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k], ensemble_vars)
+        print(f'Vf: {Vf}')
         ms, Vs = jax_backward_pass(mf, Vf, As[k], Qs[k])
+        # print(f'Vs: {Vs}')
         ms_array.append(np.array(ms))
         Vs_array.append(np.array(Vs))
         nlls_array.append(np.array(nll_array))
diff --git a/eks/utils.py b/eks/utils.py
index 73af87f..35f6789 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -177,15 +177,9 @@ def dataframe_to_csv(df, filename):
         print("Error:", e)
 
 
-<<<<<<< HEAD
-def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df,
-                              key_suffix=''):  # key_suffix only required for multi-camera setups
-    for coord in ['x', 'y', 'zscore', 'nll', 'ensemble_std']:
-=======
 def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df, key_suffix=''):
     # Include 'x', 'y', 'zscore', 'nll', 'x_var', and 'y_var' in the coordinates to transfer
     for coord in ['x', 'y', 'zscore', 'nll', 'x_var', 'y_var']:
->>>>>>> 167a655566751a14a0711050886315cecf01623c
         src_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}', coord)
         dst_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}' + key_suffix, coord)
         output_df.loc[:, dst_cols] = keypoint_df.loc[:, src_cols]

From 64d1ad0be22c49b16ff62b7a6b5516e0636acc3a Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Thu, 31 Oct 2024 14:22:48 +0000
Subject: [PATCH 13/25] removed debug prints

---
 eks/singlecam_smoother.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 6f47ab9..192adb4 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -105,11 +105,6 @@ def ensemble_kalman_smoother_singlecam(
         dfs.append(df)
         df_dicts.append({bodypart_list[k] + '_df': df})
 
-        # Save each DataFrame to a CSV for debugging
-        output_csv_path = f"./{bodypart_list[k]}_smoothing_output.csv"
-        df.to_csv(output_csv_path, index=True)
-        print(f"Debug CSV saved for {bodypart_list[k]} at {output_csv_path}")
-
     return df_dicts, s_finals
 
 
@@ -458,14 +453,10 @@ def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs, ense
     Vs_array = []
     nlls_array = []
     Qs = s[:, None, None] * process_cov
-    print(f'ys.shape: {ys.shape}')
-    print(f'ensemble_vars.shape: {ensemble_vars.shape}')
     # Run forward and backward pass for each keypoint
     for k in range(n_keypoints):
         mf, Vf, nll, nll_array = jax_forward_pass_nlls(ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k], ensemble_vars[:,k,:])
-        print(f'Vf: {Vf}')
         ms, Vs = jax_backward_pass(mf, Vf, As[k], Qs[k])
-        print(f'Vs: {Vs}')
         ms_array.append(np.array(ms))
         Vs_array.append(np.array(Vs))
         nlls_array.append(np.array(nll_array))

From 62e53b74913a656faf12b20cb4a71e70fdec09eb Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Fri, 1 Nov 2024 13:41:52 -0400
Subject: [PATCH 14/25] fixed zscore indexing

---
 eks/ibl_paw_multiview_smoother.py | 2 +-
 eks/ibl_pupil_smoother.py         | 2 +-
 eks/multicam_smoother.py          | 2 +-
 eks/singlecam_smoother.py         | 9 ---------
 eks/utils.py                      | 6 ------
 5 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/eks/ibl_paw_multiview_smoother.py b/eks/ibl_paw_multiview_smoother.py
index 8831d9c..b29ceff 100644
--- a/eks/ibl_paw_multiview_smoother.py
+++ b/eks/ibl_paw_multiview_smoother.py
@@ -312,7 +312,7 @@ def ensemble_kalman_smoother_ibl_paw(
                                           scaled_y_m_smooth.T[1 + 2 * i]]).T
             ensemble_preds = scaled_y[:, 2 * i:2 * (i + 1)]
             ensemble_vars_curr = ensemble_vars[:, 2 * i:2 * (i + 1)]
-            zscore = eks_zscore(eks_predictions, ensemble_preds, ensemble_vars_curr,
+            zscore, _ = eks_zscore(eks_predictions, ensemble_preds, ensemble_vars_curr,
                                 min_ensemble_std=4)
             pred_arr.append(zscore)
             ###
diff --git a/eks/ibl_pupil_smoother.py b/eks/ibl_pupil_smoother.py
index feed93d..a3818b3 100644
--- a/eks/ibl_pupil_smoother.py
+++ b/eks/ibl_pupil_smoother.py
@@ -214,7 +214,7 @@ def ensemble_kalman_smoother_ibl_pupil(
             np.asarray([processed_arr_dict[key_pair[0]], processed_arr_dict[key_pair[1]]]).T
         ensemble_preds_curr = ensemble_preds[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
         ensemble_vars_curr = ensemble_vars[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
-        zscore = eks_zscore(eks_predictions, ensemble_preds_curr, ensemble_vars_curr,
+        zscore, _ = eks_zscore(eks_predictions, ensemble_preds_curr, ensemble_vars_curr,
                             min_ensemble_std=zscore_threshold)
         pred_arr.append(zscore)
 
diff --git a/eks/multicam_smoother.py b/eks/multicam_smoother.py
index 5f2cdc0..c320246 100644
--- a/eks/multicam_smoother.py
+++ b/eks/multicam_smoother.py
@@ -180,7 +180,7 @@ def ensemble_kalman_smoother_multicam(
             y_m_smooth.T[camera_indices[camera][1]] + means_camera[camera_indices[camera][1]]
         # compute zscore for EKS to see how it deviates from the ensemble
         eks_predictions = np.asarray([eks_pred_x, eks_pred_y]).T
-        zscore = eks_zscore(eks_predictions, cam_ensemble_preds[camera], cam_ensemble_vars[camera],
+        zscore, _ = eks_zscore(eks_predictions, cam_ensemble_preds[camera], cam_ensemble_vars[camera],
                             min_ensemble_std=zscore_threshold)
         pred_arr = np.vstack([
             eks_pred_x,
diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 0f56293..aaa2a12 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -106,11 +106,6 @@ def ensemble_kalman_smoother_singlecam(
         dfs.append(df)
         df_dicts.append({bodypart_list[k] + '_df': df})
 
-        # Save each DataFrame to a CSV for debugging
-        output_csv_path = f"./{bodypart_list[k]}_smoothing_output.csv"
-        df.to_csv(output_csv_path, index=True)
-        print(f"Debug CSV saved for {bodypart_list[k]} at {output_csv_path}")
-
     return df_dicts, s_finals
 
 
@@ -458,14 +453,10 @@ def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs, ense
     Vs_array = []
     nlls_array = []
     Qs = s[:, None, None] * process_cov
-    print(f'ys.shape: {ys.shape}')
-    print(f'ensemble_vars.shape: {ensemble_vars.shape}')
     # Run forward and backward pass for each keypoint
     for k in range(n_keypoints):
         mf, Vf, nll, nll_array = jax_forward_pass_nlls(ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k], ensemble_vars[:,k,:])
-        print(f'Vf: {Vf}')
         ms, Vs = jax_backward_pass(mf, Vf, As[k], Qs[k])
-        print(f'Vs: {Vs}')
         ms_array.append(np.array(ms))
         Vs_array.append(np.array(Vs))
         nlls_array.append(np.array(nll_array))
diff --git a/eks/utils.py b/eks/utils.py
index 73af87f..5ed3686 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -177,15 +177,9 @@ def dataframe_to_csv(df, filename):
         print("Error:", e)
 
 
-<<<<<<< HEAD
 def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df,
                               key_suffix=''):  # key_suffix only required for multi-camera setups
     for coord in ['x', 'y', 'zscore', 'nll', 'ensemble_std']:
-=======
-def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df, key_suffix=''):
-    # Include 'x', 'y', 'zscore', 'nll', 'x_var', and 'y_var' in the coordinates to transfer
-    for coord in ['x', 'y', 'zscore', 'nll', 'x_var', 'y_var']:
->>>>>>> 167a655566751a14a0711050886315cecf01623c
         src_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}', coord)
         dst_cols = ('ensemble-kalman_tracker', f'{keypoint_ensemble}' + key_suffix, coord)
         output_df.loc[:, dst_cols] = keypoint_df.loc[:, src_cols]

From 3149b1395ad96a567a1b50264cd1d1a102684933 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Sun, 3 Nov 2024 16:37:41 -0500
Subject: [PATCH 15/25] removed debug print for covariance scaling

---
 eks/singlecam_smoother.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 192adb4..27f0a65 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -240,8 +240,6 @@ def singlecam_optimize_smooth(
     if verbose:
         print(f'Correlated keypoint blocks: {blocks}')
 
-    # Inflate the initial state covariance and process noise covariance matrices
-    print(f'Multiplying covariance by a scale of {inflation_factor}')
     S0s *= inflation_factor  # Inflating the initial state covariance
     cov_mats *= inflation_factor  # Inflating the process noise covariance matrices
 

From 6ada39b37b392c3cb05f30261a46e6150e32a2c5 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Sun, 3 Nov 2024 16:53:15 -0500
Subject: [PATCH 16/25] flake8

---
 eks/core.py                       |  9 ++++-----
 eks/ibl_paw_multiview_smoother.py |  2 +-
 eks/ibl_pupil_smoother.py         |  7 +++++--
 eks/multicam_smoother.py          |  5 +++--
 eks/singlecam_smoother.py         | 23 ++++++++++-------------
 eks/utils.py                      |  1 -
 6 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/eks/core.py b/eks/core.py
index 50da6d3..eadd2e7 100644
--- a/eks/core.py
+++ b/eks/core.py
@@ -96,7 +96,7 @@ def ensemble(markers_list, keys, mode='median'):
     ensemble_vars = np.asarray(ensemble_vars).T
     ensemble_stacks = np.asarray(ensemble_stacks).T
     return ensemble_preds, ensemble_vars, ensemble_stacks, \
-           keypoints_avg_dict, keypoints_var_dict, keypoints_stack_dict
+        keypoints_avg_dict, keypoints_var_dict, keypoints_stack_dict
 
 
 def forward_pass(y, m0, S0, C, R, A, Q, ensemble_vars):
@@ -370,7 +370,6 @@ def kalman_filter_step_nlls(carry, inputs):
     K = jnp.dot(V_pred, jnp.dot(C.T, jnp.linalg.inv(innovation_cov)))
     m_t = m_pred + jnp.dot(K, innovation)
     V_t = V_pred - jnp.dot(K, jnp.dot(C, V_pred))
-    #V_t = jnp.dot((jnp.eye(V_pred.shape[0]) - jnp.dot(K, C)), V_pred)
 
     # Compute the negative log-likelihood for the current time step
     nll_current = single_timestep_nll(innovation, innovation_cov)
@@ -403,7 +402,7 @@ def jax_forward_pass(y, m0, cov0, A, Q, C, R, ensemble_vars):
         Q: Shape (state_dim, state_dim). Process noise covariance matrix.
         C: Shape (observation_dim, state_dim). Observation coefficient matrix.
         R: Shape (observation_dim, observation_dim). Observation noise covar matrix.
-        ensemble_vars: Shape (num_timepoints, observation_dimension). Time-varying observation noise variances.
+        ensemble_vars: Shape (num_timepoints, observation_dimension). Time-varying obs noise var.
 
     Returns:
         mfs: Shape (timepoints, state_dim). Mean filter state at each timepoint.
@@ -442,7 +441,7 @@ def jax_forward_pass_nlls(y, m0, cov0, A, Q, C, R, ensemble_vars):
     # Ensure R is a (2, 2) matrix
     if R.ndim == 1:
         R = jnp.diag(R)
-    
+
     # Initialize carry
     num_timepoints = y.shape[0]
     nll_array_init = jnp.zeros(num_timepoints)  # Preallocate an array with zeros
@@ -744,7 +743,7 @@ def compute_covariance_matrix(ensemble_preds):
     cov_mats = []
     for i in range(n_keypoints):
         E_block = extract_submatrix(E, i)
-        cov_mats.append([[1,0],[0,1]])
+        cov_mats.append([[1, 0], [0, 1]])
     cov_mats = jnp.array(cov_mats)
     return cov_mats
 
diff --git a/eks/ibl_paw_multiview_smoother.py b/eks/ibl_paw_multiview_smoother.py
index b29ceff..bc17103 100644
--- a/eks/ibl_paw_multiview_smoother.py
+++ b/eks/ibl_paw_multiview_smoother.py
@@ -313,7 +313,7 @@ def ensemble_kalman_smoother_ibl_paw(
             ensemble_preds = scaled_y[:, 2 * i:2 * (i + 1)]
             ensemble_vars_curr = ensemble_vars[:, 2 * i:2 * (i + 1)]
             zscore, _ = eks_zscore(eks_predictions, ensemble_preds, ensemble_vars_curr,
-                                min_ensemble_std=4)
+                                   min_ensemble_std=4)
             pred_arr.append(zscore)
             ###
         pred_arr = np.asarray(pred_arr)
diff --git a/eks/ibl_pupil_smoother.py b/eks/ibl_pupil_smoother.py
index a3818b3..bf16a21 100644
--- a/eks/ibl_pupil_smoother.py
+++ b/eks/ibl_pupil_smoother.py
@@ -214,8 +214,11 @@ def ensemble_kalman_smoother_ibl_pupil(
             np.asarray([processed_arr_dict[key_pair[0]], processed_arr_dict[key_pair[1]]]).T
         ensemble_preds_curr = ensemble_preds[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
         ensemble_vars_curr = ensemble_vars[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
-        zscore, _ = eks_zscore(eks_predictions, ensemble_preds_curr, ensemble_vars_curr,
-                            min_ensemble_std=zscore_threshold)
+        zscore, _ = eks_zscore(
+            eks_predictions,
+            ensemble_preds_curr,
+            ensemble_vars_curr,
+            min_ensemble_std=zscore_threshold)
         pred_arr.append(zscore)
 
     pred_arr = np.asarray(pred_arr)
diff --git a/eks/multicam_smoother.py b/eks/multicam_smoother.py
index c320246..1ce5bb2 100644
--- a/eks/multicam_smoother.py
+++ b/eks/multicam_smoother.py
@@ -180,8 +180,9 @@ def ensemble_kalman_smoother_multicam(
             y_m_smooth.T[camera_indices[camera][1]] + means_camera[camera_indices[camera][1]]
         # compute zscore for EKS to see how it deviates from the ensemble
         eks_predictions = np.asarray([eks_pred_x, eks_pred_y]).T
-        zscore, _ = eks_zscore(eks_predictions, cam_ensemble_preds[camera], cam_ensemble_vars[camera],
-                            min_ensemble_std=zscore_threshold)
+        zscore, _ = eks_zscore(
+            eks_predictions, cam_ensemble_preds[camera], cam_ensemble_vars[camera],
+            min_ensemble_std=zscore_threshold)
         pred_arr = np.vstack([
             eks_pred_x,
             eks_pred_y,
diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 27f0a65..151723b 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -1,4 +1,3 @@
-import time
 from functools import partial
 
 import jax
@@ -79,10 +78,11 @@ def ensemble_kalman_smoother_singlecam(
         eks_preds_array[k] = y_m_smooths[k].copy()
         eks_preds_array[k] = np.asarray([eks_preds_array[k].T[0] + mean_x_obs,
                                          eks_preds_array[k].T[1] + mean_y_obs]).T
-        zscore, ensemble_std = eks_zscore(eks_preds_array[k],
-                            ensemble_preds[:, k, :],
-                            ensemble_vars[:, k, :],
-                            min_ensemble_std=zscore_threshold)
+        zscore, ensemble_std = eks_zscore(
+            eks_preds_array[k],
+            ensemble_preds[:, k, :],
+            ensemble_vars[:, k, :],
+            min_ensemble_std=zscore_threshold)
         nll = nlls[k]
 
         # Final Cleanup
@@ -263,7 +263,8 @@ def nll_loss_parallel_scan(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs):
         @partial(jit)
         def nll_loss_sequential_scan(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs, ensemble_vars):
             s = jnp.exp(s)  # To ensure positivity
-            return singlecam_smooth_min(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs, ensemble_vars)
+            return singlecam_smooth_min(
+                s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs, ensemble_vars)
 
         loss_function = nll_loss_sequential_scan
 
@@ -309,17 +310,13 @@ def step(s, opt_state):
 
             prev_loss = jnp.inf
             for iteration in range(maxiter):
-                start_time = time.time()
                 s_init, opt_state, loss = step(s_init, opt_state)
 
                 if verbose and iteration % 10 == 0 or iteration == maxiter - 1:
-                     print(f'Iteration {iteration}, Current loss: {loss}, Current s: {s_init}')
+                    print(f'Iteration {iteration}, Current loss: {loss}, Current s: {s_init}')
 
                 tol = 0.001 * jnp.abs(jnp.log(prev_loss))
                 if jnp.linalg.norm(loss - prev_loss) < tol + 1e-6:
-                #    print(
-                #        f'Converged at iteration {iteration} with '
-                #        f'smoothing parameter {jnp.exp(s_init)}. NLL={loss}')
                     break
 
                 prev_loss = loss
@@ -453,7 +450,8 @@ def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs, ense
     Qs = s[:, None, None] * process_cov
     # Run forward and backward pass for each keypoint
     for k in range(n_keypoints):
-        mf, Vf, nll, nll_array = jax_forward_pass_nlls(ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k], ensemble_vars[:,k,:])
+        mf, Vf, nll, nll_array = jax_forward_pass_nlls(
+            ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k], ensemble_vars[:, k, :])
         ms, Vs = jax_backward_pass(mf, Vf, As[k], Qs[k])
         ms_array.append(np.array(ms))
         Vs_array.append(np.array(Vs))
@@ -461,6 +459,5 @@ def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs, ense
 
     smoothed_means = np.stack(ms_array, axis=0)
     smoothed_covariances = np.stack(Vs_array, axis=0)
-    nlls_final = np.stack(nlls_array, axis=0)
 
     return smoothed_means, smoothed_covariances, nlls_array
diff --git a/eks/utils.py b/eks/utils.py
index 35f6789..6d981f3 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -151,7 +151,6 @@ def make_output_dataframe(markers_curr):
             # Set likelihood values to 1.0
             markers_eks[col].values[:] = 1.0
         elif col[-1] in ['x_var', 'y_var']:
-            # Set x_var and y_var to NaN to indicate that they need to be filled with variance values
             markers_eks[col].values[:] = np.nan
         else:
             # Set other values to NaN

From aa25a02256e2f88cd1efe3d501eca5aa829fdb74 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Tue, 5 Nov 2024 16:46:02 -0500
Subject: [PATCH 17/25] pytests for core functions WIP

---
 tests/run_tests.py               |  23 ++
 tests/test_core.py               | 566 +++++++++++++++++++++++++++++++
 tests/test_singlecam_smoother.py |   4 -
 3 files changed, 589 insertions(+), 4 deletions(-)
 create mode 100644 tests/run_tests.py
 create mode 100644 tests/test_core.py

diff --git a/tests/run_tests.py b/tests/run_tests.py
new file mode 100644
index 0000000..2c045cc
--- /dev/null
+++ b/tests/run_tests.py
@@ -0,0 +1,23 @@
+import pytest
+import sys
+
+def main():
+    # Get arguments from the command line (excluding the script name)
+    args = sys.argv[1:]
+
+    # Default to running both test files if no arguments are provided
+    if not args:
+        test_files = ["test_core.py", "test_singlecam_smoother.py"]
+    else:
+        # Use provided arguments as the list of test files to run
+        test_files = args
+
+    # Run pytest on the specified test files
+    result = pytest.main(["-v"] + test_files)
+    if result == 0:
+        print("All tests passed successfully!")
+    else:
+        print("Some tests failed.")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/tests/test_core.py b/tests/test_core.py
new file mode 100644
index 0000000..deceae3
--- /dev/null
+++ b/tests/test_core.py
@@ -0,0 +1,566 @@
+import pytest
+import numpy as np
+import jax.numpy as jnp
+import jax
+import pandas as pd
+from eks.core import ensemble, kalman_dot, forward_pass, backward_pass, compute_nll, jax_ensemble
+from collections import defaultdict
+
+
+def test_ensemble():
+    # Simulate marker data with three models, each with two keypoints and 5 samples
+    np.random.seed(0)
+    num_samples = 5
+    num_keypoints = 2
+    markers_list = []
+    keys = ['keypoint_1', 'keypoint_2']
+
+    # Create random data for three different marker DataFrames
+    for i in range(3):
+        data = {
+            'keypoint_1': np.random.rand(num_samples),
+            'keypoint_1likelihood': np.random.rand(num_samples),
+            'keypoint_2': np.random.rand(num_samples),
+            'keypoint_2likelihood': np.random.rand(num_samples)
+        }
+        markers_list.append(pd.DataFrame(data))
+
+    # Run the ensemble function with 'median' mode
+    ensemble_preds, ensemble_vars, ensemble_stacks, keypoints_avg_dict, \
+    keypoints_var_dict, keypoints_stack_dict = ensemble(markers_list, keys, mode='median')
+
+    # Verify shapes of output arrays
+    assert ensemble_preds.shape == (num_samples, num_keypoints), \
+        f"Expected shape {(num_samples, num_keypoints)}, got {ensemble_preds.shape}"
+    assert ensemble_vars.shape == (num_samples, num_keypoints), \
+        f"Expected shape {(num_samples, num_keypoints)}, got {ensemble_vars.shape}"
+    assert ensemble_stacks.shape == (3, num_samples, num_keypoints), \
+        f"Expected shape {(3, num_samples, num_keypoints)}, got {ensemble_stacks.shape}"
+
+    # Verify contents of dictionaries
+    assert set(keypoints_avg_dict.keys()) == set(keys), \
+        f"Expected keys {keys}, got {keypoints_avg_dict.keys()}"
+    assert set(keypoints_var_dict.keys()) == set(keys), \
+        f"Expected keys {keys}, got {keypoints_var_dict.keys()}"
+    assert len(keypoints_stack_dict) == 3, \
+        f"Expected 3 models, got {len(keypoints_stack_dict)}"
+
+    # Check values for a keypoint (manually compute median and variance)
+    for key in keys:
+        stack = np.array([df[key].values for df in markers_list]).T
+        expected_median = np.nanmedian(stack, axis=1)
+        expected_variance = np.nanvar(stack, axis=1)
+
+        assert np.allclose(keypoints_avg_dict[key], expected_median), \
+            f"Expected {expected_median} for {key}, got {keypoints_avg_dict[key]}"
+        assert np.allclose(keypoints_var_dict[key], expected_variance), \
+            f"Expected {expected_variance} for {key}, got {keypoints_var_dict[key]}"
+
+    # Run the ensemble function with 'confidence_weighted_mean' mode
+    ensemble_preds, ensemble_vars, ensemble_stacks, keypoints_avg_dict, \
+    keypoints_var_dict, keypoints_stack_dict = ensemble(markers_list, keys,
+                                                        mode='confidence_weighted_mean')
+
+    # Verify shapes of output arrays again
+    assert ensemble_preds.shape == (num_samples, num_keypoints), \
+        f"Expected shape {(num_samples, num_keypoints)}, got {ensemble_preds.shape}"
+    assert ensemble_vars.shape == (num_samples, num_keypoints), \
+        f"Expected shape {(num_samples, num_keypoints)}, got {ensemble_vars.shape}"
+    assert ensemble_stacks.shape == (3, num_samples, num_keypoints), \
+        f"Expected shape {(3, num_samples, num_keypoints)}, got {ensemble_stacks.shape}"
+
+    # Verify likelihood-based weighted averaging calculations
+    for key in keys:
+        stack = np.array([df[key].values for df in markers_list]).T
+        likelihood_stack = np.array([df[key + 'likelihood'].values for df in markers_list]).T
+        conf_per_keypoint = np.sum(likelihood_stack, axis=1)
+        weighted_mean = np.sum(stack * likelihood_stack, axis=1) / conf_per_keypoint
+        expected_variance = np.nanvar(stack, axis=1) / (
+                np.sum(likelihood_stack, axis=1) / likelihood_stack.shape[1])
+
+        assert np.allclose(keypoints_avg_dict[key], weighted_mean), \
+            f"Expected {weighted_mean} for {key}, got {keypoints_avg_dict[key]}"
+        assert np.allclose(keypoints_var_dict[key], expected_variance), \
+            f"Expected {expected_variance} for {key}, got {keypoints_var_dict[key]}"
+
+
+def test_kalman_dot_basic():
+    # Basic test with random matrices
+    n_keypoints = 5
+    n_latents = 3
+
+    innovation = np.random.randn(n_keypoints)
+    V = np.eye(n_latents)
+    C = np.random.randn(n_keypoints, n_latents)
+    R = np.eye(n_keypoints)
+
+    # Run kalman_dot
+    Ks, innovation_cov = kalman_dot(innovation, V, C, R)
+
+    # Check output shapes
+    assert Ks.shape == (n_latents,), f"Expected shape {(n_latents,)}, got {Ks.shape}"
+    assert innovation_cov.shape == (
+    n_keypoints,), f"Expected shape {(n_keypoints,)}, got {innovation_cov.shape}"
+
+
+def test_kalman_dot_zero_matrices():
+    # Test with zero matrices for stability
+    n_keypoints = 4
+    n_latents = 2
+
+    innovation = np.zeros(n_keypoints)
+    V = np.zeros((n_latents, n_latents))
+    C = np.zeros((n_keypoints, n_latents))
+    R = np.zeros((n_keypoints, n_keypoints))
+
+    # Run kalman_dot
+    Ks, innovation_cov = kalman_dot(innovation, V, C, R)
+
+    # Check if outputs are zero as expected
+    assert np.allclose(Ks, 0), "Expected Ks to be zero with zero inputs"
+    assert np.allclose(innovation_cov, 0), "Expected innovation_cov to be zero with zero inputs"
+
+
+def test_kalman_dot_singular_innovation_cov():
+    # Test for singular innovation_cov by making R and C*V*C.T equal
+    n_keypoints = 3
+    n_latents = 2
+
+    innovation = np.random.randn(n_keypoints)
+    V = np.eye(n_latents)
+    C = np.ones((n_keypoints, n_latents))  # Constant values lead to rank-deficient product
+    R = -np.dot(C, np.dot(V, C.T))  # Makes innovation_cov close to zero matrix
+
+    # Run kalman_dot and check stability
+    try:
+        Ks, innovation_cov = kalman_dot(innovation, V, C, R)
+        assert np.allclose(innovation_cov,
+                           0), "Expected nearly zero innovation_cov with constructed singularity"
+    except np.linalg.LinAlgError:
+        pytest.fail("kalman_dot raised LinAlgError with nearly singular innovation_cov")
+
+
+def test_kalman_dot_random_values():
+    # Randomized test to ensure function works with arbitrary valid values
+    n_keypoints = 5
+    n_latents = 4
+
+    innovation = np.random.randn(n_keypoints)
+    V = np.random.randn(n_latents, n_latents)
+    V = np.dot(V, V.T)  # Make V symmetric positive semi-definite
+    C = np.random.randn(n_keypoints, n_latents)
+    R = np.random.randn(n_keypoints, n_keypoints)
+    R = np.dot(R, R.T)  # Make R symmetric positive semi-definite
+
+    # Run kalman_dot
+    Ks, innovation_cov = kalman_dot(innovation, V, C, R)
+
+    # Check if innovation_cov is positive definite (eigenvalues should be positive or close to zero)
+    eigvals = np.linalg.eigvalsh(innovation_cov)
+    assert np.all(eigvals >= -1e-8), "Expected innovation_cov to be positive semi-definite"
+    assert Ks.shape == (n_latents,), f"Expected shape {(n_latents,)}, got {Ks.shape}"
+    assert innovation_cov.shape == (
+    n_keypoints,), f"Expected shape {(n_keypoints,)}, got {innovation_cov.shape}"
+
+
+import pytest
+import numpy as np
+
+
+def test_forward_pass_basic():
+    # Set up basic test data
+    T = 10
+    n_keypoints = 5
+    n_latents = 3
+
+    y = np.random.randn(T, n_keypoints)
+    m0 = np.random.randn(n_latents)
+    S0 = np.eye(n_latents)
+    C = np.random.randn(n_keypoints, n_latents)
+    R = np.eye(n_keypoints)
+    A = np.eye(n_latents)
+    Q = np.eye(n_latents)
+    ensemble_vars = np.abs(np.random.randn(T, n_keypoints))  # Variance should be non-negative
+
+    # Run forward_pass
+    mf, Vf, S, innovations, innovation_cov = forward_pass(y, m0, S0, C, R, A, Q, ensemble_vars)
+
+    # Check output shapes
+    assert mf.shape == (T, n_latents), f"Expected shape {(T, n_latents)}, got {mf.shape}"
+    assert Vf.shape == (
+    T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vf.shape}"
+    assert S.shape == (
+    T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {S.shape}"
+    assert innovations.shape == (
+    T, n_keypoints), f"Expected shape {(T, n_keypoints)}, got {innovations.shape}"
+    assert innovation_cov.shape == (T, n_keypoints,
+                                    n_keypoints), f"Expected shape {(T, n_keypoints, n_keypoints)}, got {innovation_cov.shape}"
+
+
+def test_forward_pass_with_nan_values():
+    # Test with some NaN values in y
+    T = 10
+    n_keypoints = 5
+    n_latents = 3
+
+    y = np.random.randn(T, n_keypoints)
+    y[2, 1] = np.nan  # Insert NaN value
+    m0 = np.random.randn(n_latents)
+    S0 = np.eye(n_latents)
+    C = np.random.randn(n_keypoints, n_latents)
+    R = np.eye(n_keypoints)
+    A = np.eye(n_latents)
+    Q = np.eye(n_latents)
+    ensemble_vars = np.abs(np.random.randn(T, n_keypoints))
+
+    # Run forward_pass
+    mf, Vf, S, innovations, innovation_cov = forward_pass(y, m0, S0, C, R, A, Q, ensemble_vars)
+
+    # Check if outputs are still valid despite NaN in inputs
+    assert np.isfinite(mf).all(), "Non-finite values found in mf"
+    assert np.isfinite(Vf).all(), "Non-finite values found in Vf"
+    assert np.isfinite(S).all(), "Non-finite values found in S"
+    assert np.isnan(innovations).sum() > 0, "Expected some NaNs in innovations due to NaNs in y"
+
+
+def test_forward_pass_single_sample():
+    # Test with a single sample (edge case)
+    T = 1
+    n_keypoints = 5
+    n_latents = 3
+
+    y = np.random.randn(T, n_keypoints)
+    m0 = np.random.randn(n_latents)
+    S0 = np.eye(n_latents)
+    C = np.random.randn(n_keypoints, n_latents)
+    R = np.eye(n_keypoints)
+    A = np.eye(n_latents)
+    Q = np.eye(n_latents)
+    ensemble_vars = np.abs(np.random.randn(T, n_keypoints))
+
+    # Run forward_pass
+    mf, Vf, S, innovations, innovation_cov = forward_pass(y, m0, S0, C, R, A, Q, ensemble_vars)
+
+    # Check output shapes with a single sample
+    assert mf.shape == (T, n_latents), f"Expected shape {(T, n_latents)}, got {mf.shape}"
+    assert Vf.shape == (
+    T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vf.shape}"
+    assert S.shape == (
+    T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {S.shape}"
+    assert innovations.shape == (
+    T, n_keypoints), f"Expected shape {(T, n_keypoints)}, got {innovations.shape}"
+    assert innovation_cov.shape == (T, n_keypoints, n_keypoints), \
+        f"Expected shape {(T, n_keypoints, n_keypoints)}, got {innovation_cov.shape}"
+
+
+def test_forward_pass_zero_ensemble_vars():
+    # Test with zero ensemble_vars to check stability
+    T = 10
+    n_keypoints = 5
+    n_latents = 3
+
+    y = np.random.randn(T, n_keypoints)
+    m0 = np.random.randn(n_latents)
+    S0 = np.eye(n_latents)
+    C = np.random.randn(n_keypoints, n_latents)
+    R = np.eye(n_keypoints)
+    A = np.eye(n_latents)
+    Q = np.eye(n_latents)
+    ensemble_vars = np.zeros((T, n_keypoints))  # Ensemble vars set to zero
+
+    # Run forward_pass
+    mf, Vf, S, innovations, innovation_cov = forward_pass(y, m0, S0, C, R, A, Q, ensemble_vars)
+
+    # Check if outputs are finite and correctly shaped
+    assert np.isfinite(mf).all(), "Non-finite values found in mf with zero ensemble_vars"
+    assert np.isfinite(Vf).all(), "Non-finite values found in Vf with zero ensemble_vars"
+    assert np.isfinite(S).all(), "Non-finite values found in S with zero ensemble_vars"
+    assert np.isfinite(
+        innovations).all(), "Non-finite values found in innovations with zero ensemble_vars"
+    assert np.isfinite(
+        innovation_cov).all(), "Non-finite values found in innovation_cov with zero ensemble_vars"
+
+
+def test_backward_pass_basic():
+    # Set up basic test data
+    T = 10
+    n_keypoints = 5
+    n_latents = 3
+
+    y = np.random.randn(T, n_keypoints)
+    mf = np.random.randn(T, n_keypoints)
+    Vf = np.random.randn(T, n_latents, n_latents)
+    Vf = np.array([np.dot(v, v.T) for v in Vf])  # Make Vf positive semi-definite
+    S = np.copy(Vf)  # Use S as the same structure as Vf
+    A = np.eye(n_latents)
+
+    # Run backward_pass
+    ms, Vs, CV = backward_pass(y, mf, Vf, S, A)
+
+    # Check output shapes
+    assert ms.shape == (T, n_keypoints), f"Expected shape {(T, n_keypoints)}, got {ms.shape}"
+    assert Vs.shape == (
+    T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
+    assert CV.shape == (
+    T - 1, n_latents, n_latents), f"Expected shape {(T - 1, n_latents, n_latents)}, got {CV.shape}"
+
+
+def test_backward_pass_with_nan_values():
+    # Test with some NaN values in y
+    T = 10
+    n_keypoints = 5
+    n_latents = 3
+
+    y = np.random.randn(T, n_keypoints)
+    y[2, 1] = np.nan  # Insert NaN value
+    mf = np.random.randn(T, n_keypoints)
+    Vf = np.random.randn(T, n_latents, n_latents)
+    Vf = np.array([np.dot(v, v.T) for v in Vf])  # Make Vf positive semi-definite
+    S = np.copy(Vf)
+    A = np.eye(n_latents)
+
+    # Run backward_pass
+    ms, Vs, CV = backward_pass(y, mf, Vf, S, A)
+
+    # Check if outputs are still valid despite NaN in inputs
+    assert np.isfinite(ms).all(), "Non-finite values found in ms"
+    assert np.isfinite(Vs).all(), "Non-finite values found in Vs"
+    assert np.isfinite(CV).all(), "Non-finite values found in CV"
+
+
+def test_backward_pass_single_timestep():
+    # Test with only one timestep (edge case)
+    T = 1
+    n_keypoints = 5
+    n_latents = 3
+
+    y = np.random.randn(T, n_keypoints)
+    mf = np.random.randn(T, n_keypoints)
+    Vf = np.eye(n_latents)[None, :, :]  # Shape (1, n_latents, n_latents)
+    S = np.copy(Vf)
+    A = np.eye(n_latents)
+
+    # Run backward_pass
+    ms, Vs, CV = backward_pass(y, mf, Vf, S, A)
+
+    # Check output shapes with a single timestep
+    assert ms.shape == (T, n_keypoints), f"Expected shape {(T, n_keypoints)}, got {ms.shape}"
+    assert Vs.shape == (
+    T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
+    assert CV.shape == (
+    0, n_latents, n_latents), f"Expected shape {(0, n_latents, n_latents)}, got {CV.shape}"
+
+
+def test_backward_pass_singular_S_matrix():
+    # Test with singular S matrix
+    T = 10
+    n_keypoints = 5
+    n_latents = 3
+
+    y = np.random.randn(T, n_keypoints)
+    mf = np.random.randn(T, n_keypoints)
+    Vf = np.random.randn(T, n_latents, n_latents)
+    Vf = np.array([np.dot(v, v.T) for v in Vf])  # Make Vf positive semi-definite
+    S = np.zeros((T, n_latents, n_latents))  # Singular S matrix (all zeros)
+    A = np.eye(n_latents)
+
+    # Run backward_pass and check stability
+    try:
+        ms, Vs, CV = backward_pass(y, mf, Vf, S, A)
+    except np.linalg.LinAlgError:
+        pytest.fail("backward_pass raised LinAlgError with singular S matrix")
+
+
+def test_backward_pass_random_values():
+    # Randomized test to ensure function works with arbitrary valid values
+    T = 10
+    n_keypoints = 6
+    n_latents = 4
+
+    y = np.random.randn(T, n_keypoints)
+    mf = np.random.randn(T, n_keypoints)
+    Vf = np.random.randn(T, n_latents, n_latents)
+    Vf = np.array([np.dot(v, v.T) for v in Vf])  # Make Vf positive semi-definite
+    S = np.copy(Vf)
+    A = np.eye(n_latents)
+
+    # Run backward_pass
+    ms, Vs, CV = backward_pass(y, mf, Vf, S, A)
+
+    # Verify shapes and finite values
+    assert ms.shape == (T, n_keypoints), f"Expected shape {(T, n_keypoints)}, got {ms.shape}"
+    assert Vs.shape == (
+    T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
+    assert CV.shape == (
+    T - 1, n_latents, n_latents), f"Expected shape {(T - 1, n_latents, n_latents)}, got {CV.shape}"
+    assert np.isfinite(ms).all(), "Non-finite values found in ms"
+    assert np.isfinite(Vs).all(), "Non-finite values found in Vs"
+    assert np.isfinite(CV).all(), "Non-finite values found in CV"
+
+
+def test_compute_nll_basic():
+    # Set up basic test data
+    T = 10
+    n_coords = 3
+
+    innovations = np.random.randn(T, n_coords)
+    innovation_covs = np.array([np.eye(n_coords) for _ in range(T)])  # Identity matrices
+
+    # Run compute_nll
+    nll, nll_values = compute_nll(innovations, innovation_covs)
+
+    # Check output types
+    assert isinstance(nll, float), f"Expected nll to be float, got {type(nll)}"
+    assert isinstance(nll_values, list), f"Expected nll_values to be list, got {type(nll_values)}"
+
+    # Check nll_values length
+    assert len(nll_values) == T, f"Expected length {T}, got {len(nll_values)}"
+
+    # Check that all values in nll_values are positive
+    assert all(v >= 0 for v in nll_values), "Expected all nll_values to be non-negative"
+
+
+def test_compute_nll_with_nan_innovations():
+    # Test with some NaN values in innovations
+    T = 10
+    n_coords = 3
+
+    innovations = np.random.randn(T, n_coords)
+    innovations[2, 1] = np.nan  # Insert NaN value
+    innovation_covs = np.array([np.eye(n_coords) for _ in range(T)])
+
+    # Run compute_nll
+    nll, nll_values = compute_nll(innovations, innovation_covs)
+
+    # Check nll_values length
+    assert len(nll_values) == T - 1, f"Expected length {T - 1}, got {len(nll_values)}"
+    # Check that nll is finite
+    assert np.isfinite(nll), "Expected finite nll despite NaN in innovations"
+
+
+def test_compute_nll_zero_innovation_covs():
+    # Test with zero matrices for innovation_covs
+    T = 5
+    n_coords = 2
+
+    innovations = np.random.randn(T, n_coords)
+    innovation_covs = np.zeros((T, n_coords, n_coords))  # Zero matrices for innovation_covs
+
+    # Run compute_nll
+    nll, nll_values = compute_nll(innovations, innovation_covs, epsilon=1e-6)
+
+    # Check nll is finite and values are positive due to epsilon regularization
+    assert np.isfinite(nll), "Expected finite nll with zero innovation_covs"
+    assert all(v >= 0 for v in nll_values), "Expected all nll_values to be non-negative"
+
+
+def test_compute_nll_small_epsilon():
+    # Test with a small epsilon to ensure stability with near-singular innovation_covs
+    T = 10
+    n_coords = 3
+
+    innovations = np.random.randn(T, n_coords)
+    # Make innovation_covs near-singular by making all elements small
+    innovation_covs = np.full((T, n_coords, n_coords), 1e-8)
+
+    # Run compute_nll with a very small epsilon
+    nll, nll_values = compute_nll(innovations, innovation_covs, epsilon=1e-10)
+
+    # Check that nll is finite
+    assert np.isfinite(nll), "Expected finite nll with small epsilon"
+    assert len(nll_values) == T, f"Expected length {T}, got {len(nll_values)}"
+    # Ensure all values in nll_values are positive
+    assert all(v >= 0 for v in nll_values), "Expected all nll_values to be non-negative"
+
+
+def test_compute_nll_random_values():
+    # Randomized test with arbitrary valid values
+    T = 8
+    n_coords = 4
+
+    innovations = np.random.randn(T, n_coords)
+    innovation_covs = np.random.randn(T, n_coords, n_coords)
+    # Make innovation_covs positive semi-definite
+    innovation_covs = np.array([np.dot(c, c.T) for c in innovation_covs])
+
+    # Run compute_nll
+    nll, nll_values = compute_nll(innovations, innovation_covs)
+
+    # Check nll and nll_values length
+    assert isinstance(nll, float), f"Expected nll to be float, got {type(nll)}"
+    assert len(nll_values) == T, f"Expected length {T}, got {len(nll_values)}"
+    # Ensure finite values
+    assert np.isfinite(nll), "Expected finite nll"
+    assert all(
+        np.isfinite(nll_val) for nll_val in nll_values), "Expected all nll_values to be finite"
+
+
+def test_jax_ensemble_basic():
+    # Basic test data
+    n_models = 4
+    n_timepoints = 5
+    n_keypoints = 3
+    markers_3d_array = np.random.rand(n_models, n_timepoints, n_keypoints * 3)
+
+    # Run jax_ensemble in median mode
+    ensemble_preds, ensemble_vars, keypoints_avg_dict = jax_ensemble(markers_3d_array, mode='median')
+
+    # Check output shapes
+    assert ensemble_preds.shape == (n_timepoints, n_keypoints, 2), \
+        f"Expected shape {(n_timepoints, n_keypoints, 2)}, got {ensemble_preds.shape}"
+    assert ensemble_vars.shape == (n_timepoints, n_keypoints, 2), \
+        f"Expected shape {(n_timepoints, n_keypoints, 2)}, got {ensemble_vars.shape}"
+    assert len(keypoints_avg_dict) == n_keypoints * 2, \
+        f"Expected {n_keypoints * 2} entries in keypoints_avg_dict, got {len(keypoints_avg_dict)}"
+
+def test_jax_ensemble_median_mode():
+    # Test median mode
+    n_models = 4
+    n_timepoints = 5
+    n_keypoints = 3
+    markers_3d_array = np.random.rand(n_models, n_timepoints, n_keypoints * 3)
+
+    # Run jax_ensemble
+    ensemble_preds, ensemble_vars, _ = jax_ensemble(markers_3d_array, mode='median')
+
+    # Check that ensemble_preds and ensemble_vars are finite
+    assert jnp.isfinite(ensemble_preds).all(), "Expected finite values in ensemble_preds"
+    assert jnp.isfinite(ensemble_vars).all(), "Expected finite values in ensemble_vars"
+
+def test_jax_ensemble_mean_mode():
+    # Test mean mode
+    n_models = 4
+    n_timepoints = 5
+    n_keypoints = 3
+    markers_3d_array = np.random.rand(n_models, n_timepoints, n_keypoints * 3)
+
+    # Run jax_ensemble in mean mode
+    ensemble_preds, ensemble_vars, _ = jax_ensemble(markers_3d_array, mode='mean')
+
+    # Check that ensemble_preds and ensemble_vars are finite
+    assert jnp.isfinite(ensemble_preds).all(), "Expected finite values in ensemble_preds"
+    assert jnp.isfinite(ensemble_vars).all(), "Expected finite values in ensemble_vars"
+
+def test_jax_ensemble_confidence_weighted_mean_mode():
+    # Test confidence-weighted mean mode
+    n_models = 4
+    n_timepoints = 5
+    n_keypoints = 3
+    markers_3d_array = np.random.rand(n_models, n_timepoints, n_keypoints * 3)
+
+    # Run jax_ensemble in confidence_weighted_mean mode
+    ensemble_preds, ensemble_vars, _ = jax_ensemble(markers_3d_array, mode='confidence_weighted_mean')
+
+    # Check that ensemble_preds and ensemble_vars are finite
+    assert jnp.isfinite(ensemble_preds).all(), "Expected finite values in ensemble_preds"
+    assert jnp.isfinite(ensemble_vars).all(), "Expected finite values in ensemble_vars"
+
+def test_jax_ensemble_unsupported_mode():
+    # Test that unsupported mode raises ValueError
+    n_models = 4
+    n_timepoints = 5
+    n_keypoints = 3
+    markers_3d_array = np.random.rand(n_models, n_timepoints, n_keypoints * 3)
+
+    with pytest.raises(ValueError, match="averaging not supported"):
+        jax_ensemble(markers_3d_array, mode='unsupported')
\ No newline at end of file
diff --git a/tests/test_singlecam_smoother.py b/tests/test_singlecam_smoother.py
index 9753858..2ffd263 100644
--- a/tests/test_singlecam_smoother.py
+++ b/tests/test_singlecam_smoother.py
@@ -57,7 +57,3 @@ def test_ensemble_kalman_smoother_singlecam():
                 1), "Expected 'y_var' in DataFrame columns"
             assert 'zscore' in df.columns.get_level_values(
                 1), "Expected 'zscore' in DataFrame columns"
-
-
-if __name__ == "__main__":
-    pytest.main()

From 24edd5e0825bde87ac78bb561882107939179aa2 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Fri, 15 Nov 2024 12:04:37 -0500
Subject: [PATCH 18/25] pytests and refactoring for cleaner file i/o

---
 eks/command_line_args.py         |   6 +-
 eks/ibl_pupil_smoother.py        |  37 ++++-
 eks/multicam_smoother.py         |   6 +-
 eks/singlecam_smoother.py        |  73 ++++++++-
 eks/utils.py                     |  63 +++++---
 scripts/ibl_pupil_example.py     |  84 +++++-----
 scripts/multicam_example.py      |   3 +-
 scripts/singlecam_example.py     |  93 +++++------
 tests/test_core.py               | 157 ++++++++++++------
 tests/test_ibl_pupil_smoother.py | 262 +++++++++++++++++++++++++++++++
 tests/test_singlecam_smoother.py | 127 +++++++++++++--
 11 files changed, 732 insertions(+), 179 deletions(-)
 create mode 100644 tests/test_ibl_pupil_smoother.py

diff --git a/eks/command_line_args.py b/eks/command_line_args.py
index 090b9df..8196e58 100644
--- a/eks/command_line_args.py
+++ b/eks/command_line_args.py
@@ -27,10 +27,14 @@ def handle_parse_args(script_type):
     parser = argparse.ArgumentParser()
     parser.add_argument(
         '--input-dir',
-        required=True,
         help='directory of model prediction csv files',
         type=str,
     )
+    parser.add_argument(
+        '--input-files',
+        help='list model prediction csv files in various directories',
+        nargs='+'
+    )
     parser.add_argument(
         '--save-dir',
         help='save directory for outputs (default is input-dir)',
diff --git a/eks/ibl_pupil_smoother.py b/eks/ibl_pupil_smoother.py
index bf16a21..9258d25 100644
--- a/eks/ibl_pupil_smoother.py
+++ b/eks/ibl_pupil_smoother.py
@@ -5,7 +5,7 @@
 from scipy.optimize import minimize
 
 from eks.core import backward_pass, compute_nll, eks_zscore, ensemble, forward_pass
-from eks.utils import crop_frames, make_dlc_pandas_index
+from eks.utils import crop_frames, make_dlc_pandas_index, format_data
 
 
 # -----------------------
@@ -79,6 +79,41 @@ def add_mean_to_array(pred_arr, keys, mean_x, mean_y):
     return processed_arr_dict
 
 
+def fit_eks_pupil(input_source, data_type, save_dir, smooth_params, s_frames):
+    """
+    Wrapper function to fit the Ensemble Kalman Smoother for the ibl-pupil dataset.
+
+    Args:
+        input_source (str or list): Directory path or list of input CSV files.
+        data_type (str): Type of data (e.g., 'csv', 'slp').
+        save_dir (str): Directory to save outputs.
+        smooth_params (list): List containing diameter_s and com_s.
+        s_frames (list or None): Frames for automatic optimization if needed.
+
+    Returns:
+        df_dicts (dict): Dictionary containing smoothed DataFrames.
+        smooth_params (list): Final smoothing parameters used.
+        input_dfs_list (list): List of input DataFrames.
+        keypoint_names (list): List of keypoint names.
+        nll_values (list): List of NLL values.
+    """
+    # Load and format input files
+    input_dfs_list, output_df, keypoint_names = format_data(input_source, data_type)
+
+    print(f"Input data loaded for keypoints: {keypoint_names}")
+
+    # Run the ensemble Kalman smoother
+    df_dicts, smooth_params, nll_values = ensemble_kalman_smoother_ibl_pupil(
+        markers_list=input_dfs_list,
+        keypoint_names=keypoint_names,
+        tracker_name='ensemble-kalman_tracker',
+        smooth_params=smooth_params,
+        s_frames=s_frames
+    )
+
+    return df_dicts, smooth_params, input_dfs_list, keypoint_names, nll_values
+
+
 def ensemble_kalman_smoother_ibl_pupil(
     markers_list,
     keypoint_names,
diff --git a/eks/multicam_smoother.py b/eks/multicam_smoother.py
index 1ce5bb2..1c5000e 100644
--- a/eks/multicam_smoother.py
+++ b/eks/multicam_smoother.py
@@ -166,7 +166,7 @@ def ensemble_kalman_smoother_multicam(
     # final cleanup
     # --------------------------------------
     pdindex = make_dlc_pandas_index([keypoint_ensemble],
-                                    labels=["x", "y", "likelihood", "x_var", "y_var", "zscore"])
+                                    labels=["x", "y", "likelihood", "x_var", "y_var", "zscore", "nll", "ensemble_std"])
     camera_indices = []
     for camera in range(num_cameras):
         camera_indices.append([camera * 2, camera * 2 + 1])
@@ -180,7 +180,7 @@ def ensemble_kalman_smoother_multicam(
             y_m_smooth.T[camera_indices[camera][1]] + means_camera[camera_indices[camera][1]]
         # compute zscore for EKS to see how it deviates from the ensemble
         eks_predictions = np.asarray([eks_pred_x, eks_pred_y]).T
-        zscore, _ = eks_zscore(
+        zscore, ensemble_std = eks_zscore(
             eks_predictions, cam_ensemble_preds[camera], cam_ensemble_vars[camera],
             min_ensemble_std=zscore_threshold)
         pred_arr = np.vstack([
@@ -190,6 +190,8 @@ def ensemble_kalman_smoother_multicam(
             y_v_smooth[:, camera_indices[camera][0], camera_indices[camera][0]],
             y_v_smooth[:, camera_indices[camera][1], camera_indices[camera][1]],
             zscore,
+            nll_values,
+            ensemble_std
         ]).T
         camera_dfs[camera_name + '_df'] = pd.DataFrame(pred_arr, columns=pdindex)
     return camera_dfs, smooth_param_final, nll_values
diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 151723b..075da91 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -1,5 +1,5 @@
 from functools import partial
-
+import os
 import jax
 import jax.numpy as jnp
 import numpy as np
@@ -17,7 +17,76 @@
     jax_forward_pass_nlls,
     pkf_and_loss,
 )
-from eks.utils import crop_frames, make_dlc_pandas_index
+from eks.utils import crop_frames, make_dlc_pandas_index, format_data, populate_output_dataframe
+
+
+def fit_eks_singlecam(input_source, data_type, save_dir, save_filename, bodypart_list, s, s_frames,
+                      blocks, verbose):
+    """
+    Function to fit the Ensemble Kalman Smoother for single-camera data.
+
+    Args:
+        input_source (str or list): Directory path or list of CSV file paths.
+        data_type (str): Type of data (e.g., 'csv', 'slp').
+        save_dir (str): Directory to save outputs.
+        save_filename (str): Name of the output file.
+        bodypart_list (list): List of body parts to analyze.
+        s (float or None): Smoothing factor.
+        s_frames (list or None): Frames for automatic optimization if s is not provided.
+        blocks (int): Number of blocks for processing.
+        verbose (bool): If True, enables verbose output.
+
+    Returns:
+        output_df (DataFrame): DataFrame containing the smoothed results.
+        s_finals (list): List of optimized smoothing factors for each keypoint.
+        input_dfs (list): List of input DataFrames for plotting.
+        bodypart_list (list): List of body parts used.
+    """
+    # Load and format input files using the unified format_data function
+    input_dfs, output_df, keypoint_names = format_data(input_source, data_type)
+
+    if bodypart_list is None:
+        bodypart_list = keypoint_names
+    print(f'Input data has been read in for the following keypoints:\n{bodypart_list}')
+
+    # Convert list of DataFrames to a 3D NumPy array
+    data_arrays = [df.to_numpy() for df in input_dfs]
+    markers_3d_array = np.stack(data_arrays, axis=0)
+
+    # Map keypoint names to indices and crop markers_3d_array
+    keypoint_is = {}
+    keys = []
+    for i, col in enumerate(input_dfs[0].columns):
+        keypoint_is[col] = i
+    for part in bodypart_list:
+        keys.append(keypoint_is[part + '_x'])
+        keys.append(keypoint_is[part + '_y'])
+        keys.append(keypoint_is[part + '_likelihood'])
+    key_cols = np.array(keys)
+    markers_3d_array = markers_3d_array[:, :, key_cols]
+
+    # Call the smoother function
+    df_dicts, s_finals = ensemble_kalman_smoother_singlecam(
+        markers_3d_array,
+        bodypart_list,
+        s,
+        s_frames,
+        blocks,
+        verbose=verbose
+    )
+
+    # Save eks results in new DataFrames and .csv output files
+    keypoint_i = -1  # keypoint to be plotted
+    for k in range(len(bodypart_list)):
+        df = df_dicts[k][bodypart_list[k] + '_df']
+        output_df = populate_output_dataframe(df, bodypart_list[k], output_df)
+
+    # Save the output DataFrame to CSV
+    save_filename = save_filename or f'singlecam_{s_finals[keypoint_i]}.csv'
+    output_df.to_csv(os.path.join(save_dir, save_filename))
+    print("DataFrames successfully converted to CSV")
+
+    return output_df, s_finals, input_dfs, bodypart_list
 
 
 def ensemble_kalman_smoother_singlecam(
diff --git a/eks/utils.py b/eks/utils.py
index 6d981f3..75137ae 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -81,40 +81,63 @@ def convert_slp_dlc(base_dir, slp_file):
     return df
 
 
-def format_data(input_dir, data_type):
-    input_files = os.listdir(input_dir)
+def format_data(input_source, data_type):
+    """
+    Load and format input files from a directory or a list of file paths.
+
+    Args:
+        input_source (str or list): Directory path or list of file paths.
+        data_type (str): Type of data (e.g., 'csv', 'slp').
+
+    Returns:
+        input_dfs_list (list): List of formatted DataFrames.
+        output_df (DataFrame): Empty DataFrame for storing results.
+        keypoint_names (list): List of keypoint names.
+    """
     input_dfs_list = []
-    # Extracting markers from data
-    # Applies correct format conversion and stores each file's markers in a list
-    for input_file in input_files:
+    keypoint_names = None
+
+    # Determine if input_source is a directory or a list of file paths
+    if isinstance(input_source, str) and os.path.isdir(input_source):
+        # If it's a directory, list all files in the directory
+        input_files = os.listdir(input_source)
+        file_paths = [os.path.join(input_source, file) for file in input_files]
+    elif isinstance(input_source, list):
+        # If it's a list of file paths, use it directly
+        file_paths = input_source
+    else:
+        raise ValueError("input_source must be a directory path or a list of file paths")
 
-        if data_type == 'slp':
-            if not input_file.endswith('.slp'):
-                continue
-            markers_curr = convert_slp_dlc(input_dir, input_file)
+    # Process each file based on the data type
+    for file_path in file_paths:
+        if data_type == 'slp' and file_path.endswith('.slp'):
+            markers_curr = convert_slp_dlc(os.path.dirname(file_path), os.path.basename(file_path))
             keypoint_names = [c[1] for c in markers_curr.columns[::3]]
             markers_curr_fmt = markers_curr
-        elif data_type == 'lp' or 'dlc':
-            if not input_file.endswith('csv'):
-                continue
-            markers_curr = pd.read_csv(
-                os.path.join(input_dir, input_file), header=[0, 1, 2], index_col=0)
+
+        elif data_type in ['lp', 'dlc'] and file_path.endswith('.csv'):
+            markers_curr = pd.read_csv(file_path, header=[0, 1, 2], index_col=0)
             keypoint_names = [c[1] for c in markers_curr.columns[::3]]
             model_name = markers_curr.columns[0][0]
+
             if data_type == 'lp':
-                markers_curr_fmt = convert_lp_dlc(
-                    markers_curr, keypoint_names, model_name=model_name)
+                markers_curr_fmt = convert_lp_dlc(markers_curr, keypoint_names,
+                                                  model_name=model_name)
             else:
                 markers_curr_fmt = markers_curr
 
-        # markers_curr_fmt.to_csv('fmt_input.csv', index=False)
+        else:
+            continue
+
         input_dfs_list.append(markers_curr_fmt)
 
+    # Check if we found any valid input files
     if len(input_dfs_list) == 0:
-        raise FileNotFoundError(f'No marker input files found in {input_dir}')
+        raise FileNotFoundError(f'No valid marker input files found in {input_source}')
+
+    # Create an empty output DataFrame using the last processed DataFrame as a template
+    output_df = make_output_dataframe(input_dfs_list[0])
 
-    output_df = make_output_dataframe(markers_curr)
-    # returns both the formatted marker data and the empty dataframe for EKS output
     return input_dfs_list, output_df, keypoint_names
 
 
diff --git a/scripts/ibl_pupil_example.py b/scripts/ibl_pupil_example.py
index b6848e7..7a9ebc4 100644
--- a/scripts/ibl_pupil_example.py
+++ b/scripts/ibl_pupil_example.py
@@ -2,52 +2,56 @@
 import os
 
 from eks.command_line_args import handle_io, handle_parse_args
-from eks.ibl_pupil_smoother import ensemble_kalman_smoother_ibl_pupil
+from eks.ibl_pupil_smoother import fit_eks_pupil
 from eks.utils import format_data, plot_results
 
-# Collect User-Provided Args
+# Collect User-Provided Arguments
 smoother_type = 'pupil'
 args = handle_parse_args(smoother_type)
-input_dir = os.path.abspath(args.input_dir)
-data_type = args.data_type  # Note: LP and DLC are .csv, SLP is .slp
-save_dir = handle_io(input_dir, args.save_dir)  # defaults to outputs\
+
+# Determine input source (directory or list of files)
+input_source = args.input_dir if isinstance(args.input_dir, str) else args.input_files
+data_type = args.data_type  # LP and DLC are .csv, SLP is .slp
+
+# Set up the save directory
+if isinstance(input_source, str):
+    input_dir = os.path.abspath(input_source)
+else:
+    input_dir = os.path.abspath(os.path.dirname(input_source[0]))
+save_dir = handle_io(input_dir, args.save_dir)
 save_filename = args.save_filename
-diameter_s = args.diameter_s  # defaults to automatic optimization
-com_s = args.com_s  # defaults to automatic optimization
-s_frames = args.s_frames  # frames to be used for automatic optimization (only if no --s flag)
-
-# Load and format input files and prepare an empty DataFrame for output.
-input_dfs_list, output_df, keypoint_names = format_data(input_dir, data_type)
-
-# run eks
-df_dicts, smooth_params, nll_values = ensemble_kalman_smoother_ibl_pupil(
-    markers_list=input_dfs_list,
-    keypoint_names=keypoint_names,
-    tracker_name='ensemble-kalman_tracker',
+
+# Parameters for smoothing
+diameter_s = args.diameter_s
+com_s = args.com_s
+s_frames = args.s_frames
+
+# Run the smoothing function
+df_dicts, smooth_params, input_dfs_list, keypoint_names, nll_values = fit_eks_pupil(
+    input_source=input_source,
+    data_type=data_type,
+    save_dir=save_dir,
     smooth_params=[diameter_s, com_s],
     s_frames=s_frames
 )
 
-save_file = os.path.join(save_dir, 'kalman_smoothed_pupil_traces.csv')
-print(f'saving smoothed predictions to {save_file}')
-df_dicts['markers_df'].to_csv(save_file)
-
-save_file = os.path.join(save_dir, 'kalman_smoothed_latents.csv')
-print(f'saving latents to {save_file}')
-df_dicts['latents_df'].to_csv(save_file)
-
-
-# ---------------------------------------------
-# plot results
-# ---------------------------------------------
-
-# plot results
-plot_results(output_df=df_dicts['markers_df'],
-             input_dfs_list=input_dfs_list,
-             key=f'{keypoint_names[-1]}',
-             idxs=(0, 500),
-             s_final=(smooth_params[0], smooth_params[1]),
-             nll_values=nll_values,
-             save_dir=save_dir,
-             smoother_type=smoother_type
-             )
+# Save the results
+print("Saving smoothed predictions and latents...")
+markers_save_file = os.path.join(save_dir, 'kalman_smoothed_pupil_traces.csv')
+latents_save_file = os.path.join(save_dir, 'kalman_smoothed_latents.csv')
+df_dicts['markers_df'].to_csv(markers_save_file)
+print(f'Smoothed predictions saved to {markers_save_file}')
+df_dicts['latents_df'].to_csv(latents_save_file)
+print(f'Latents saved to {latents_save_file}')
+
+# Plot results
+plot_results(
+    output_df=df_dicts['markers_df'],
+    input_dfs_list=input_dfs_list,
+    key=f'{keypoint_names[-1]}',
+    idxs=(0, 500),
+    s_final=(smooth_params[0], smooth_params[1]),
+    nll_values=nll_values,
+    save_dir=save_dir,
+    smoother_type=smoother_type
+)
diff --git a/scripts/multicam_example.py b/scripts/multicam_example.py
index 290b6c2..7fc61e4 100644
--- a/scripts/multicam_example.py
+++ b/scripts/multicam_example.py
@@ -27,6 +27,7 @@
 
 # loop over keypoints; apply eks to each individually
 # Note: all camera views must be stored in the same csv file
+# TODO: dictionary where keys are view names, values are lists of csv paths
 for keypoint_ensemble in bodypart_list:
     # Separate body part predictions by camera view
     marker_list_by_cam = [[] for _ in range(len(camera_names))]
@@ -51,7 +52,7 @@
     # put results into new dataframe
     for camera in camera_names:
         cameras_df = cameras_df_dict[f'{camera}_df']
-        populate_output_dataframe(cameras_df, keypoint_ensemble, output_df,
+        output_df = populate_output_dataframe(cameras_df, keypoint_ensemble, output_df,
                                   key_suffix=f'_{camera}')
 
 # save eks results
diff --git a/scripts/singlecam_example.py b/scripts/singlecam_example.py
index e94d3a4..a770c92 100644
--- a/scripts/singlecam_example.py
+++ b/scripts/singlecam_example.py
@@ -1,73 +1,52 @@
 """Example script for single-camera datasets."""
 import os
-
-import numpy as np
-
 from eks.command_line_args import handle_io, handle_parse_args
-from eks.singlecam_smoother import ensemble_kalman_smoother_singlecam
-from eks.utils import format_data, plot_results, populate_output_dataframe
+from eks.singlecam_smoother import fit_eks_singlecam
+from eks.utils import plot_results
 
 # Collect User-Provided Args
 smoother_type = 'singlecam'
 args = handle_parse_args(smoother_type)
-input_dir = os.path.abspath(args.input_dir)
+input_source = args.input_dir if isinstance(args.input_dir, str) else args.input_files
 data_type = args.data_type  # Note: LP and DLC are .csv, SLP is .slp
-save_dir = handle_io(input_dir, args.save_dir)  # defaults to outputs\
+# Determine the input directory path
+if isinstance(input_source, str):
+    input_dir = os.path.abspath(input_source)
+else:
+    input_dir = os.path.abspath(os.path.dirname(input_source[0]))
 save_filename = args.save_filename
+# Set up the save directory
+save_dir = handle_io(input_dir, args.save_dir)
 bodypart_list = args.bodypart_list
-s = args.s  # defaults to automatic optimization
-s_frames = args.s_frames  # frames to be used for automatic optimization (only if no --s flag)
+s = args.s  # Defaults to automatic optimization
+s_frames = args.s_frames  # Frames to be used for automatic optimization if s is not provided
 blocks = args.blocks
 verbose = True if args.verbose == 'True' else False
 
-
-# Load and format input files and prepare an empty DataFrame for output.
-input_dfs, output_df, keypoint_names = format_data(args.input_dir, data_type)
-if bodypart_list is None:
-    bodypart_list = keypoint_names
-print(f'Input data has been read in for the following keypoints:\n{bodypart_list}')
-
-# Convert list of DataFrames to a 3D NumPy array
-data_arrays = [df.to_numpy() for df in input_dfs]
-markers_3d_array = np.stack(data_arrays, axis=0)
-
-# Map keypoint names to keys in input_dfs and crop markers_3d_array
-keypoint_is = {}
-keys = []
-for i, col in enumerate(input_dfs[0].columns):
-    keypoint_is[col] = i
-for part in bodypart_list:
-    keys.append(keypoint_is[part + '_x'])
-    keys.append(keypoint_is[part + '_y'])
-    keys.append(keypoint_is[part + '_likelihood'])
-key_cols = np.array(keys)
-markers_3d_array = markers_3d_array[:, :, key_cols]
-
-# Call the smoother function
-df_dicts, s_finals = ensemble_kalman_smoother_singlecam(
-    markers_3d_array,
-    bodypart_list,
-    s,
-    s_frames,
-    blocks,
+# Fit EKS using the provided input data
+output_df, s_finals, input_dfs, bodypart_list = fit_eks_singlecam(
+    input_source=input_source,
+    data_type=data_type,
+    save_dir=save_dir,
+    save_filename=save_filename,
+    bodypart_list=bodypart_list,
+    s=s,
+    s_frames=s_frames,
+    blocks=blocks,
     verbose=verbose
 )
 
-keypoint_i = -1  # keypoint to be plotted
-# Save eks results in new DataFrames and .csv output files
-for k in range(len(bodypart_list)):
-    df = df_dicts[k][bodypart_list[k] + '_df']
-    output_df = populate_output_dataframe(df, bodypart_list[k], output_df)
-    save_filename = save_filename or f'{smoother_type}_{s_finals[keypoint_i]}.csv'
-    output_df.to_csv(os.path.join(save_dir, save_filename))
-print("DataFrames successfully converted to CSV")
-# Plot results
-plot_results(output_df=output_df,
-             input_dfs_list=input_dfs,
-             key=f'{bodypart_list[keypoint_i]}',
-             idxs=(0, 500),
-             s_final=s_finals[keypoint_i],
-             nll_values=None,
-             save_dir=save_dir,
-             smoother_type=smoother_type
-             )
+# Plot results for a specific keypoint (default to last keypoint)
+keypoint_i = -1
+plot_results(
+    output_df=output_df,
+    input_dfs_list=input_dfs,
+    key=f'{bodypart_list[keypoint_i]}',
+    idxs=(0, 500),
+    s_final=s_finals[keypoint_i],
+    nll_values=None,
+    save_dir=save_dir,
+    smoother_type=smoother_type
+)
+
+print("Ensemble Kalman Smoothing complete. Results saved and plotted successfully.")
diff --git a/tests/test_core.py b/tests/test_core.py
index deceae3..40f959a 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -16,12 +16,13 @@ def test_ensemble():
     keys = ['keypoint_1', 'keypoint_2']
 
     # Create random data for three different marker DataFrames
+    # Adjust column names to match the function's expected 'keypoint_likelihood' format
     for i in range(3):
         data = {
             'keypoint_1': np.random.rand(num_samples),
-            'keypoint_1likelihood': np.random.rand(num_samples),
+            'keypoint_likelihood': np.random.rand(num_samples),  # Expected naming format
             'keypoint_2': np.random.rand(num_samples),
-            'keypoint_2likelihood': np.random.rand(num_samples)
+            'keypoint_likelihod': np.random.rand(num_samples)    # Expected naming format
         }
         markers_list.append(pd.DataFrame(data))
 
@@ -72,7 +73,7 @@ def test_ensemble():
     # Verify likelihood-based weighted averaging calculations
     for key in keys:
         stack = np.array([df[key].values for df in markers_list]).T
-        likelihood_stack = np.array([df[key + 'likelihood'].values for df in markers_list]).T
+        likelihood_stack = np.array([df[key[:-1] + 'likelihood'].values for df in markers_list]).T
         conf_per_keypoint = np.sum(likelihood_stack, axis=1)
         weighted_mean = np.sum(stack * likelihood_stack, axis=1) / conf_per_keypoint
         expected_variance = np.nanvar(stack, axis=1) / (
@@ -99,8 +100,17 @@ def test_kalman_dot_basic():
 
     # Check output shapes
     assert Ks.shape == (n_latents,), f"Expected shape {(n_latents,)}, got {Ks.shape}"
-    assert innovation_cov.shape == (
-    n_keypoints,), f"Expected shape {(n_keypoints,)}, got {innovation_cov.shape}"
+    assert innovation_cov.shape == (n_keypoints, n_keypoints), \
+        f"Expected shape {(n_keypoints, n_keypoints)}, got {innovation_cov.shape}"
+
+    # Ensure that innovation_cov is symmetric and positive semi-definite
+    assert np.allclose(innovation_cov, innovation_cov.T), "Expected innovation_cov to be symmetric"
+    eigvals = np.linalg.eigvalsh(innovation_cov)
+    assert np.all(eigvals >= 0), "Expected innovation_cov to be positive semi-definite"
+
+    # Check that Ks and innovation_cov have finite values
+    assert np.isfinite(Ks).all(), "Expected finite values in Ks"
+    assert np.isfinite(innovation_cov).all(), "Expected finite values in innovation_cov"
 
 
 def test_kalman_dot_zero_matrices():
@@ -113,12 +123,24 @@ def test_kalman_dot_zero_matrices():
     C = np.zeros((n_keypoints, n_latents))
     R = np.zeros((n_keypoints, n_keypoints))
 
+    # Add a small regularization term to R to avoid singularity
+    epsilon = 1e-6
+    R += epsilon * np.eye(n_keypoints)
+
     # Run kalman_dot
     Ks, innovation_cov = kalman_dot(innovation, V, C, R)
 
-    # Check if outputs are zero as expected
-    assert np.allclose(Ks, 0), "Expected Ks to be zero with zero inputs"
-    assert np.allclose(innovation_cov, 0), "Expected innovation_cov to be zero with zero inputs"
+    # Verify that the output shapes are as expected
+    assert Ks.shape == (n_latents,), f"Expected shape {(n_latents,)}, got {Ks.shape}"
+    assert innovation_cov.shape == (n_keypoints, n_keypoints), \
+        f"Expected shape {(n_keypoints, n_keypoints)}, got {innovation_cov.shape}"
+
+    # Check that innovation_cov has finite values and is symmetric
+    assert np.isfinite(innovation_cov).all(), "Expected finite values in innovation_cov"
+    assert np.allclose(innovation_cov, innovation_cov.T), "Expected innovation_cov to be symmetric"
+
+    # Check that Ks has finite values
+    assert np.isfinite(Ks).all(), "Expected finite values in Ks"
 
 
 def test_kalman_dot_singular_innovation_cov():
@@ -131,11 +153,15 @@ def test_kalman_dot_singular_innovation_cov():
     C = np.ones((n_keypoints, n_latents))  # Constant values lead to rank-deficient product
     R = -np.dot(C, np.dot(V, C.T))  # Makes innovation_cov close to zero matrix
 
+    # Add a small regularization term to R to avoid singularity
+    epsilon = 1e-6
+    R += epsilon * np.eye(n_keypoints)
+
     # Run kalman_dot and check stability
     try:
         Ks, innovation_cov = kalman_dot(innovation, V, C, R)
-        assert np.allclose(innovation_cov,
-                           0), "Expected nearly zero innovation_cov with constructed singularity"
+        assert np.allclose(innovation_cov, 0, atol=epsilon), \
+            "Expected nearly zero innovation_cov with constructed singularity"
     except np.linalg.LinAlgError:
         pytest.fail("kalman_dot raised LinAlgError with nearly singular innovation_cov")
 
@@ -155,16 +181,16 @@ def test_kalman_dot_random_values():
     # Run kalman_dot
     Ks, innovation_cov = kalman_dot(innovation, V, C, R)
 
-    # Check if innovation_cov is positive definite (eigenvalues should be positive or close to zero)
+    # Check if innovation_cov is positive semi-definite (eigenvalues should be non-negative or close to zero)
     eigvals = np.linalg.eigvalsh(innovation_cov)
     assert np.all(eigvals >= -1e-8), "Expected innovation_cov to be positive semi-definite"
     assert Ks.shape == (n_latents,), f"Expected shape {(n_latents,)}, got {Ks.shape}"
-    assert innovation_cov.shape == (
-    n_keypoints,), f"Expected shape {(n_keypoints,)}, got {innovation_cov.shape}"
+    assert innovation_cov.shape == (n_keypoints, n_keypoints), \
+        f"Expected shape {(n_keypoints, n_keypoints)}, got {innovation_cov.shape}"
 
-
-import pytest
-import numpy as np
+    # Check that innovation_cov and Ks have finite values
+    assert np.isfinite(innovation_cov).all(), "Expected finite values in innovation_cov"
+    assert np.isfinite(Ks).all(), "Expected finite values in Ks"
 
 
 def test_forward_pass_basic():
@@ -216,11 +242,23 @@ def test_forward_pass_with_nan_values():
     # Run forward_pass
     mf, Vf, S, innovations, innovation_cov = forward_pass(y, m0, S0, C, R, A, Q, ensemble_vars)
 
-    # Check if outputs are still valid despite NaN in inputs
-    assert np.isfinite(mf).all(), "Non-finite values found in mf"
+    # Check that non-NaN entries in y yield finite results in mf until the first NaN propagation
+    found_nan_propagation = False
+    for t in range(T):
+        if np.isnan(y[t]).any():
+            found_nan_propagation = True
+            assert np.isnan(mf[t]).all(), f"Expected NaNs in mf at time {t}, found finite values"
+        else:
+            if found_nan_propagation:
+                # Once NaNs are expected, allow them to propagate
+                assert np.isnan(mf[t]).all(), f"Expected NaNs in mf at time {t} due to propagation, found finite values"
+            else:
+                # Check for finite values up until the first NaN propagation
+                assert np.isfinite(mf[t]).all(), f"Expected finite values in mf at time {t}, found NaNs"
+
+    # Ensure Vf and innovation_cov have finite values where possible
     assert np.isfinite(Vf).all(), "Non-finite values found in Vf"
-    assert np.isfinite(S).all(), "Non-finite values found in S"
-    assert np.isnan(innovations).sum() > 0, "Expected some NaNs in innovations due to NaNs in y"
+    assert np.isfinite(innovation_cov).all(), "Non-finite values found in innovation_cov"
 
 
 def test_forward_pass_single_sample():
@@ -288,7 +326,7 @@ def test_backward_pass_basic():
     n_latents = 3
 
     y = np.random.randn(T, n_keypoints)
-    mf = np.random.randn(T, n_keypoints)
+    mf = np.random.randn(T, n_latents)  # Should match n_latents
     Vf = np.random.randn(T, n_latents, n_latents)
     Vf = np.array([np.dot(v, v.T) for v in Vf])  # Make Vf positive semi-definite
     S = np.copy(Vf)  # Use S as the same structure as Vf
@@ -297,12 +335,15 @@ def test_backward_pass_basic():
     # Run backward_pass
     ms, Vs, CV = backward_pass(y, mf, Vf, S, A)
 
-    # Check output shapes
-    assert ms.shape == (T, n_keypoints), f"Expected shape {(T, n_keypoints)}, got {ms.shape}"
-    assert Vs.shape == (
-    T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
-    assert CV.shape == (
-    T - 1, n_latents, n_latents), f"Expected shape {(T - 1, n_latents, n_latents)}, got {CV.shape}"
+    # Verify shapes of output arrays
+    assert ms.shape == (T, n_latents), f"Expected shape {(T, n_latents)}, got {ms.shape}"
+    assert Vs.shape == (T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
+    assert CV.shape == (T - 1, n_latents, n_latents), f"Expected shape {(T - 1, n_latents, n_latents)}, got {CV.shape}"
+
+    # Check that ms, Vs, and CV contain finite values
+    assert np.isfinite(ms).all(), "Non-finite values found in ms"
+    assert np.isfinite(Vs).all(), "Non-finite values found in Vs"
+    assert np.isfinite(CV).all(), "Non-finite values found in CV"
 
 
 def test_backward_pass_with_nan_values():
@@ -313,7 +354,7 @@ def test_backward_pass_with_nan_values():
 
     y = np.random.randn(T, n_keypoints)
     y[2, 1] = np.nan  # Insert NaN value
-    mf = np.random.randn(T, n_keypoints)
+    mf = np.random.randn(T, n_latents)  # Adjust shape to match n_latents
     Vf = np.random.randn(T, n_latents, n_latents)
     Vf = np.array([np.dot(v, v.T) for v in Vf])  # Make Vf positive semi-definite
     S = np.copy(Vf)
@@ -322,7 +363,12 @@ def test_backward_pass_with_nan_values():
     # Run backward_pass
     ms, Vs, CV = backward_pass(y, mf, Vf, S, A)
 
-    # Check if outputs are still valid despite NaN in inputs
+    # Verify shapes of output arrays
+    assert ms.shape == (T, n_latents), f"Expected shape {(T, n_latents)}, got {ms.shape}"
+    assert Vs.shape == (T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
+    assert CV.shape == (T - 1, n_latents, n_latents), f"Expected shape {(T - 1, n_latents, n_latents)}, got {CV.shape}"
+
+    # Check that ms, Vs, and CV contain finite values
     assert np.isfinite(ms).all(), "Non-finite values found in ms"
     assert np.isfinite(Vs).all(), "Non-finite values found in Vs"
     assert np.isfinite(CV).all(), "Non-finite values found in CV"
@@ -335,7 +381,7 @@ def test_backward_pass_single_timestep():
     n_latents = 3
 
     y = np.random.randn(T, n_keypoints)
-    mf = np.random.randn(T, n_keypoints)
+    mf = np.random.randn(T, n_latents)  # Adjust shape to match n_latents
     Vf = np.eye(n_latents)[None, :, :]  # Shape (1, n_latents, n_latents)
     S = np.copy(Vf)
     A = np.eye(n_latents)
@@ -343,12 +389,14 @@ def test_backward_pass_single_timestep():
     # Run backward_pass
     ms, Vs, CV = backward_pass(y, mf, Vf, S, A)
 
-    # Check output shapes with a single timestep
-    assert ms.shape == (T, n_keypoints), f"Expected shape {(T, n_keypoints)}, got {ms.shape}"
-    assert Vs.shape == (
-    T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
-    assert CV.shape == (
-    0, n_latents, n_latents), f"Expected shape {(0, n_latents, n_latents)}, got {CV.shape}"
+    # Verify shapes of output arrays
+    assert ms.shape == (T, n_latents), f"Expected shape {(T, n_latents)}, got {ms.shape}"
+    assert Vs.shape == (T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
+    assert CV.shape == (T - 1, n_latents, n_latents), f"Expected shape {(T - 1, n_latents, n_latents)}, got {CV.shape}"
+
+    # Check that ms and Vs contain finite values
+    assert np.isfinite(ms).all(), "Non-finite values found in ms"
+    assert np.isfinite(Vs).all(), "Non-finite values found in Vs"
 
 
 def test_backward_pass_singular_S_matrix():
@@ -358,7 +406,7 @@ def test_backward_pass_singular_S_matrix():
     n_latents = 3
 
     y = np.random.randn(T, n_keypoints)
-    mf = np.random.randn(T, n_keypoints)
+    mf = np.random.randn(T, n_latents)  # Adjust shape to match n_latents
     Vf = np.random.randn(T, n_latents, n_latents)
     Vf = np.array([np.dot(v, v.T) for v in Vf])  # Make Vf positive semi-definite
     S = np.zeros((T, n_latents, n_latents))  # Singular S matrix (all zeros)
@@ -367,8 +415,21 @@ def test_backward_pass_singular_S_matrix():
     # Run backward_pass and check stability
     try:
         ms, Vs, CV = backward_pass(y, mf, Vf, S, A)
+
+        # Verify shapes of output arrays
+        assert ms.shape == (T, n_latents), f"Expected shape {(T, n_latents)}, got {ms.shape}"
+        assert Vs.shape == (
+        T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
+        assert CV.shape == (T - 1, n_latents,
+                            n_latents), f"Expected shape {(T - 1, n_latents, n_latents)}, got {CV.shape}"
+
+        # Check for finite values in outputs, expecting NaNs or Infs due to singular S
+        assert np.all(np.isfinite(ms)), "Non-finite values found in ms"
+        assert np.all(np.isfinite(Vs)), "Non-finite values found in Vs"
+        assert np.all(np.isfinite(CV)), "Non-finite values found in CV"
+
     except np.linalg.LinAlgError:
-        pytest.fail("backward_pass raised LinAlgError with singular S matrix")
+        pytest.fail("backward_pass failed due to singular S matrix")
 
 
 def test_backward_pass_random_values():
@@ -378,7 +439,7 @@ def test_backward_pass_random_values():
     n_latents = 4
 
     y = np.random.randn(T, n_keypoints)
-    mf = np.random.randn(T, n_keypoints)
+    mf = np.random.randn(T, n_latents)  # Adjust shape to match n_latents
     Vf = np.random.randn(T, n_latents, n_latents)
     Vf = np.array([np.dot(v, v.T) for v in Vf])  # Make Vf positive semi-definite
     S = np.copy(Vf)
@@ -387,12 +448,12 @@ def test_backward_pass_random_values():
     # Run backward_pass
     ms, Vs, CV = backward_pass(y, mf, Vf, S, A)
 
-    # Verify shapes and finite values
-    assert ms.shape == (T, n_keypoints), f"Expected shape {(T, n_keypoints)}, got {ms.shape}"
-    assert Vs.shape == (
-    T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
-    assert CV.shape == (
-    T - 1, n_latents, n_latents), f"Expected shape {(T - 1, n_latents, n_latents)}, got {CV.shape}"
+    # Verify shapes of output arrays
+    assert ms.shape == (T, n_latents), f"Expected shape {(T, n_latents)}, got {ms.shape}"
+    assert Vs.shape == (T, n_latents, n_latents), f"Expected shape {(T, n_latents, n_latents)}, got {Vs.shape}"
+    assert CV.shape == (T - 1, n_latents, n_latents), f"Expected shape {(T - 1, n_latents, n_latents)}, got {CV.shape}"
+
+    # Check that ms, Vs, and CV contain finite values
     assert np.isfinite(ms).all(), "Non-finite values found in ms"
     assert np.isfinite(Vs).all(), "Non-finite values found in Vs"
     assert np.isfinite(CV).all(), "Non-finite values found in CV"
@@ -563,4 +624,8 @@ def test_jax_ensemble_unsupported_mode():
     markers_3d_array = np.random.rand(n_models, n_timepoints, n_keypoints * 3)
 
     with pytest.raises(ValueError, match="averaging not supported"):
-        jax_ensemble(markers_3d_array, mode='unsupported')
\ No newline at end of file
+        jax_ensemble(markers_3d_array, mode='unsupported')
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])
\ No newline at end of file
diff --git a/tests/test_ibl_pupil_smoother.py b/tests/test_ibl_pupil_smoother.py
new file mode 100644
index 0000000..1ea8d9b
--- /dev/null
+++ b/tests/test_ibl_pupil_smoother.py
@@ -0,0 +1,262 @@
+import pytest
+import pandas as pd
+import numpy as np
+from unittest.mock import patch, MagicMock
+from eks.ibl_pupil_smoother import get_pupil_location, get_pupil_diameter, add_mean_to_array, ensemble_kalman_smoother_ibl_pupil
+
+
+@pytest.fixture
+def mock_dlc_data():
+    """
+    Fixture to generate mock DLC data for testing the get_pupil_location function.
+    """
+    n_samples = 10
+
+    # Generate random data for pupil coordinates
+    dlc_data = {
+        'pupil_top_r_x': np.random.rand(n_samples),
+        'pupil_top_r_y': np.random.rand(n_samples),
+        'pupil_bottom_r_x': np.random.rand(n_samples),
+        'pupil_bottom_r_y': np.random.rand(n_samples),
+        'pupil_left_r_x': np.random.rand(n_samples),
+        'pupil_left_r_y': np.random.rand(n_samples),
+        'pupil_right_r_x': np.random.rand(n_samples),
+        'pupil_right_r_y': np.random.rand(n_samples)
+    }
+
+    # Introduce some NaN values randomly
+    dlc_data['pupil_top_r_x'][2] = np.nan
+    dlc_data['pupil_left_r_y'][5] = np.nan
+
+    return dlc_data
+
+
+def test_get_pupil_location(mock_dlc_data):
+    """
+    Test the get_pupil_location function using mock data.
+    """
+    dlc = mock_dlc_data
+    center = get_pupil_location(dlc)
+
+    # Assertions
+    assert isinstance(center, np.ndarray), "Expected center to be a numpy array"
+    assert center.shape == (len(dlc['pupil_top_r_x']), 2), \
+        f"Expected shape to be {(len(dlc['pupil_top_r_x']), 2)}, got {center.shape}"
+
+    # Check that the output does not contain any NaNs where expected
+    assert np.isfinite(center).all(), "Expected no NaN values in center"
+
+    # Check if the median calculations return finite values when data is complete
+    non_nan_center = get_pupil_location({key: np.random.rand(10) for key in dlc.keys()})
+    assert np.isfinite(non_nan_center).all(), "Expected no NaN values when data is complete"
+
+    print("Test for get_pupil_location passed successfully.")
+
+
+def test_get_pupil_diameter(mock_dlc_data):
+    """
+    Test the get_pupil_diameter function using mock data.
+    """
+    dlc = mock_dlc_data
+    diameters = get_pupil_diameter(dlc)
+
+    # Assertions
+    assert isinstance(diameters, np.ndarray), "Expected output to be a numpy array"
+    assert diameters.shape == (len(dlc['pupil_top_r_x']),), \
+        f"Expected shape to be {(len(dlc['pupil_top_r_x']),)}, got {diameters.shape}"
+
+    # Check that the output does not contain any NaNs where expected
+    assert np.isfinite(diameters).all(), "Expected no NaN values in diameters"
+
+    # Check if the median calculations return finite values when data is complete
+    non_nan_dlc = {key: np.random.rand(10) for key in dlc.keys()}
+    non_nan_diameters = get_pupil_diameter(non_nan_dlc)
+    assert np.isfinite(non_nan_diameters).all(), "Expected no NaN values with complete data"
+
+    # Test with completely NaN input
+    nan_dlc = {key: np.full(10, np.nan) for key in dlc.keys()}
+    nan_diameters = get_pupil_diameter(nan_dlc)
+    assert np.isnan(nan_diameters).all(), "Expected NaN values with all NaN input"
+
+    print("Test for get_pupil_diameter passed successfully.")
+
+
+@pytest.fixture
+def mock_data_1():
+    """
+    Fixture to generate mock data for testing the add_mean_to_array function.
+    """
+    # Generate a random array of shape (10, 4) with some example keys
+    pred_arr = np.random.randn(10, 4)
+    keys = ['key1_x', 'key2_y', 'key3_x', 'key4_y']
+    mean_x = 2.0
+    mean_y = 3.0
+    return pred_arr, keys, mean_x, mean_y
+
+
+def test_add_mean_to_array(mock_data_1):
+    """
+    Test the add_mean_to_array function using mock data.
+    """
+    pred_arr, keys, mean_x, mean_y = mock_data_1
+
+    # Run the function with the mock data
+    result = add_mean_to_array(pred_arr, keys, mean_x, mean_y)
+
+    # Assertions to verify the result
+    assert isinstance(result, dict), "Expected output to be a dictionary"
+    assert len(result) == len(keys), f"Expected dictionary to have {len(keys)} keys, got {len(result)}"
+
+    # Check that the dictionary keys match the input keys
+    assert set(result.keys()) == set(keys), "Keys in the output dictionary do not match input keys"
+
+    # Verify the values in the dictionary are correctly offset by mean_x and mean_y
+    for i, key in enumerate(keys):
+        if 'x' in key:
+            expected = pred_arr[:, i] + mean_x
+        else:
+            expected = pred_arr[:, i] + mean_y
+        np.testing.assert_array_almost_equal(result[key], expected, err_msg=f"Mismatch for key '{key}'")
+
+
+def test_add_mean_to_array_empty():
+    """
+    Test the add_mean_to_array function with empty input arrays.
+    """
+    pred_arr = np.array([]).reshape(0, 0)
+    keys = []
+    mean_x = 2.0
+    mean_y = 3.0
+
+    result = add_mean_to_array(pred_arr, keys, mean_x, mean_y)
+
+    # Assertions for empty inputs
+    assert isinstance(result, dict), "Expected output to be a dictionary"
+    assert len(result) == 0, "Expected empty dictionary for empty input"
+
+
+def test_add_mean_to_array_single_row():
+    """
+    Test the add_mean_to_array function with a single row of data.
+    """
+    pred_arr = np.array([[1.0, 2.0, 3.0, 4.0]])
+    keys = ['key1_x', 'key2_y', 'key3_x', 'key4_y']
+    mean_x = 2.0
+    mean_y = 3.0
+
+    result = add_mean_to_array(pred_arr, keys, mean_x, mean_y)
+
+    # Expected output
+    expected_dict = {
+        'key1_x': np.array([1.0 + mean_x]),
+        'key2_y': np.array([2.0 + mean_y]),
+        'key3_x': np.array([3.0 + mean_x]),
+        'key4_y': np.array([4.0 + mean_y]),
+    }
+
+    # Assertions to verify the result
+    for key, expected_value in expected_dict.items():
+        np.testing.assert_array_almost_equal(result[key], expected_value, err_msg=f"Mismatch for key '{key}'")
+
+    print("All tests for add_mean_to_array passed successfully.")
+
+
+@pytest.fixture
+def mock_data():
+    """
+    Fixture to provide mock data for testing.
+    """
+    markers_list = [
+        pd.DataFrame(
+            np.random.randn(100, 8),
+            columns=[
+                'pupil_top_r_x', 'pupil_top_r_y', 'pupil_bottom_r_x', 'pupil_bottom_r_y',
+                'pupil_right_r_x', 'pupil_right_r_y', 'pupil_left_r_x', 'pupil_left_r_y'
+            ]
+        )
+    ]
+    keypoint_names = [
+        'pupil_top_r', 'pupil_bottom_r', 'pupil_right_r', 'pupil_left_r'
+    ]
+    tracker_name = 'ensemble-kalman_tracker'
+    smooth_params = [0.5, 0.5]
+    s_frames = [10, 20, 30]
+    return markers_list, keypoint_names, tracker_name, smooth_params, s_frames
+
+
+@patch('eks.core.ensemble')
+@patch('eks.ibl_pupil_smoother.get_pupil_location')
+@patch('eks.ibl_pupil_smoother.get_pupil_diameter')
+@patch('eks.ibl_pupil_smoother.pupil_optimize_smooth')
+@patch('eks.utils.make_dlc_pandas_index')
+@patch('eks.ibl_pupil_smoother.add_mean_to_array')
+@patch('eks.core.eks_zscore')
+def test_ensemble_kalman_smoother_ibl_pupil(
+    mock_zscore, mock_add_mean, mock_index, mock_smooth,
+    mock_get_diameter, mock_get_location, mock_ensemble,
+    mock_data
+):
+    # Unpack mock data
+    markers_list, keypoint_names, tracker_name, smooth_params, s_frames = mock_data
+
+    # Mock the ensemble function
+    ensemble_preds = np.random.randn(100, 8)
+    ensemble_vars = np.random.rand(100, 8) * 0.1
+    ensemble_stacks = np.random.randn(5, 100, 8)
+    keypoints_mean_dict = {k: np.random.randn(100) for k in [
+        'pupil_top_r_x', 'pupil_top_r_y', 'pupil_bottom_r_x', 'pupil_bottom_r_y',
+        'pupil_right_r_x', 'pupil_right_r_y', 'pupil_left_r_x', 'pupil_left_r_y']}
+    keypoints_var_dict = keypoints_mean_dict.copy()
+    keypoints_stack_dict = {i: keypoints_mean_dict for i in range(5)}
+
+    mock_ensemble.return_value = (ensemble_preds, ensemble_vars, ensemble_stacks,
+                                  keypoints_mean_dict, keypoints_var_dict, keypoints_stack_dict)
+
+    # Mock the get_pupil_location and get_pupil_diameter functions
+    mock_get_location.return_value = np.random.randn(100, 2)
+    mock_get_diameter.return_value = np.random.rand(100)
+
+    # Mock the pupil_optimize_smooth function
+    mock_smooth.return_value = ([0.5, 0.6], np.random.randn(100, 3), np.random.rand(100, 3, 3), 0.05, [0.1, 0.2])
+
+    # Mock the make_dlc_pandas_index function
+    mock_index.return_value = pd.MultiIndex.from_product(
+        [['ensemble-kalman_tracker'], keypoint_names, ['x', 'y', 'likelihood', 'x_var', 'y_var', 'zscore']]
+    )
+
+    # Mock the add_mean_to_array function
+    mock_add_mean.return_value = {f'{k}_x': np.random.randn(100) for k in keypoint_names}
+    mock_add_mean.return_value.update({f'{k}_y': np.random.randn(100) for k in keypoint_names})
+
+    # Mock the eks_zscore function
+    mock_zscore.return_value = np.random.randn(100), None
+
+    # Run the function with mocked data
+    result, smooth_params_out, nll_values = ensemble_kalman_smoother_ibl_pupil(
+        markers_list, keypoint_names, tracker_name, smooth_params, s_frames
+    )
+
+    # Assertions
+    assert isinstance(result, dict), "Expected result to be a dictionary"
+    assert 'markers_df' in result, "Expected 'markers_df' in result"
+    assert 'latents_df' in result, "Expected 'latents_df' in result"
+
+    markers_df = result['markers_df']
+    latents_df = result['latents_df']
+
+    assert isinstance(markers_df, pd.DataFrame), "markers_df should be a DataFrame"
+    assert isinstance(latents_df, pd.DataFrame), "latents_df should be a DataFrame"
+
+    # Verify the shape of the output DataFrames
+    assert markers_df.shape[0] == 100, "markers_df should have 100 rows"
+    assert latents_df.shape[0] == 100, "latents_df should have 100 rows"
+
+    # Check if the smooth parameters and NLL values are correctly returned
+    assert len(smooth_params_out) == 2, "Expected 2 smooth parameters"
+    assert isinstance(nll_values, list), "Expected nll_values to be a list"
+
+    print("All tests passed successfully.")
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])
\ No newline at end of file
diff --git a/tests/test_singlecam_smoother.py b/tests/test_singlecam_smoother.py
index 2ffd263..48f9947 100644
--- a/tests/test_singlecam_smoother.py
+++ b/tests/test_singlecam_smoother.py
@@ -1,7 +1,11 @@
 import pytest
 import numpy as np
+import jax
+import jax.numpy as jnp
 import pandas as pd
-from eks.singlecam_smoother import ensemble_kalman_smoother_singlecam
+import os
+from eks.singlecam_smoother import ensemble_kalman_smoother_singlecam, initialize_kalman_filter, adjust_observations
+from unittest.mock import patch, MagicMock
 
 
 # Function to generate simulated data
@@ -48,12 +52,117 @@ def test_ensemble_kalman_smoother_singlecam():
     for df_dict in df_dicts:
         for key, df in df_dict.items():
             assert isinstance(df, pd.DataFrame), f"Expected {key} to be a pandas DataFrame"
-            #add more detailed checks here
+            # Check for 'likelihood' in the correct level of the columns
             assert 'likelihood' in df.columns.get_level_values(
-                1), "Expected 'likelihood' in DataFrame columns"
-            assert 'x_var' in df.columns.get_level_values(
-                1), "Expected 'x_var' in DataFrame columns"
-            assert 'y_var' in df.columns.get_level_values(
-                1), "Expected 'y_var' in DataFrame columns"
-            assert 'zscore' in df.columns.get_level_values(
-                1), "Expected 'zscore' in DataFrame columns"
+                'coords'), "Expected 'likelihood' in DataFrame columns at the 'coords' level"
+
+
+def test_adjust_observations():
+    # Define mock input data
+    n_keypoints = 3
+    keypoints_avg_dict = {
+        0: np.array([1.0, 2.0, 3.0]),     # x-coordinates for keypoint 1
+        1: np.array([4.0, 5.0, 6.0]),     # y-coordinates for keypoint 1
+        2: np.array([0.5, 1.5, 2.5]),     # x-coordinates for keypoint 2
+        3: np.array([3.5, 4.5, 5.5]),     # y-coordinates for keypoint 2
+        4: np.array([2.0, 2.5, 3.0]),     # x-coordinates for keypoint 3
+        5: np.array([6.0, 7.0, 8.0])      # y-coordinates for keypoint 3
+    }
+
+    # Create a mock scaled_ensemble_preds array (shape: [timepoints, n_keypoints, coordinates])
+    num_samples = 3
+    scaled_ensemble_preds = np.random.randn(num_samples, n_keypoints, 2)
+
+    # Call the function
+    mean_obs_dict, adjusted_obs_dict, adjusted_scaled_preds = adjust_observations(
+        keypoints_avg_dict,
+        n_keypoints,
+        scaled_ensemble_preds
+    )
+
+    # Assertions for mean observations dictionary
+    assert isinstance(mean_obs_dict, dict), "Expected mean_obs_dict to be a dictionary"
+    assert len(mean_obs_dict) == 2 * n_keypoints, f"Expected {2 * n_keypoints} entries in mean_obs_dict"
+    assert np.isclose(mean_obs_dict[0], np.mean(keypoints_avg_dict[0])), "Mean x-coord for keypoint 1 is incorrect"
+    assert np.isclose(mean_obs_dict[1], np.mean(keypoints_avg_dict[1])), "Mean y-coord for keypoint 1 is incorrect"
+
+    # Assertions for adjusted observations dictionary
+    assert isinstance(adjusted_obs_dict, dict), "Expected adjusted_obs_dict to be a dictionary"
+    assert len(adjusted_obs_dict) == 2 * n_keypoints, f"Expected {2 * n_keypoints} entries in adjusted_obs_dict"
+    assert np.allclose(
+        adjusted_obs_dict[0],
+        keypoints_avg_dict[0] - mean_obs_dict[0]
+    ), "Adjusted x-coord for keypoint 1 is incorrect"
+    assert np.allclose(
+        adjusted_obs_dict[1],
+        keypoints_avg_dict[1] - mean_obs_dict[1]
+    ), "Adjusted y-coord for keypoint 1 is incorrect"
+
+    # Assertions for adjusted scaled ensemble predictions
+    assert isinstance(adjusted_scaled_preds, jnp.ndarray), "Expected adjusted_scaled_preds to be a JAX array"
+    assert adjusted_scaled_preds.shape == scaled_ensemble_preds.shape, \
+        f"Expected shape {scaled_ensemble_preds.shape}, got {adjusted_scaled_preds.shape}"
+
+    # Check that the ensemble predictions were adjusted correctly
+    for i in range(n_keypoints):
+        mean_x = mean_obs_dict[3 * i]
+        mean_y = mean_obs_dict[3 * i + 1]
+        expected_x_adjustment = scaled_ensemble_preds[:, i, 0] - mean_x
+        expected_y_adjustment = scaled_ensemble_preds[:, i, 1] - mean_y
+        assert np.allclose(adjusted_scaled_preds[:, i, 0], expected_x_adjustment), \
+            f"Scaled ensemble preds x-coord for keypoint {i} is incorrect"
+        assert np.allclose(adjusted_scaled_preds[:, i, 1], expected_y_adjustment), \
+            f"Scaled ensemble preds y-coord for keypoint {i} is incorrect"
+
+    print("Test for adjust_observations passed successfully.")
+
+
+def test_initialize_kalman_filter():
+    # Define test parameters
+    n_samples = 10
+    n_keypoints = 3
+
+    # Generate random scaled ensemble predictions
+    scaled_ensemble_preds = np.random.randn(n_samples, n_keypoints, 2)  # Shape (T, n_keypoints, 2)
+
+    # Create a mock adjusted observations dictionary
+    adjusted_obs_dict = {
+        0: np.random.randn(n_samples),  # Adjusted x observations for keypoint 0
+        1: np.random.randn(n_samples),  # Adjusted y observations for keypoint 0
+        3: np.random.randn(n_samples),  # Adjusted x observations for keypoint 1
+        4: np.random.randn(n_samples),  # Adjusted y observations for keypoint 1
+        6: np.random.randn(n_samples),  # Adjusted x observations for keypoint 2
+        7: np.random.randn(n_samples)   # Adjusted y observations for keypoint 2
+    }
+
+    # Run the function
+    m0s, S0s, As, cov_mats, Cs, Rs, y_obs_array = initialize_kalman_filter(
+        scaled_ensemble_preds, adjusted_obs_dict, n_keypoints
+    )
+
+    # Assertions to verify the function output
+    assert m0s.shape == (n_keypoints, 2), f"Expected shape {(n_keypoints, 2)}, got {m0s.shape}"
+    assert S0s.shape == (n_keypoints, 2, 2), f"Expected shape {(n_keypoints, 2, 2)}, got {S0s.shape}"
+    assert As.shape == (n_keypoints, 2, 2), f"Expected shape {(n_keypoints, 2, 2)}, got {As.shape}"
+    assert cov_mats.shape == (n_keypoints, 2, 2), f"Expected shape {(n_keypoints, 2, 2)}, got {cov_mats.shape}"
+    assert Cs.shape == (n_keypoints, 2, 2), f"Expected shape {(n_keypoints, 2, 2)}, got {Cs.shape}"
+    assert Rs.shape == (n_keypoints, 2, 2), f"Expected shape {(n_keypoints, 2, 2)}, got {Rs.shape}"
+    assert y_obs_array.shape == (n_keypoints, n_samples, 2), f"Expected shape {(n_keypoints, n_samples, 2)}, got {y_obs_array.shape}"
+
+    # Check that the diagonal of S0s contains non-negative values (variance cannot be negative)
+    assert jnp.all(S0s[:, 0, 0] >= 0), "S0s diagonal should have non-negative variances"
+    assert jnp.all(S0s[:, 1, 1] >= 0), "S0s diagonal should have non-negative variances"
+
+    # Check that the state transition matrix is correctly initialized as identity
+    expected_A = jnp.array([[1.0, 0], [0, 1.0]])
+    assert jnp.allclose(As, expected_A), "State transition matrix A should be identity"
+
+    # Check that the measurement function matrix C is correctly initialized
+    expected_C = jnp.array([[1, 0], [0, 1]])
+    assert jnp.allclose(Cs, expected_C), "Measurement function matrix C should be identity"
+
+    print("Test for initialize_kalman_filter passed successfully.")
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])

From e0f05345722e41b4517e3805b40ad2b3689ed4a2 Mon Sep 17 00:00:00 2001
From: Keemin Lee <67605380+keeminlee@users.noreply.github.com>
Date: Wed, 20 Nov 2024 08:53:37 -0500
Subject: [PATCH 19/25] Delete scripts/plotting_aeks.py

---
 scripts/plotting_aeks.py | 158 ---------------------------------------
 1 file changed, 158 deletions(-)
 delete mode 100644 scripts/plotting_aeks.py

diff --git a/scripts/plotting_aeks.py b/scripts/plotting_aeks.py
deleted file mode 100644
index f22eff9..0000000
--- a/scripts/plotting_aeks.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import copy
-import os
-import sys
-
-import cv2
-import matplotlib.patches as mpatches
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-from tqdm import tqdm
-
-sys.path.append(
-    os.path.abspath(os.path.join(os.path.dirname(__file__), '../../tracking-diagnostics')))
-
-from diagnostics.video import get_frames_from_idxs
-
-from eks.utils import convert_lp_dlc, format_data
-
-
-def format_data(ensemble_dir):
-    input_files = os.listdir(ensemble_dir)
-    markers_list = []
-    for input_file in input_files:
-        markers_curr = pd.read_csv(
-            os.path.join(ensemble_dir, input_file), header=[0, 1, 2], index_col=0)
-        keypoint_names = [c[1] for c in markers_curr.columns[::3]]
-        model_name = markers_curr.columns[0][0]
-        markers_curr_fmt = convert_lp_dlc(
-            markers_curr, keypoint_names, model_name=model_name)
-        markers_curr_fmt.to_csv('fmt_input.csv', index=False)
-        markers_list.append(markers_curr_fmt)
-    return markers_list
-
-
-import os
-import subprocess
-
-
-def save_video(save_file, tmp_dir, framerate, frame_pattern='frame_%06d.jpeg'):
-    call_str = f'ffmpeg -r {framerate} -i {os.path.join(tmp_dir, frame_pattern)} -c:v libx264 -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" {save_file}'
-
-    if os.name == 'nt':  # If the OS is Windows
-        subprocess.run(['ffmpeg', '-r', str(framerate), '-i', f'{tmp_dir}/frame_%06d.jpeg',
-                        '-c:v', 'libx264', '-vf', "pad=ceil(iw/2)*2:ceil(ih/2)*2",
-                        save_file],
-                       check=True)
-    else:  # If the OS is Unix/Linux
-        subprocess.run(['/bin/bash', '-c', call_str], check=True)
-
-
-# load eks
-eks_path = f'/eks/outputs/eks_test_vid.csv'
-markers_curr = pd.read_csv(eks_path, header=[0, 1, 2], index_col=0)
-keypoint_names = [c[1] for c in markers_curr.columns[::3]]
-model_name = markers_curr.columns[0][0]
-eks_pd = convert_lp_dlc(markers_curr, keypoint_names, model_name)
-
-# load aeks
-eks_path = f'/eks/outputs/aeks_test_vid.csv'
-markers_curr = pd.read_csv(eks_path, header=[0, 1, 2], index_col=0)
-keypoint_names = [c[1] for c in markers_curr.columns[::3]]
-model_name = markers_curr.columns[0][0]
-eks_pd2 = convert_lp_dlc(markers_curr, keypoint_names, model_name)
-
-# load ensembles
-ensemble_dir = f'/eks/data/mirror-mouse-aeks/expanded-networks'
-ensemble_pd_list = format_data(ensemble_dir)
-animal_ids = [1]
-body_parts = ['paw1LH_top', 'paw2LF_top', 'paw3RF_top', 'paw4RH_top', 'tailBase_top',
-              'tailMid_top', 'nose_top', 'obs_top',
-              'paw1LH_bot', 'paw2LF_bot', 'paw3RF_bot', 'paw4RH_bot', 'tailBase_bot',
-              'tailMid_bot', 'nose_bot', 'obsHigh_bot', 'obsLow_bot'
-              ]
-to_plot = []
-for animal_id in animal_ids:
-    for body_part in body_parts:
-        to_plot.append(body_part)
-
-save_path = '/eks/videos'
-video_name = 'test_vid.mp4'
-video_path = f'/eks/videos/{video_name}'
-cap = cv2.VideoCapture(video_path)
-
-start_frame = 0
-frame_idxs = None
-n_frames = 993
-idxs = np.arange(start_frame, start_frame + n_frames)
-framerate = 20
-
-
-def plot_video_markers(markers_pd, ax, n, body_part, color, alphas, markers, model_id=0,
-                       markersize=8):
-    x_key = body_part + '_x'
-    y_key = body_part + '_y'
-    markers_x = markers_pd[x_key][n]
-    markers_y = markers_pd[y_key][n]
-    ax.scatter(markers_x, markers_y, alpha=alphas[model_id], marker="o", color=color)
-
-
-colors = ['cyan', 'pink', 'purple']
-alphas = [.8] * len(ensemble_pd_list) + [1.0]
-markers = ['.'] * len(ensemble_pd_list) + ['x']
-model_labels = ['expanded-network rng0', 'eks', 'aeks']
-model_colors = colors
-fr = 60
-
-for body_part in to_plot:
-    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
-    tmp_dir = os.path.join(save_path, f'tmp_{body_part}')
-    if not os.path.exists(tmp_dir):
-        os.makedirs(tmp_dir)
-    save_file = os.path.join(save_path, f'test_vid_{body_part}.mp4')
-
-    txt_fr_kwargs = {
-        'fontsize': 14, 'color': [1, 1, 1], 'horizontalalignment': 'left',
-        'verticalalignment': 'top', 'fontname': 'monospace',
-        'bbox': dict(facecolor='k', alpha=0.25, edgecolor='none'),
-        'transform': ax.transAxes
-    }
-    save_imgs = True
-    if save_imgs:
-        markersize = 18
-    else:
-        markersize = 12
-    for idx in tqdm(range(len(idxs))):
-        n = idxs[idx]
-        ax.clear()
-        frame = get_frames_from_idxs(cap, [n])
-        ax.imshow(frame[0, 0], vmin=0, vmax=255, cmap='gray')
-        ax.set_xticks([])
-        ax.set_yticks([])
-        patches = []
-        # ensemble
-        for model_id, markers_pd in enumerate(ensemble_pd_list):
-            markers_pd_copy = copy.deepcopy(markers_pd)
-            plot_video_markers(markers_pd_copy, ax, n, body_part, colors[0], alphas, markers,
-                               model_id=model_id, markersize=markersize)
-        # eks_ind
-        for model_id, markers_pd in enumerate([eks_pd]):
-            markers_pd_copy = copy.deepcopy(markers_pd)
-            plot_video_markers(markers_pd_copy, ax, n, body_part, colors[1], alphas, markers,
-                               model_id=model_id, markersize=markersize)
-        # eks_cdnm
-        for model_id, markers_pd in enumerate([eks_pd2]):
-            markers_pd_copy = copy.deepcopy(markers_pd)
-            plot_video_markers(markers_pd_copy, ax, n, body_part, colors[2], alphas, markers,
-                               model_id=model_id, markersize=markersize)
-        # legend
-        for i, model_label in enumerate(model_labels):
-            patches.append(mpatches.Patch(color=model_colors[i], label=model_label))
-        ax.legend(handles=patches, prop={'size': 12}, loc='upper right')
-        im = ax.text(0.02, 0.98, f'frame {n}', **txt_fr_kwargs)
-        plt.savefig(os.path.join(tmp_dir, 'frame_%06d.jpeg' % idx))
-    save_video(save_file, tmp_dir, framerate, frame_pattern='frame_%06d.jpeg')
-    # Clean up temporary directory
-    for file in os.listdir(tmp_dir):
-        os.remove(os.path.join(tmp_dir, file))
-    os.rmdir(tmp_dir)

From 4619ab763a556d73801bc3851ecb40a376a70d73 Mon Sep 17 00:00:00 2001
From: Keemin Lee <67605380+keeminlee@users.noreply.github.com>
Date: Wed, 20 Nov 2024 08:58:05 -0500
Subject: [PATCH 20/25] Delete tests/run_tests.py

---
 tests/run_tests.py | 23 -----------------------
 1 file changed, 23 deletions(-)
 delete mode 100644 tests/run_tests.py

diff --git a/tests/run_tests.py b/tests/run_tests.py
deleted file mode 100644
index 2c045cc..0000000
--- a/tests/run_tests.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import pytest
-import sys
-
-def main():
-    # Get arguments from the command line (excluding the script name)
-    args = sys.argv[1:]
-
-    # Default to running both test files if no arguments are provided
-    if not args:
-        test_files = ["test_core.py", "test_singlecam_smoother.py"]
-    else:
-        # Use provided arguments as the list of test files to run
-        test_files = args
-
-    # Run pytest on the specified test files
-    result = pytest.main(["-v"] + test_files)
-    if result == 0:
-        print("All tests passed successfully!")
-    else:
-        print("Some tests failed.")
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file

From e1ecc975d9f65361f262599816715159772f409d Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Wed, 20 Nov 2024 09:12:11 -0500
Subject: [PATCH 21/25] added comment for E_blocks

---
 eks/core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/eks/core.py b/eks/core.py
index eadd2e7..b47f985 100644
--- a/eks/core.py
+++ b/eks/core.py
@@ -742,7 +742,8 @@ def compute_covariance_matrix(ensemble_preds):
     # Index covariance matrix into blocks for each keypoint
     cov_mats = []
     for i in range(n_keypoints):
-        E_block = extract_submatrix(E, i)
+        # E_block = extract_submatrix(E, i)  -- using E_block instead of the identity matrix
+        # leads to a correlated dynamics model, but further debugging required due to negative vars
         cov_mats.append([[1, 0], [0, 1]])
     cov_mats = jnp.array(cov_mats)
     return cov_mats

From c254d0a0c780bbf89f21560a228b83859dacaa1c Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Tue, 7 Jan 2025 13:22:06 -0500
Subject: [PATCH 22/25] mirrored and unmirrored multicam functions

---
 eks/ibl_paw_multiview_smoother.py    |   1 -
 eks/ibl_pupil_smoother.py            |   8 +-
 eks/multicam_smoother.py             | 211 ++++++++++++++++++++++-----
 eks/singlecam_smoother.py            |   3 +-
 eks/utils.py                         |  70 ++++++---
 scripts/ibl_paw_multiview_example.py |   1 -
 scripts/ibl_pupil_example.py         |   1 -
 scripts/mirrored_multicam_example.py |  87 ++++-------
 scripts/multicam_example.py          |  54 +++++++
 scripts/singlecam_example.py         |   3 +-
 tests/conftest.py                    |   2 +-
 tests/test_core.py                   |   7 +-
 tests/test_multicam_smoother.py      |  86 +++++++++++
 tests/test_singlecam_smoother.py     |   2 -
 14 files changed, 412 insertions(+), 124 deletions(-)
 create mode 100644 scripts/multicam_example.py
 create mode 100644 tests/test_multicam_smoother.py

diff --git a/eks/ibl_paw_multiview_smoother.py b/eks/ibl_paw_multiview_smoother.py
index c400e79..d46c066 100644
--- a/eks/ibl_paw_multiview_smoother.py
+++ b/eks/ibl_paw_multiview_smoother.py
@@ -6,7 +6,6 @@
 from eks.core import backward_pass, eks_zscore, ensemble, forward_pass
 from eks.utils import make_dlc_pandas_index
 
-
 # TODO:
 # - allow conf_weighted_mean for ensemble variance computation
 
diff --git a/eks/ibl_pupil_smoother.py b/eks/ibl_pupil_smoother.py
index 546f844..c9ee6a4 100644
--- a/eks/ibl_pupil_smoother.py
+++ b/eks/ibl_pupil_smoother.py
@@ -6,8 +6,8 @@
 import pandas as pd
 from scipy.optimize import minimize
 
-from eks.core import backward_pass, compute_nll, eks_zscore, ensemble, forward_pass
-from eks.utils import crop_frames, make_dlc_pandas_index, format_data
+from eks.core import backward_pass, compute_nll, ensemble, forward_pass
+from eks.utils import crop_frames, format_data, make_dlc_pandas_index
 
 
 def get_pupil_location(dlc):
@@ -80,7 +80,6 @@ def add_mean_to_array(pred_arr, keys, mean_x, mean_y):
     return processed_arr_dict
 
 
-        smooth_params (list): List containing diameter_s and com_s.
 def fit_eks_pupil(
     input_source: Union[str, list],
     save_file: str,
@@ -277,7 +276,8 @@ def ensemble_kalman_smoother_ibl_pupil(
         # compute zscore for EKS to see how it deviates from the ensemble
         # eks_predictions = \
         #     np.asarray([processed_arr_dict[key_pair[0]], processed_arr_dict[key_pair[1]]]).T
-        # ensemble_preds_curr = ensemble_preds[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
+        # ensemble_preds_curr = ensemble_preds
+        #                                   [:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
         # ensemble_vars_curr = ensemble_vars[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
         # zscore, _ = eks_zscore(
         #     eks_predictions,
diff --git a/eks/multicam_smoother.py b/eks/multicam_smoother.py
index 5845a96..2124b50 100644
--- a/eks/multicam_smoother.py
+++ b/eks/multicam_smoother.py
@@ -1,3 +1,6 @@
+import os
+from typing import Optional, Union
+
 import numpy as np
 import pandas as pd
 from scipy.optimize import minimize
@@ -11,39 +14,182 @@
     forward_pass,
 )
 from eks.ibl_paw_multiview_smoother import pca, remove_camera_means
-from eks.utils import crop_frames, make_dlc_pandas_index
+from eks.utils import crop_frames, format_data, make_dlc_pandas_index, populate_output_dataframe
+
+
+def fit_eks_mirrored_multicam(
+    input_source: Union[str, list],
+    save_file: str,
+    bodypart_list: Optional[list] = None,
+    smooth_param: Optional[Union[float, list]] = None,
+    s_frames: Optional[list] = None,
+    camera_names: Optional[list] = None,
+    quantile_keep_pca: float = 95,
+    avg_mode: str = 'median',
+    zscore_threshold: float = 2,
+) -> tuple:
+    """
+    Fit the Ensemble Kalman Smoother for mirrored multi-camera data.
+
+    Args:
+        input_source: Directory path or list of CSV file paths with columns for all cameras.
+        save_file: File to save output DataFrame.
+        bodypart_list: List of body parts.
+        smooth_param: Value in (0, Inf); smaller values lead to more smoothing.
+        s_frames: Frames for automatic optimization if smooth_param is not provided.
+        camera_names: List of camera names corresponding to the input data.
+        quantile_keep_pca: Percentage of points kept for PCA (default: 95).
+        avg_mode: Mode for averaging across ensemble ('median', 'mean').
+        zscore_threshold: Z-score threshold for filtering low ensemble std.
+        save_dir: The directory path to save output files to.
+
+    Returns:
+        tuple: Smoothed DataFrames for each camera, final smoothing parameters, and NLL values.
+    """
+    # Load and format input files
+    input_dfs_list, output_df, keypoint_names = format_data(input_source)
+    if bodypart_list is None:
+        bodypart_list = keypoint_names
+
+    # loop over keypoints; apply eks to each individually
+    for keypoint_ensemble in bodypart_list:
+        # Separate body part predictions by camera view
+        marker_list_by_cam = [[] for _ in range(len(camera_names))]
+        for markers_curr in input_dfs_list:
+            for c, camera_name in enumerate(camera_names):
+                non_likelihood_keys = [
+                    key for key in markers_curr.keys()
+                    if camera_names[c] in key and keypoint_ensemble in key
+                ]
+                marker_list_by_cam[c].append(markers_curr[non_likelihood_keys])
+
+        # Run the ensemble Kalman smoother for multi-camera data
+        camera_dfs, smooth_params_final, nll_values = ensemble_kalman_smoother_multicam(
+            markers_list_cameras=marker_list_by_cam,
+            keypoint_ensemble=keypoint_ensemble,
+            smooth_param=smooth_param,
+            quantile_keep_pca=quantile_keep_pca,
+            camera_names=camera_names,
+            s_frames=s_frames,
+            ensembling_mode=avg_mode,
+            zscore_threshold=zscore_threshold,
+        )
+        # Put results in new dataframe
+        for camera_name in camera_names:
+            output_df = populate_output_dataframe(
+                camera_dfs[camera_name],
+                keypoint_ensemble,
+                output_df,
+                key_suffix=f'_{camera_name}'
+            )
+        # Save the output DataFrames to CSV file
+        output_df.to_csv(save_file)
+    return output_df, smooth_params_final, input_dfs_list, nll_values
+
+
+def fit_eks_multicam(
+    input_source: Union[str, list],
+    save_dir: str,
+    bodypart_list: Optional[list] = None,
+    smooth_param: Optional[Union[float, list]] = None,
+    s_frames: Optional[list] = None,
+    camera_names: Optional[list] = None,
+    quantile_keep_pca: float = 95,
+    avg_mode: str = 'median',
+    zscore_threshold: float = 2,
+) -> tuple:
+    """
+    Fit the Ensemble Kalman Smoother for un-mirrored multi-camera data.
+
+    Args:
+        input_source: Directory path or list of CSV file paths with columns for all cameras.
+        save_dir: Directory to save output DataFrame.
+        bodypart_list: List of body parts.
+        smooth_param: Value in (0, Inf); smaller values lead to more smoothing.
+        s_frames: Frames for automatic optimization if smooth_param is not provided.
+        camera_names: List of camera names corresponding to the input data.
+        quantile_keep_pca: Percentage of points kept for PCA (default: 95).
+        avg_mode: Mode for averaging across ensemble ('median', 'mean').
+        zscore_threshold: Z-score threshold for filtering low ensemble std.
+
+    Returns:
+        tuple: Smoothed DataFrames for each camera, final smoothing parameters, and NLL values.
+    """
+    # Load and format input files
+    # NOTE: input_dfs_list is a list of camera-specific lists of marker Dataframes
+    input_dfs_list, output_df, keypoint_names = format_data(input_source,
+                                                            camera_names=camera_names)
+    if bodypart_list is None:
+        bodypart_list = keypoint_names
+
+    output_dfs = []  # Stores output dataframes (by camera)
+    for _ in camera_names:
+        output_dfs.append(output_df.copy())
+
+    # loop over keypoints; apply eks to each individually
+    for keypoint_ensemble in bodypart_list:
+        # Separate body part predictions by camera view
+        marker_list_by_cam = [[] for _ in range(len(camera_names))]
+        for c, camera_name in enumerate(camera_names):
+            ensemble_members = input_dfs_list[c]
+            for markers_curr in ensemble_members:
+                non_likelihood_keys = [
+                    key for key in markers_curr.keys()
+                    if keypoint_ensemble in key
+                ]
+                marker_list_by_cam[c].append(markers_curr[non_likelihood_keys])
+
+        # Run the ensemble Kalman smoother for multi-camera data
+        camera_dfs, smooth_params_final, nll_values = ensemble_kalman_smoother_multicam(
+            markers_list_cameras=marker_list_by_cam,
+            keypoint_ensemble=keypoint_ensemble,
+            smooth_param=smooth_param,
+            quantile_keep_pca=quantile_keep_pca,
+            camera_names=camera_names,
+            s_frames=s_frames,
+            ensembling_mode=avg_mode,
+            zscore_threshold=zscore_threshold,
+        )
+        # Save the output DataFrames to CSV files and populate output_dfs
+        for c, (camera_name, camera_df) in enumerate(camera_dfs.items()):
+            populate_output_dataframe(camera_df,
+                                      keypoint_ensemble,
+                                      output_dfs[c],
+                                      )
+
+    # Save output files (one per view)
+    for c, camera in enumerate(camera_names):
+        save_filename = f'multicam_{camera}_results.csv'
+        output_dfs[c].to_csv(os.path.join(save_dir, save_filename))
+    return output_dfs, smooth_params_final, input_dfs_list, nll_values
 
 
 def ensemble_kalman_smoother_multicam(
-    markers_list_cameras, keypoint_ensemble, smooth_param, quantile_keep_pca, camera_names,
-        s_frames, ensembling_mode='median', zscore_threshold=2):
-    """Use multi-view constraints to fit a 3d latent subspace for each body part.
-
-    Parameters
-    ----------
-    markers_list_cameras : list of list of pd.DataFrames
-        each list element is a list of dataframe predictions from one ensemble member for each
-        camera.
-    keypoint_ensemble : str
-        the name of the keypoint to be ensembled and smoothed
-    smooth_param : float
-        ranges from .01-2 (smaller values = more smoothing)
-    quantile_keep_pca
-        percentage of the points are kept for multi-view PCA (lowest ensemble variance)
-    camera_names: list
-        the camera names (should be the same length as markers_list_cameras).
-    s_frames : list of tuples or int
-        specifies frames to be used for smoothing parameter auto-tuning
-        the function used for ensembling ('mean', 'median', or 'confidence_weighted_mean')
-    zscore_threshold:
-        Minimum std threshold to reduce the effect of low ensemble std on a zscore metric
-        (default 2).
-
-    Returns
-    -------
-    dict
-        camera_dfs: dataframe containing smoothed markers for each camera; same format as input
-        dataframes
+    markers_list_cameras: list,
+    keypoint_ensemble: str,
+    smooth_param: Optional[float] = None,
+    quantile_keep_pca: float = 95,
+    camera_names: Optional[list] = None,
+    s_frames: Optional[list] = None,
+    ensembling_mode: str = 'median',
+    zscore_threshold: float = 2,
+) -> dict:
+    """
+    Use multi-view constraints to fit a 3D latent subspace for each body part.
+
+    Args:
+        markers_list_cameras: List of lists of pd.DataFrames, where each inner list contains
+            DataFrame predictions from one ensemble member for each camera.
+        keypoint_ensemble: The name of the keypoint to be ensembled and smoothed.
+        smooth_param: Value in (0, Inf); smaller values lead to more smoothing (default: None).
+        quantile_keep_pca: Percentage of points kept for PCA (default: 95).
+        camera_names: List of camera names corresponding to the input data (default: None).
+        s_frames: Frames for auto-optimization if smooth_param is not provided (default: None).
+        ensembling_mode: Mode for averaging across ensemble.
+        zscore_threshold: Minimum std threshold for z-score calculation (default: 2).
+
+    Returns:
+        dict: A dictionary containing smoothed DataFrames for each camera.
     """
 
     # --------------------------------------------------------------
@@ -154,7 +300,7 @@ def ensemble_kalman_smoother_multicam(
     # Call functions from ensemble_kalman to optimize smooth_param before filtering and smoothing
     smooth_param, ms, Vs, nll, nll_values = multicam_optimize_smooth(
         cov_matrix, y_obs, m0, S0, C, A, R, ensemble_vars, s_frames, smooth_param)
-    print(f"NLL is {nll} for {keypoint_ensemble}, smooth_param={smooth_param}")
+    print(f"Smoothed {keypoint_ensemble} at smooth_param={smooth_param:.3f}")
     smooth_param_final = smooth_param
 
     # Smoothed posterior over ys
@@ -194,7 +340,7 @@ def ensemble_kalman_smoother_multicam(
             nll_values,
             ensemble_std
         ]).T
-        camera_dfs[camera_name + '_df'] = pd.DataFrame(pred_arr, columns=pdindex)
+        camera_dfs[camera_name] = pd.DataFrame(pred_arr, columns=pdindex)
     return camera_dfs, smooth_param_final, nll_values
 
 
@@ -235,7 +381,6 @@ def callback(xk):
             bounds=[(0, None)]
         )
         smooth_param = sol.x[0]
-        print(f'Optimal at s={smooth_param}')
 
     # Final smooth with optimized s
     ms, Vs, nll, nll_values = multicam_smooth_final(
diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index 7bdf743..a7df9df 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -12,7 +12,6 @@
 from eks.core import (
     compute_covariance_matrix,
     compute_initial_guesses,
-    eks_zscore,
     jax_backward_pass,
     jax_ensemble,
     jax_forward_pass,
@@ -158,7 +157,6 @@ def ensemble_kalman_smoother_singlecam(
 
     y_m_smooths = np.zeros((n_keypoints, T, n_coords))
     y_v_smooths = np.zeros((n_keypoints, T, n_coords, n_coords))
-    eks_preds_array = np.zeros(y_m_smooths.shape)
 
     data_arr = []
 
@@ -170,6 +168,7 @@ def ensemble_kalman_smoother_singlecam(
         mean_y_obs = mean_obs_dict[3 * k + 1]
 
         # Computing z-score
+        # eks_preds_array = np.zeros(y_m_smooths.shape)
         # eks_preds_array[k] = y_m_smooths[k].copy()
         # eks_preds_array[k] = np.asarray([
         #     eks_preds_array[k].T[0] + mean_x_obs,
diff --git a/eks/utils.py b/eks/utils.py
index ffb945b..00fb59a 100644
--- a/eks/utils.py
+++ b/eks/utils.py
@@ -81,15 +81,17 @@ def convert_slp_dlc(base_dir, slp_file):
     return df
 
 
-def format_data(input_source):
+def format_data(input_source, camera_names=None):
     """
     Load and format input files from a directory or a list of file paths.
 
     Args:
         input_source (str or list): Directory path or list of file paths.
+        camera_names (None or list): List of multiple camera/view names. None = single camera
+            *** data with mirrored naming schemes (e.g. paw1LH_top), keep camera_names as None
 
     Returns:
-        input_dfs_list (list): List of formatted DataFrames.
+        input_dfs_list (list): List of formatted DataFrames (List of Lists for un-mirrored sets).
         output_df (DataFrame): Empty DataFrame for storing results.
         keypoint_names (list): List of keypoint names.
 
@@ -109,28 +111,58 @@ def format_data(input_source):
         raise ValueError("input_source must be a directory path or a list of file paths")
 
     # Process each file based on the data type
-    for file_path in file_paths:
-        if file_path.endswith('.slp'):
-            markers_curr = convert_slp_dlc(os.path.dirname(file_path), os.path.basename(file_path))
-            keypoint_names = [c[1] for c in markers_curr.columns[::3]]
-            markers_curr_fmt = markers_curr
-
-        elif file_path.endswith('.csv'):
-            markers_curr = pd.read_csv(file_path, header=[0, 1, 2], index_col=0)
-            keypoint_names = [c[1] for c in markers_curr.columns[::3]]
-            model_name = markers_curr.columns[0][0]
-            markers_curr_fmt = convert_lp_dlc(markers_curr, keypoint_names, model_name=model_name)
-        else:
-            continue
+    if camera_names is None:
+        for file_path in file_paths:
+            if file_path.endswith('.slp'):
+                markers_curr = convert_slp_dlc(os.path.dirname(file_path),
+                                               os.path.basename(file_path))
+                keypoint_names = [c[1] for c in markers_curr.columns[::3]]
+                markers_curr_fmt = markers_curr
+            elif file_path.endswith('.csv'):
+                markers_curr = pd.read_csv(file_path, header=[0, 1, 2], index_col=0)
+                keypoint_names = [c[1] for c in markers_curr.columns[::3]]
+                model_name = markers_curr.columns[0][0]
+                markers_curr_fmt = convert_lp_dlc(markers_curr,
+                                                  keypoint_names,
+                                                  model_name=model_name)
+            else:
+                continue
+            input_dfs_list.append(markers_curr_fmt)
+    else:
+        for camera in camera_names:
+            markers_for_this_camera = []  # inner list of markers for specific camera view
+            for file_path in file_paths:
+                if camera not in file_path:
+                    continue
+                else:  # file_path matches the camera name, proceed with processing
+                    if file_path.endswith('.slp'):
+                        markers_curr = convert_slp_dlc(os.path.dirname(file_path),
+                                                       os.path.basename(file_path))
+                        keypoint_names = [c[1] for c in markers_curr.columns[::3]]
+                        markers_curr_fmt = markers_curr
+                    elif file_path.endswith('.csv'):
+                        markers_curr = pd.read_csv(file_path, header=[0, 1, 2], index_col=0)
+                        keypoint_names = [c[1] for c in markers_curr.columns[::3]]
+                        model_name = markers_curr.columns[0][0]
+                        markers_curr_fmt = convert_lp_dlc(markers_curr,
+                                                          keypoint_names,
+                                                          model_name=model_name)
+                    else:
+                        continue
+                markers_for_this_camera.append(markers_curr_fmt)
+            input_dfs_list.append(markers_for_this_camera) # list of lists of markers
 
-        input_dfs_list.append(markers_curr_fmt)
 
     # Check if we found any valid input files
     if len(input_dfs_list) == 0:
         raise FileNotFoundError(f'No valid marker input files found in {input_source}')
 
     # Create an empty output DataFrame using the last processed DataFrame as a template
-    output_df = make_output_dataframe(input_dfs_list[0])
+    if camera_names is None:
+        last_df = input_dfs_list[0]
+    else:  # multicam
+        last_df = input_dfs_list[0][0]
+    output_df = make_output_dataframe(last_df)
 
     return input_dfs_list, output_df, keypoint_names
 
@@ -152,11 +184,13 @@ def make_output_dataframe(markers_curr):
             parts = col.split('_')
             instance_num = parts[0]
             keypoint_name = '_'.join(parts[1:-1])  # Combine parts for keypoint name
+            if keypoint_name != '':
+                keypoint_name = f'_{keypoint_name}'
             feature = parts[-1]
 
             # Construct new column names with desired MultiIndex structure
             new_columns.append(
-                ('ensemble-kalman_tracker', f'{instance_num}_{keypoint_name}', feature))
+                ('ensemble-kalman_tracker', f'{instance_num}{keypoint_name}', feature))
 
         # Convert the columns Index to a MultiIndex with three levels
         markers_eks.columns = pd.MultiIndex.from_tuples(new_columns,
diff --git a/scripts/ibl_paw_multiview_example.py b/scripts/ibl_paw_multiview_example.py
index ba88265..4f68518 100644
--- a/scripts/ibl_paw_multiview_example.py
+++ b/scripts/ibl_paw_multiview_example.py
@@ -9,7 +9,6 @@
 from eks.ibl_paw_multiview_smoother import ensemble_kalman_smoother_ibl_paw
 from eks.utils import convert_lp_dlc
 
-
 smoother_type = 'ibl_paw'
 
 # Collect User-Provided Args
diff --git a/scripts/ibl_pupil_example.py b/scripts/ibl_pupil_example.py
index 80fc70e..974014c 100644
--- a/scripts/ibl_pupil_example.py
+++ b/scripts/ibl_pupil_example.py
@@ -6,7 +6,6 @@
 from eks.ibl_pupil_smoother import fit_eks_pupil
 from eks.utils import plot_results
 
-
 smoother_type = 'ibl_pupil'
 
 # Collect User-Provided Arguments
diff --git a/scripts/mirrored_multicam_example.py b/scripts/mirrored_multicam_example.py
index f03d511..bdda084 100644
--- a/scripts/mirrored_multicam_example.py
+++ b/scripts/mirrored_multicam_example.py
@@ -1,77 +1,52 @@
 """Example script for multi-camera datasets."""
+
 import os
 
 from eks.command_line_args import handle_io, handle_parse_args
-from eks.multicam_smoother import ensemble_kalman_smoother_multicam
-from eks.utils import format_data, plot_results, populate_output_dataframe
-
+from eks.multicam_smoother import fit_eks_mirrored_multicam
+from eks.utils import plot_results
 
 smoother_type = 'multicam'
 
 # Collect User-Provided Args
 args = handle_parse_args(smoother_type)
-input_dir = os.path.abspath(args.input_dir)
-save_dir = handle_io(input_dir, args.save_dir)  # defaults to outputs
+input_source = args.input_dir if isinstance(args.input_dir, str) else args.input_files
+# Determine the input directory path
+if isinstance(input_source, str):
+    input_dir = os.path.abspath(input_source)
+else:
+    input_dir = os.path.abspath(os.path.dirname(input_source[0]))
+# Set up the save directory
 save_filename = args.save_filename
+save_dir = handle_io(input_dir, args.save_dir)
 bodypart_list = args.bodypart_list
-s = args.s  # defaults to automatic optimization
-s_frames = args.s_frames  # frames to be used for automatic optimization (only if no --s flag)
-blocks = args.blocks
+s = args.s  # Defaults to automatic optimization
+s_frames = args.s_frames  # Frames to be used for automatic optimization if s is not provided
 camera_names = args.camera_names
 quantile_keep_pca = args.quantile_keep_pca
 
-# Load and format input files and prepare an empty DataFrame for output.
-input_dfs_list, output_df, keypoint_names = format_data(input_dir)
-if bodypart_list is None:
-    bodypart_list = keypoint_names
-print(f'Input data has been read in for the following keypoints:\n{bodypart_list}')
-
-# loop over keypoints; apply eks to each individually
-# Note: all camera views must be stored in the same csv file
-# TODO: dictionary where keys are view names, values are lists of csv paths
-for keypoint_ensemble in bodypart_list:
-    # Separate body part predictions by camera view
-    marker_list_by_cam = [[] for _ in range(len(camera_names))]
-    for markers_curr in input_dfs_list:
-        for c, camera_name in enumerate(camera_names):
-            non_likelihood_keys = [
-                key for key in markers_curr.keys()
-                if camera_names[c] in key and keypoint_ensemble in key
-            ]
-            marker_list_by_cam[c].append(markers_curr[non_likelihood_keys])
-
-    # run eks
-    cameras_df_dict, s_final, nll_values = ensemble_kalman_smoother_multicam(
-        markers_list_cameras=marker_list_by_cam,
-        keypoint_ensemble=keypoint_ensemble,
-        smooth_param=s,
-        quantile_keep_pca=quantile_keep_pca,
-        camera_names=camera_names,
-        s_frames=s_frames,
-    )
-
-    # put results into new dataframe
-    for camera in camera_names:
-        cameras_df = cameras_df_dict[f'{camera}_df']
-        output_df = populate_output_dataframe(
-            cameras_df,
-            keypoint_ensemble,
-            output_df,
-            key_suffix=f'_{camera}',
-        )
-
-# save eks results
-save_filename = save_filename or f'{smoother_type}_{s_final}.csv'
-output_df.to_csv(os.path.join(save_dir, save_filename))
+# Fit EKS using the provided input data
+output_df, s_finals, input_dfs, nll_values = fit_eks_mirrored_multicam(
+    input_source=input_source,
+    save_file=os.path.join(save_dir, save_filename or 'eks_mirrored_multicam'),
+    bodypart_list=bodypart_list,
+    smooth_param=s,
+    s_frames=s_frames,
+    camera_names=camera_names,
+    quantile_keep_pca=quantile_keep_pca,
+)
 
-# plot results
+# Plot results for a specific keypoint (default to last keypoint)
+keypoint_i = -1
 plot_results(
     output_df=output_df,
-    input_dfs_list=input_dfs_list,
-    key=f'{bodypart_list[-1]}_{camera_names[0]}',
+    input_dfs_list=input_dfs,
+    key=f'{bodypart_list[keypoint_i]}_{camera_names[0]}',
     idxs=(0, 500),
-    s_final=s_final,
-    nll_values=nll_values,
+    s_final=s_finals,
+    nll_values=None,
     save_dir=save_dir,
     smoother_type=smoother_type,
 )
+
+print("Ensemble Kalman Smoothing complete. Results saved and plotted successfully.")
diff --git a/scripts/multicam_example.py b/scripts/multicam_example.py
new file mode 100644
index 0000000..bbb1824
--- /dev/null
+++ b/scripts/multicam_example.py
@@ -0,0 +1,54 @@
+"""Example script for multi-camera datasets."""
+
+import os
+
+from eks.command_line_args import handle_io, handle_parse_args
+from eks.multicam_smoother import fit_eks_multicam
+from eks.utils import plot_results
+
+smoother_type = 'multicam'
+
+# Collect User-Provided Args
+args = handle_parse_args(smoother_type)
+input_source = args.input_dir if isinstance(args.input_dir, str) else args.input_files
+# Determine the input directory path
+if isinstance(input_source, str):
+    input_dir = os.path.abspath(input_source)
+else:
+    input_dir = os.path.abspath(os.path.dirname(input_source[0]))
+# Set up the save directory
+save_filename = args.save_filename
+save_dir = handle_io(input_dir, args.save_dir)
+bodypart_list = args.bodypart_list
+s = args.s  # Defaults to automatic optimization
+s_frames = args.s_frames  # Frames to be used for automatic optimization if s is not provided
+camera_names = args.camera_names
+quantile_keep_pca = args.quantile_keep_pca
+
+# Fit EKS using the provided input data
+output_dfs, s_finals, input_dfs, nll_values = fit_eks_multicam(
+    input_source=input_source,
+    save_dir=save_dir,
+    bodypart_list=bodypart_list,
+    smooth_param=s,
+    s_frames=s_frames,
+    camera_names=camera_names,
+    quantile_keep_pca=quantile_keep_pca,
+)
+
+
+# Plot results for a specific keypoint (default to last keypoint of last camera view)
+keypoint_i = -1
+camera_c = -1
+plot_results(
+    output_df=output_dfs[camera_c],
+    input_dfs_list=input_dfs[camera_c],
+    key=f'{bodypart_list[keypoint_i]}',
+    idxs=(0, 500),
+    s_final=s_finals,
+    nll_values=None,
+    save_dir=save_dir,
+    smoother_type=smoother_type,
+)
+
+print("Ensemble Kalman Smoothing complete. Results saved and plotted successfully.")
diff --git a/scripts/singlecam_example.py b/scripts/singlecam_example.py
index 595016b..b92054c 100644
--- a/scripts/singlecam_example.py
+++ b/scripts/singlecam_example.py
@@ -6,7 +6,6 @@
 from eks.singlecam_smoother import fit_eks_singlecam
 from eks.utils import plot_results
 
-
 smoother_type = 'singlecam'
 
 # Collect User-Provided Args
@@ -31,7 +30,7 @@
     input_source=input_source,
     save_file=os.path.join(save_dir, save_filename or 'eks_singlecam.csv'),
     bodypart_list=bodypart_list,
-    s=s,
+    smooth_param=s,
     s_frames=s_frames,
     blocks=blocks,
 )
diff --git a/tests/conftest.py b/tests/conftest.py
index 3503196..4cde8ba 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,7 +1,7 @@
 import subprocess
+from typing import Callable
 
 import pytest
-from typing import Callable
 
 
 @pytest.fixture
diff --git a/tests/test_core.py b/tests/test_core.py
index ee71e17..da7212f 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1,8 +1,9 @@
-import pytest
-import numpy as np
 import jax.numpy as jnp
+import numpy as np
 import pandas as pd
-from eks.core import ensemble, kalman_dot, forward_pass, backward_pass, compute_nll, jax_ensemble
+import pytest
+
+from eks.core import backward_pass, compute_nll, ensemble, forward_pass, jax_ensemble, kalman_dot
 
 
 def test_ensemble():
diff --git a/tests/test_multicam_smoother.py b/tests/test_multicam_smoother.py
new file mode 100644
index 0000000..21f6715
--- /dev/null
+++ b/tests/test_multicam_smoother.py
@@ -0,0 +1,86 @@
+import numpy as np
+import pandas as pd
+
+from eks.multicam_smoother import ensemble_kalman_smoother_multicam
+
+
+def test_ensemble_kalman_smoother_multicam():
+    """Test the basic functionality of ensemble_kalman_smoother_multicam."""
+    # Mock inputs
+    keypoint_names = ['kp1', 'kp2']
+    columns = [f'{kp}_{coord}' for kp in keypoint_names for coord in ['x', 'y', 'likelihood']]
+    markers_list_cameras = [
+        [
+            pd.DataFrame(np.random.randn(100, len(columns)), columns=columns),
+            pd.DataFrame(np.random.randn(100, len(columns)), columns=columns),
+        ] for _ in range(2)
+    ]
+    camera_names = ['cam1', 'cam2']
+    keypoint_ensemble = 'kp1'
+
+    smooth_param = 0.1
+    quantile_keep_pca = 95
+    s_frames = None
+    zscore_threshold = 2
+
+    # Run the smoother
+    camera_dfs, smooth_param_final, nll_values = ensemble_kalman_smoother_multicam(
+        markers_list_cameras=markers_list_cameras,
+        keypoint_ensemble=keypoint_ensemble,
+        smooth_param=smooth_param,
+        quantile_keep_pca=quantile_keep_pca,
+        camera_names=camera_names,
+        s_frames=s_frames,
+        ensembling_mode='median',
+        zscore_threshold=zscore_threshold,
+    )
+
+    # Assertions
+    assert isinstance(camera_dfs, dict), "Expected output to be a dictionary"
+    assert len(camera_dfs) == len(camera_names), \
+        f"Expected {len(camera_names)} entries in camera_dfs, got {len(camera_dfs)}"
+    for cam_name in camera_names:
+        assert cam_name in camera_dfs, f"Missing camera name {cam_name} in output"
+        assert isinstance(camera_dfs[cam_name], pd.DataFrame), \
+            f"Expected DataFrame for {cam_name}, got {type(camera_dfs[cam_name])}"
+
+    assert isinstance(smooth_param_final, float), \
+        f"Expected smooth_param_final to be a float, got {type(smooth_param_final)}"
+    assert smooth_param_final == smooth_param, \
+        f"Expected smooth_param_final to match input smooth_param ({smooth_param}), " \
+        f"got {smooth_param_final}"
+
+
+def test_ensemble_kalman_smoother_multicam_no_smooth_param():
+    """Test ensemble_kalman_smoother_multicam with no smooth_param provided."""
+    # Mock inputs
+    keypoint_names = ['kp1', 'kp2']
+    columns = [f'{kp}_{coord}' for kp in keypoint_names for coord in ['x', 'y', 'likelihood']]
+    markers_list_cameras = [
+        [
+            pd.DataFrame(np.random.randn(100, len(columns)), columns=columns),
+            pd.DataFrame(np.random.randn(100, len(columns)), columns=columns),
+        ] for _ in range(2)
+    ]
+    camera_names = ['cam1', 'cam2']
+    keypoint_ensemble = 'kp1'
+
+    quantile_keep_pca = 90
+    s_frames = [(0, 10)]
+
+    # Run the smoother without providing smooth_param
+    camera_dfs, smooth_param_final, nll_values = ensemble_kalman_smoother_multicam(
+        markers_list_cameras=markers_list_cameras,
+        keypoint_ensemble=keypoint_ensemble,
+        smooth_param=None,
+        quantile_keep_pca=quantile_keep_pca,
+        camera_names=camera_names,
+        s_frames=s_frames,
+        ensembling_mode='median',
+        zscore_threshold=2,
+    )
+
+    # Assertions
+    assert smooth_param_final is not None, "Expected smooth_param_final to be not None"
+    assert isinstance(smooth_param_final, float), \
+        f"Expected smooth_param_final to be a float, got {type(smooth_param_final)}"
diff --git a/tests/test_singlecam_smoother.py b/tests/test_singlecam_smoother.py
index 7fed037..ac9b828 100644
--- a/tests/test_singlecam_smoother.py
+++ b/tests/test_singlecam_smoother.py
@@ -1,7 +1,5 @@
 import jax.numpy as jnp
 import numpy as np
-import jax
-import jax.numpy as jnp
 import pandas as pd
 
 from eks.singlecam_smoother import (

From 61cdcccd99945d71d24ea0f6757b8458b8703fea Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Tue, 7 Jan 2025 14:41:14 -0500
Subject: [PATCH 23/25] resolving PR edit requests: more tests + indentation
 fix

---
 eks/multicam_smoother.py                      |  4 ++--
 .../scripts/test_mirrored_multicam_example.py | 12 ++++++++++
 tests/scripts/test_multicam_example.py        | 23 +++++++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 tests/scripts/test_multicam_example.py

diff --git a/eks/multicam_smoother.py b/eks/multicam_smoother.py
index 2124b50..8f458ad 100644
--- a/eks/multicam_smoother.py
+++ b/eks/multicam_smoother.py
@@ -82,8 +82,8 @@ def fit_eks_mirrored_multicam(
                 output_df,
                 key_suffix=f'_{camera_name}'
             )
-        # Save the output DataFrames to CSV file
-        output_df.to_csv(save_file)
+    # Save the output DataFrames to CSV file
+    output_df.to_csv(save_file)
     return output_df, smooth_params_final, input_dfs_list, nll_values
 
 
diff --git a/tests/scripts/test_mirrored_multicam_example.py b/tests/scripts/test_mirrored_multicam_example.py
index 200b83b..187c38c 100644
--- a/tests/scripts/test_mirrored_multicam_example.py
+++ b/tests/scripts/test_mirrored_multicam_example.py
@@ -9,3 +9,15 @@ def test_mirrored_multicam_example_defaults(run_script, tmpdir, pytestconfig):
         bodypart_list=['paw1LH', 'paw2LF'],  # , 'paw3RF', 'paw4RH'],  # unneeded computation
         camera_names=['top', 'bot'],
     )
+
+
+def test_mirrored_multicam_example_fixed_smooth_param(run_script, tmpdir, pytestconfig):
+
+    run_script(
+        script_file=str(pytestconfig.rootpath / 'scripts' / 'mirrored_multicam_example.py'),
+        input_dir=str(pytestconfig.rootpath / 'data' / 'mirror-mouse'),
+        output_dir=tmpdir,
+        bodypart_list=['paw1LH', 'paw2LF'],  # , 'paw3RF', 'paw4RH'],  # unneeded computation
+        camera_names=['top', 'bot'],
+        s=10
+    )
\ No newline at end of file
diff --git a/tests/scripts/test_multicam_example.py b/tests/scripts/test_multicam_example.py
new file mode 100644
index 0000000..c6e1f25
--- /dev/null
+++ b/tests/scripts/test_multicam_example.py
@@ -0,0 +1,23 @@
+
+
+def test_multicam_example_defaults(run_script, tmpdir, pytestconfig):
+
+    run_script(
+        script_file=str(pytestconfig.rootpath / 'scripts' / 'multicam_example.py'),
+        input_dir=str(pytestconfig.rootpath / 'data' / 'mirror-mouse-separate'),
+        output_dir=tmpdir,
+        bodypart_list=['paw1LH', 'paw2LF'],  # , 'paw3RF', 'paw4RH'],  # unneeded computation
+        camera_names=['top', 'bot'],
+    )
+
+
+def test_multicam_example_fixed_smooth_param(run_script, tmpdir, pytestconfig):
+
+    run_script(
+        script_file=str(pytestconfig.rootpath / 'scripts' / 'multicam_example.py'),
+        input_dir=str(pytestconfig.rootpath / 'data' / 'mirror-mouse-separate'),
+        output_dir=tmpdir,
+        bodypart_list=['paw1LH', 'paw2LF'],  # , 'paw3RF', 'paw4RH'],  # unneeded computation
+        camera_names=['top', 'bot'],
+        s=10
+    )
\ No newline at end of file

From 78ab00d5efad6a4002094b0c195959e6bdabb0ab Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Tue, 7 Jan 2025 18:54:41 -0500
Subject: [PATCH 24/25] print type format compatibility fix for s=10

---
 eks/multicam_smoother.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eks/multicam_smoother.py b/eks/multicam_smoother.py
index 8f458ad..5384f7e 100644
--- a/eks/multicam_smoother.py
+++ b/eks/multicam_smoother.py
@@ -300,7 +300,7 @@ def ensemble_kalman_smoother_multicam(
     # Call functions from ensemble_kalman to optimize smooth_param before filtering and smoothing
     smooth_param, ms, Vs, nll, nll_values = multicam_optimize_smooth(
         cov_matrix, y_obs, m0, S0, C, A, R, ensemble_vars, s_frames, smooth_param)
-    print(f"Smoothed {keypoint_ensemble} at smooth_param={smooth_param:.3f}")
+    print(f"Smoothed {keypoint_ensemble} at smooth_param={smooth_param}")
     smooth_param_final = smooth_param
 
     # Smoothed posterior over ys

From c90a1d81cc0a0c0e673dca13667516d01dd3c905 Mon Sep 17 00:00:00 2001
From: Keemin Lee <keemin7@gmail.com>
Date: Mon, 13 Jan 2025 17:41:42 -0500
Subject: [PATCH 25/25] GPU accel removed

---
 eks/core.py                        | 160 ---------
 eks/multicam_smoother.py           |  21 +-
 eks/singlecam_smoother.py          |   1 -
 eks/singlecam_smoother_parallel.py | 535 +++++++++++++++++++++++++++++
 4 files changed, 546 insertions(+), 171 deletions(-)
 create mode 100644 eks/singlecam_smoother_parallel.py

diff --git a/eks/core.py b/eks/core.py
index c766ed8..3acfc53 100644
--- a/eks/core.py
+++ b/eks/core.py
@@ -528,166 +528,6 @@ def single_timestep_nll(innovation, innovation_cov):
     nll_increment = 0.5 * jnp.abs(log_det_S + quadratic_term + c)
     return nll_increment
 
-
-# ----- Parallel Functions for GPU -----
-
-def first_filtering_element(C, A, Q, R, m0, P0, y):
-    # model.F = A, model.H = C,
-    S = C @ Q @ C.T + R
-    CF, low = jsc.linalg.cho_factor(S)  # note the jsc
-
-    m1 = A @ m0
-    P1 = A @ P0 @ A.T + Q
-    S1 = C @ P1 @ C.T + R
-    K1 = jsc.linalg.solve(S1, C @ P1, assume_a='pos').T  # note the jsc
-
-    A_updated = jnp.zeros_like(A)
-    b = m1 + K1 @ (y - C @ m1)
-    C_updated = P1 - K1 @ S1 @ K1.T
-
-    # note the jsc
-    eta = A.T @ C.T @ jsc.linalg.cho_solve((CF, low), y)
-    J = A.T @ C.T @ jsc.linalg.cho_solve((CF, low), C @ A)
-    return A_updated, b, C_updated, J, eta
-
-
-def generic_filtering_element(C, A, Q, R, y):
-    S = C @ Q @ C.T + R
-    CF, low = jsc.linalg.cho_factor(S)  # note the jsc
-    K = jsc.linalg.cho_solve((CF, low), C @ Q).T  # note the jsc
-    A_updated = A - K @ C @ A
-    b = K @ y
-    C_updated = Q - K @ C @ Q
-
-    # note the jsc
-    eta = A.T @ C.T @ jsc.linalg.cho_solve((CF, low), y)
-    J = A.T @ C.T @ jsc.linalg.cho_solve((CF, low), C @ A)
-    return A_updated, b, C_updated, J, eta
-
-
-def make_associative_filtering_elements(C, A, Q, R, m0, P0, observations):
-    first_elems = first_filtering_element(C, A, Q, R, m0, P0, observations[0])
-    generic_elems = vmap(lambda o: generic_filtering_element(C, A, Q, R, o))(observations[1:])
-    return tuple(jnp.concatenate([jnp.expand_dims(first_e, 0), gen_es])
-                 for first_e, gen_es in zip(first_elems, generic_elems))
-
-
-@partial(vmap)
-def filtering_operator(elem1, elem2):
-    # # note the jsc everywhere
-    A1, b1, C1, J1, eta1 = elem1
-    A2, b2, C2, J2, eta2 = elem2
-    dim = A1.shape[0]
-    I_var = jnp.eye(dim)  # note the jnp
-
-    I_C1J2 = I_var + C1 @ J2
-    temp = jsc.linalg.solve(I_C1J2.T, A2.T).T
-    A = temp @ A1
-    b = temp @ (b1 + C1 @ eta2) + b2
-    C = temp @ C1 @ A2.T + C2
-
-    I_J2C1 = I_var + J2 @ C1
-    temp = jsc.linalg.solve(I_J2C1.T, A1).T
-
-    eta = temp @ (eta2 - J2 @ b1) + eta1
-    J = temp @ J2 @ A1 + J1
-
-    return A, b, C, J, eta
-
-
-def pkf(y, m0, cov0, A, Q, C, R):
-    initial_elements = make_associative_filtering_elements(C, A, Q, R, m0, cov0, y)
-    final_elements = associative_scan(filtering_operator, initial_elements)
-    return final_elements
-
-
-pkf_func = jit(pkf)
-
-
-def get_kalman_means(A_scan, b_scan, m0):
-    """
-    Computes the Kalman mean at a single timepoint, the result is:
-    A_scan @ m0 + b_scan
-
-    Returned shape: (state_dimension, 1)
-    """
-    return A_scan @ jnp.expand_dims(m0, axis=1) + jnp.expand_dims(b_scan, axis=1)
-
-
-def get_kalman_variances(C):
-    return C
-
-
-def get_next_cov(A, C, Q, R, filter_cov, filter_mean):
-    """
-    Given the moments of p(x_t | y_1, ..., y_t) (normal filter distribution),
-    compute the moments of the distribution for:
-    p(y_{t+1} | y_1, ..., y_t)
-
-    Params:
-        A (np.ndarray): Shape (state_dimension, state_dimension) Process coeff matrix
-        C (np.ndarray): Shape (obs_dimension, state_dimension) Observation coeff matrix
-        Q (np.ndarray): Shape (state_dimension, state_dimension). Process noise covariance matrix.
-        R (np.ndarray): Shape (obs_dimension, obs_dimension). Observation noise covariance matrix.
-        filter_cov (np.ndarray). Shape (state_dimension, state_dimension). Filtered covariance
-        filter_mean (np.ndarray). Shape (state_dimension, 1). Filter mean
-
-    Returns:
-        mean (np.ndarray). Shape (obs_dimension, 1)
-        cov (np.ndarray). Shape (obs_dimension, obs_dimension).
-    """
-    mean = C @ A @ filter_mean
-    cov = C @ (A @ filter_cov @ A.T + Q) @ C.T + R
-    return mean, cov
-
-
-def compute_marginal_nll(value, mean, covariance):
-    return -1 * jax.scipy.stats.multivariate_normal.logpdf(value, mean, covariance)
-
-
-def parallel_loss_single(A_scan, b_scan, C_scan, A, C, Q, R, next_observation, m0):
-    curr_mean = get_kalman_means(A_scan, b_scan, m0)
-    curr_cov = get_kalman_variances(C_scan)  # Placeholder; just returns identity
-
-    next_mean, next_cov = get_next_cov(A, C, Q, R, curr_cov, curr_mean)
-    return jnp.squeeze(curr_mean), curr_cov, compute_marginal_nll(jnp.squeeze(next_observation),
-                                                                  jnp.squeeze(next_mean), next_cov)
-
-
-parallel_loss_func_vmap = jit(
-    vmap(parallel_loss_single, in_axes=(0, 0, 0, None, None, None, None, 0, None),
-         out_axes=(0, 0, 0)))
-
-
-@partial(jit)
-def y1_given_x0_nll(C, A, Q, R, m0, cov0, obs):
-    y1_predictive_mean = C @ A @ jnp.expand_dims(m0, axis=1)
-    y1_predictive_cov = C @ (A @ cov0 @ A.T + Q) @ C.T + R
-    addend = -1 * jax.scipy.stats.multivariate_normal.logpdf(obs, jnp.squeeze(y1_predictive_mean),
-                                                             y1_predictive_cov)
-    return addend
-
-
-def pkf_and_loss(y, m0, cov0, A, Q, C, R):
-    A_scan, b_scan, C_scan, _, _ = pkf_func(y, m0, cov0, A, Q, C, R)
-
-    # Gives us the NLL for p(y_i | y_1, ..., y_{i-1}) for i > 1.
-    # Need to use the parallel scan outputs for this. i = 1 handled below
-    filtered_states, filtered_covariances, losses = parallel_loss_func_vmap(A_scan[:-1],
-                                                                            b_scan[:-1],
-                                                                            C_scan[:-1], A, C, Q,
-                                                                            R, y[1:], m0)
-
-    # Gives us the NLL for p_y(y_1 | x_0)
-    addend = y1_given_x0_nll(C, A, Q, R, m0, cov0, y[0])
-
-    final_mean = get_kalman_means(A_scan[-1], b_scan[-1], m0).T
-    final_covariance = jnp.expand_dims(get_kalman_variances(C_scan[-1]), axis=0)
-    filtered_states = jnp.concatenate([filtered_states, final_mean], axis=0)
-    filtered_variances = jnp.concatenate([filtered_covariances, final_covariance], axis=0)
-    return filtered_states, filtered_variances, jnp.sum(losses) + addend
-
-
 # -------------------------------------------------------------------------------------
 # Misc: These miscellaneous functions generally have specific computations used by the
 # core functions or the smoothers
diff --git a/eks/multicam_smoother.py b/eks/multicam_smoother.py
index 5384f7e..a24b10f 100644
--- a/eks/multicam_smoother.py
+++ b/eks/multicam_smoother.py
@@ -193,12 +193,13 @@ def ensemble_kalman_smoother_multicam(
     """
 
     # --------------------------------------------------------------
-    # interpolate right cam markers to left cam timestamps
+    # Setup: Interpolate right cam markers to left cam timestamps
     # --------------------------------------------------------------
     num_cameras = len(camera_names)
     markers_list_stacked_interp = []
     markers_list_interp = [[] for i in range(num_cameras)]
     camera_likelihoods_stacked = []
+
     for model_id in range(len(markers_list_cameras[0])):
         bl_markers_curr = []
         camera_markers_curr = [[] for i in range(num_cameras)]
@@ -219,21 +220,24 @@ def ensemble_kalman_smoother_multicam(
         for camera in range(num_cameras):
             markers_list_interp[camera].append(camera_markers_curr[camera])
             camera_likelihoods[camera] = np.asarray(camera_likelihoods[camera])
+
     markers_list_stacked_interp = np.asarray(markers_list_stacked_interp)
     markers_list_interp = np.asarray(markers_list_interp)
     camera_likelihoods_stacked = np.asarray(camera_likelihoods_stacked)
-
     keys = [keypoint_ensemble + '_x', keypoint_ensemble + '_y']
     markers_list_cams = [[] for i in range(num_cameras)]
+
     for k in range(len(markers_list_interp[0])):
         for camera in range(num_cameras):
             markers_cam = pd.DataFrame(markers_list_interp[camera][k], columns=keys)
             markers_cam[f'{keypoint_ensemble}_likelihood'] = camera_likelihoods_stacked[k][camera]
             markers_list_cams[camera].append(markers_cam)
+
     # compute ensemble median for each camera
     cam_ensemble_preds = []
     cam_ensemble_vars = []
     cam_ensemble_stacks = []
+
     for camera in range(num_cameras):
         cam_ensemble_preds_curr, cam_ensemble_vars_curr, _, cam_ensemble_stacks_curr = ensemble(
             markers_list_cams[camera], keys, avg_mode=ensembling_mode,
@@ -250,13 +254,14 @@ def ensemble_kalman_smoother_multicam(
 
     good_cam_ensemble_preds = []
     good_cam_ensemble_vars = []
+
     for camera in range(num_cameras):
         good_cam_ensemble_preds.append(cam_ensemble_preds[camera][good_frames])
         good_cam_ensemble_vars.append(cam_ensemble_vars[camera][good_frames])
 
     good_ensemble_preds = np.hstack(good_cam_ensemble_preds)
-    # good_ensemble_vars = np.hstack(good_cam_ensemble_vars)
     means_camera = []
+
     for i in range(good_ensemble_preds.shape[1]):
         means_camera.append(good_ensemble_preds[:, i].mean())
 
@@ -280,21 +285,17 @@ def ensemble_kalman_smoother_multicam(
     # latent variables (observed)
     good_z_t_obs = good_ensemble_pcs  # latent variables - true 3D pca
 
-    # ------ Set values for kalman filter ------
+    # --------------------------------------------------------------
+    # Kalman Filter
+    # --------------------------------------------------------------
     m0 = np.asarray([0.0, 0.0, 0.0])  # initial state: mean
     S0 = np.asarray([[np.var(good_z_t_obs[:, 0]), 0.0, 0.0],
                      [0.0, np.var(good_z_t_obs[:, 1]), 0.0],
                      [0.0, 0.0, np.var(good_z_t_obs[:, 2])]])  # diagonal: var
-
     A = np.asarray([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])  # state-transition matrix,
-
-    # Q = np.asarray([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) <-- state-cov matrix?
-
     d_t = good_z_t_obs[1:] - good_z_t_obs[:-1]
-
     C = ensemble_pca.components_.T  # Measurement function is inverse transform of PCA
     R = np.eye(ensemble_pca.components_.shape[1])  # placeholder diagonal matrix for ensemble var
-
     cov_matrix = np.cov(d_t.T)
 
     # Call functions from ensemble_kalman to optimize smooth_param before filtering and smoothing
diff --git a/eks/singlecam_smoother.py b/eks/singlecam_smoother.py
index a7df9df..68996ed 100644
--- a/eks/singlecam_smoother.py
+++ b/eks/singlecam_smoother.py
@@ -16,7 +16,6 @@
     jax_ensemble,
     jax_forward_pass,
     jax_forward_pass_nlls,
-    pkf_and_loss,
 )
 from eks.utils import crop_frames, format_data, make_dlc_pandas_index
 
diff --git a/eks/singlecam_smoother_parallel.py b/eks/singlecam_smoother_parallel.py
new file mode 100644
index 0000000..1c6b0c1
--- /dev/null
+++ b/eks/singlecam_smoother_parallel.py
@@ -0,0 +1,535 @@
+import os
+from functools import partial
+from typing import Optional, Union
+
+import jax
+import jax.numpy as jnp
+import numpy as np
+import optax
+import pandas as pd
+from jax import jit, vmap
+
+from eks.core import (
+    compute_covariance_matrix,
+    compute_initial_guesses,
+    jax_backward_pass,
+    jax_ensemble,
+    jax_forward_pass,
+    jax_forward_pass_nlls,
+    pkf_and_loss,
+)
+from eks.utils import crop_frames, format_data, make_dlc_pandas_index
+
+
+def fit_eks_singlecam(
+    input_source: Union[str, list],
+    save_file: str,
+    bodypart_list: Optional[list] = None,
+    smooth_param: Optional[Union[float, list]] = None,
+    s_frames: Optional[list] = None,
+    blocks: list = [],
+    avg_mode: str = 'median',
+    var_mode: str = 'confidence_weighted_var',
+) -> tuple:
+    """Fit the Ensemble Kalman Smoother for single-camera data.
+
+    Args:
+        input_source: directory path or list of CSV file paths. If a directory path, all files
+            within this directory will be used.
+        save_file: File to save output dataframe.
+        bodypart_list: list of body parts to analyze.
+        smooth_param: value in (0, Inf); smaller values lead to more smoothing
+        s_frames: Frames for automatic optimization if smooth_param is not provided.
+        blocks: keypoints to be blocked for correlated noise. Generates on smoothing param per
+            block, as opposed to per keypoint.
+            Specified by the form "x1, x2, x3; y1, y2" referring to keypoint indices (start at 0)
+        avg_mode: mode for averaging across ensemble
+            'median' | 'mean'
+        var_mode: mode for computing ensemble variance
+            'var' | 'confidence_weighted_var'
+
+    Returns:
+        tuple:
+            df_smoothed (pd.DataFrame)
+            s_finals (list): List of optimized smoothing factors for each keypoint.
+            input_dfs (list): List of input DataFrames for plotting.
+            bodypart_list (list): List of body parts used.
+
+    """
+    # Load and format input files using the unified format_data function
+    input_dfs_list, _, keypoint_names = format_data(input_source)
+
+    if bodypart_list is None:
+        bodypart_list = keypoint_names
+    print(f'Input data loaded for keypoints:\n{bodypart_list}')
+
+    # Run the ensemble Kalman smoother
+    df_smoothed, smooth_params_final = ensemble_kalman_smoother_singlecam(
+        markers_list=input_dfs_list,
+        keypoint_names=bodypart_list,
+        smooth_param=smooth_param,
+        s_frames=s_frames,
+        blocks=blocks,
+        avg_mode=avg_mode,
+        var_mode=var_mode,
+    )
+
+    # Save the output DataFrame to CSV
+    os.makedirs(os.path.dirname(save_file), exist_ok=True)
+    df_smoothed.to_csv(save_file)
+    print("DataFrames successfully converted to CSV")
+
+    return df_smoothed, smooth_params_final, input_dfs_list, bodypart_list
+
+
+def ensemble_kalman_smoother_singlecam(
+    markers_list: list,
+    keypoint_names: list,
+    smooth_param: Optional[Union[float, list]] = None,
+    s_frames: Optional[list] = None,
+    blocks: list = [],
+    avg_mode: str = 'median',
+    var_mode: str = 'confidence_weighted_var',
+    zscore_threshold: float = 2,
+    verbose: bool = False,
+) -> tuple:
+    """Perform Ensemble Kalman Smoothing for single-camera data.
+
+    Args:
+        markers_list: pd.DataFrames
+            each list element is a dataframe of predictions from one ensemble member
+        keypoint_names: List of body parts to run smoothing on
+        smooth_param: value in (0, Inf); smaller values lead to more smoothing
+        s_frames: List of frames for automatic computation of smoothing parameter
+        blocks: keypoints to be blocked for correlated noise. Generates on smoothing param per
+            block, as opposed to per keypoint.
+            Specified by the form "x1, x2, x3; y1, y2" referring to keypoint indices (start at 0)
+        avg_mode: mode for averaging across ensemble
+            'median' | 'mean'
+        var_mode: mode for computing ensemble variance
+            'var' | 'confidence_weighted_var'
+        zscore_threshold: z-score threshold.
+        verbose: True to print out details
+
+    Returns:
+        tuple: Dataframes with smoothed predictions, final smoothing parameters, NLL values.
+
+    """
+
+    # Convert list of DataFrames to a 3D NumPy array
+    data_arrays = [df.to_numpy() for df in markers_list]
+    markers_3d_array = np.stack(data_arrays, axis=0)
+
+    # Map keypoint names to indices and crop markers_3d_array
+    keypoint_is = {}
+    keys = []
+    for i, col in enumerate(markers_list[0].columns):
+        keypoint_is[col] = i
+    for part in keypoint_names:
+        keys.append(keypoint_is[part + '_x'])
+        keys.append(keypoint_is[part + '_y'])
+        keys.append(keypoint_is[part + '_likelihood'])
+    key_cols = np.array(keys)
+    markers_3d_array = markers_3d_array[:, :, key_cols]
+
+    T = markers_3d_array.shape[1]
+    n_keypoints = markers_3d_array.shape[2] // 3
+    n_coords = 2
+
+    # Compute ensemble statistics
+    print("Ensembling models")
+    ensemble_preds, ensemble_vars, ensemble_likes = jax_ensemble(
+        markers_3d_array, avg_mode=avg_mode, var_mode=var_mode,
+    )
+
+    # Calculate mean and adjusted observations
+    mean_obs_dict, adjusted_obs_dict, scaled_ensemble_preds = adjust_observations(
+        ensemble_preds.copy(), n_keypoints,
+    )
+
+    # Initialize Kalman filter values
+    m0s, S0s, As, cov_mats, Cs, Rs, ys = initialize_kalman_filter(
+        scaled_ensemble_preds, adjusted_obs_dict, n_keypoints)
+    # Main smoothing function
+    s_finals, ms, Vs, nlls = singlecam_optimize_smooth(
+        cov_mats, ys, m0s, S0s, Cs, As, Rs, ensemble_vars,
+        s_frames, smooth_param, blocks, verbose)
+
+    y_m_smooths = np.zeros((n_keypoints, T, n_coords))
+    y_v_smooths = np.zeros((n_keypoints, T, n_coords, n_coords))
+
+    data_arr = []
+
+    # Process each keypoint
+    for k in range(n_keypoints):
+        y_m_smooths[k] = np.dot(Cs[k], ms[k].T).T
+        y_v_smooths[k] = np.swapaxes(np.dot(Cs[k], np.dot(Vs[k], Cs[k].T)), 0, 1)
+        mean_x_obs = mean_obs_dict[3 * k]
+        mean_y_obs = mean_obs_dict[3 * k + 1]
+
+        # Computing z-score
+        # eks_preds_array = np.zeros(y_m_smooths.shape)
+        # eks_preds_array[k] = y_m_smooths[k].copy()
+        # eks_preds_array[k] = np.asarray([
+        #     eks_preds_array[k].T[0] + mean_x_obs,
+        #     eks_preds_array[k].T[1] + mean_y_obs,
+        # ]).T
+        # zscore, ensemble_std = eks_zscore(
+        #     eks_preds_array[k],
+        #     ensemble_preds[:, k, :],
+        #     ensemble_vars[:, k, :],
+        #     min_ensemble_std=zscore_threshold,
+        # )
+        # nll = nlls[k]
+
+        # keep track of labels for each data entry
+        labels = []
+
+        # smoothed x vals
+        data_arr.append(y_m_smooths[k].T[0] + mean_x_obs)
+        labels.append('x')
+        # smoothed y vals
+        data_arr.append(y_m_smooths[k].T[1] + mean_y_obs)
+        labels.append('y')
+        # mean likelihood
+        data_arr.append(ensemble_likes[:, k, 0])
+        labels.append('likelihood')
+        # x vals ensemble median
+        data_arr.append(ensemble_preds[:, k, 0])
+        labels.append('x_ens_median')
+        # y vals ensemble median
+        data_arr.append(ensemble_preds[:, k, 1])
+        labels.append('y_ens_median')
+        # x vals ensemble variance
+        data_arr.append(ensemble_vars[:, k, 0])
+        labels.append('x_ens_var')
+        # y vals ensemble variance
+        data_arr.append(ensemble_vars[:, k, 1])
+        labels.append('y_ens_var')
+        # x vals posterior variance
+        data_arr.append(y_v_smooths[k][:, 0, 0])
+        labels.append('x_posterior_var')
+        # y vals posterior variance
+        data_arr.append(y_v_smooths[k][:, 1, 1])
+        labels.append('y_posterior_var')
+
+    data_arr = np.asarray(data_arr)
+
+    # put data into dataframe
+    pdindex = make_dlc_pandas_index(keypoint_names, labels=labels)
+    markers_df = pd.DataFrame(data_arr.T, columns=pdindex)
+
+    return markers_df, s_finals
+
+
+def adjust_observations(
+    scaled_ensemble_preds: np.ndarray,
+    n_keypoints: int,
+) -> tuple:
+    """
+    Adjust observations by computing mean and adjusted observations for each keypoint.
+
+    Args:
+        scaled_ensemble_preds: shape (n_timepoints, n_keypoints, n_coordinates)
+        n_keypoints: Number of keypoints.
+
+    Returns:
+        tuple: Mean observations dict, adjusted observations dict, scaled ensemble preds
+
+    """
+
+    # Ensure scaled_ensemble_preds is a JAX array
+    scaled_ensemble_preds = jnp.array(scaled_ensemble_preds)
+
+    # Convert dictionaries to JAX arrays
+    keypoints_avg_array = scaled_ensemble_preds.reshape((scaled_ensemble_preds.shape[0], -1)).T
+    x_keys = jnp.array([3 * i for i in range(n_keypoints)])
+    y_keys = jnp.array([3 * i + 1 for i in range(n_keypoints)])
+
+    def compute_adjusted_means(i):
+        mean_x_obs = jnp.nanmean(keypoints_avg_array[2 * i])
+        mean_y_obs = jnp.nanmean(keypoints_avg_array[2 * i + 1])
+        adjusted_x_obs = keypoints_avg_array[2 * i] - mean_x_obs
+        adjusted_y_obs = keypoints_avg_array[2 * i + 1] - mean_y_obs
+        return mean_x_obs, mean_y_obs, adjusted_x_obs, adjusted_y_obs
+
+    means_and_adjustments = jax.vmap(compute_adjusted_means)(jnp.arange(n_keypoints))
+
+    mean_x_obs, mean_y_obs, adjusted_x_obs, adjusted_y_obs = means_and_adjustments
+
+    # Convert JAX arrays to NumPy arrays for dictionary keys
+    x_keys_np = np.array(x_keys)
+    y_keys_np = np.array(y_keys)
+
+    mean_obs_dict = {x_keys_np[i]: mean_x_obs[i] for i in range(n_keypoints)}
+    mean_obs_dict.update({y_keys_np[i]: mean_y_obs[i] for i in range(n_keypoints)})
+
+    adjusted_obs_dict = {x_keys_np[i]: adjusted_x_obs[i] for i in range(n_keypoints)}
+    adjusted_obs_dict.update({y_keys_np[i]: adjusted_y_obs[i] for i in range(n_keypoints)})
+
+    def scale_ensemble_preds(mean_x_obs, mean_y_obs, scaled_ensemble_preds, i):
+        scaled_ensemble_preds = scaled_ensemble_preds.at[:, i, 0].add(-mean_x_obs)
+        scaled_ensemble_preds = scaled_ensemble_preds.at[:, i, 1].add(-mean_y_obs)
+        return scaled_ensemble_preds
+
+    for i in range(n_keypoints):
+        mean_x = mean_obs_dict[x_keys_np[i]]
+        mean_y = mean_obs_dict[y_keys_np[i]]
+        scaled_ensemble_preds = scale_ensemble_preds(mean_x, mean_y, scaled_ensemble_preds, i)
+
+    return mean_obs_dict, adjusted_obs_dict, scaled_ensemble_preds
+
+
+def initialize_kalman_filter(
+    scaled_ensemble_preds: np.ndarray,
+    adjusted_obs_dict: dict,
+    n_keypoints: int
+) -> tuple:
+    """
+    Initialize the Kalman filter values.
+
+    Parameters:
+        scaled_ensemble_preds: Scaled ensemble predictions.
+        adjusted_obs_dict: Adjusted observations dictionary.
+        n_keypoints: Number of keypoints.
+
+    Returns:
+        tuple: Initial Kalman filter values and covariance matrices.
+
+    """
+
+    # Convert inputs to JAX arrays
+    scaled_ensemble_preds = jnp.array(scaled_ensemble_preds)
+
+    # Extract the necessary values from adjusted_obs_dict
+    adjusted_x_obs_list = [adjusted_obs_dict[3 * i] for i in range(n_keypoints)]
+    adjusted_y_obs_list = [adjusted_obs_dict[3 * i + 1] for i in range(n_keypoints)]
+
+    # Convert these lists to JAX arrays
+    adjusted_x_obs_array = jnp.array(adjusted_x_obs_list)
+    adjusted_y_obs_array = jnp.array(adjusted_y_obs_list)
+
+    def init_kalman(i, adjusted_x_obs, adjusted_y_obs):
+        m0 = jnp.array([0.0, 0.0])  # initial state: mean
+        S0 = jnp.array([[jnp.nanvar(adjusted_x_obs), 0.0],
+                        [0.0, jnp.nanvar(adjusted_y_obs)]])  # diagonal: var
+        A = jnp.array([[1.0, 0], [0, 1.0]])  # state-transition matrix
+        C = jnp.array([[1, 0], [0, 1]])  # Measurement function
+        R = jnp.eye(2)  # placeholder diagonal matrix for ensemble variance
+        y_obs = scaled_ensemble_preds[:, i, :]
+
+        return m0, S0, A, C, R, y_obs
+
+    # Use vmap to vectorize the initialization over all keypoints
+    init_kalman_vmap = jax.vmap(init_kalman, in_axes=(0, 0, 0))
+    m0s, S0s, As, Cs, Rs, y_obs_array = init_kalman_vmap(jnp.arange(n_keypoints),
+                                                         adjusted_x_obs_array,
+                                                         adjusted_y_obs_array)
+    cov_mats = compute_covariance_matrix(scaled_ensemble_preds)
+    return m0s, S0s, As, cov_mats, Cs, Rs, y_obs_array
+
+
+def singlecam_optimize_smooth(
+    cov_mats: np.ndarray,
+    ys: np.ndarray,
+    m0s: np.ndarray,
+    S0s: np.ndarray,
+    Cs: np.ndarray,
+    As: np.ndarray,
+    Rs: np.ndarray,
+    ensemble_vars: np.ndarray,
+    s_frames: list,
+    smooth_param: Union[float, list],
+    blocks: list = [],
+    maxiter: int = 1000,
+    verbose: bool = False,
+) -> tuple:
+    """Optimize smoothing parameter, and use the result to run the kalman filter-smoother.
+
+    Parameters:
+        cov_mats: Covariance matrices.
+        ys: Observations. Shape (keypoints, frames, coordinates). coordinate is usually 2
+        m0s: Initial mean state.
+        S0s: Initial state covariance.
+        Cs: Measurement function.
+        As: State-transition matrix.
+        Rs: Measurement noise covariance.
+        ensemble_vars: Ensemble variances.
+        s_frames: List of frames.
+        smooth_param: Smoothing parameter.
+        blocks: keypoints to be blocked for correlated noise. Generates on smoothing param per
+            block, as opposed to per keypoint.
+            Specified by the form "x1, x2, x3; y1, y2" referring to keypoint indices (start at 0)
+        maxiter
+        verbose
+
+    Returns:
+        tuple: Final smoothing parameters, smoothed means, smoothed covariances,
+               negative log-likelihoods, negative log-likelihood values.
+
+    """
+
+    n_keypoints = ys.shape[0]
+    s_finals = []
+    if len(blocks) == 0:
+        for n in range(n_keypoints):
+            blocks.append([n])
+    if verbose:
+        print(f'Correlated keypoint blocks: {blocks}')
+
+    @partial(jit)
+    def nll_loss_sequential_scan(s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs, ensemble_vars):
+        s = jnp.exp(s)  # To ensure positivity
+        return singlecam_smooth_min(
+            s, cov_mats, cropped_ys, m0s, S0s, Cs, As, Rs, ensemble_vars)
+
+    loss_function = nll_loss_sequential_scan
+
+    # Optimize smooth_param
+    if smooth_param is not None:
+        if isinstance(smooth_param, float):
+            s_finals = [smooth_param]
+        elif isinstance(smooth_param, int):
+            s_finals = [float(smooth_param)]
+        else:
+            s_finals = smooth_param
+    else:
+        guesses = []
+        cropped_ys = []
+        for k in range(n_keypoints):
+            current_guess = compute_initial_guesses(ensemble_vars[:, k, :])
+            guesses.append(current_guess)
+            if s_frames is None or len(s_frames) == 0:
+                cropped_ys.append(ys[k])
+            else:
+                cropped_ys.append(crop_frames(ys[k], s_frames))
+
+        cropped_ys = np.array(cropped_ys)  # Concatenation of this list along dimension 0
+
+        # Optimize negative log likelihood
+        for block in blocks:
+            s_init = guesses[block[0]]
+            if s_init <= 0:
+                s_init = 2
+            s_init = jnp.log(s_init)
+            optimizer = optax.adam(learning_rate=0.25)
+            opt_state = optimizer.init(s_init)
+
+            selector = np.array(block).astype(int)
+            cov_mats_sub = cov_mats[selector]
+            m0s_crop = m0s[selector]
+            S0s_crop = S0s[selector]
+            Cs_crop = Cs[selector]
+            As_crop = As[selector]
+            Rs_crop = Rs[selector]
+            y_subset = cropped_ys[selector]
+
+            def step(s, opt_state):
+                loss, grads = jax.value_and_grad(loss_function)(
+                    s, cov_mats_sub, y_subset, m0s_crop, S0s_crop, Cs_crop, As_crop, Rs_crop)
+                updates, opt_state = optimizer.update(grads, opt_state)
+                s = optax.apply_updates(s, updates)
+                return s, opt_state, loss
+
+            prev_loss = jnp.inf
+            for iteration in range(maxiter):
+                s_init, opt_state, loss = step(s_init, opt_state)
+
+                if verbose and iteration % 10 == 0 or iteration == maxiter - 1:
+                    print(f'Iteration {iteration}, Current loss: {loss}, Current s: {s_init}')
+
+                tol = 0.001 * jnp.abs(jnp.log(prev_loss))
+                if jnp.linalg.norm(loss - prev_loss) < tol + 1e-6:
+                    break
+
+                prev_loss = loss
+
+            s_final = jnp.exp(s_init)  # Convert back from log-space
+
+            for b in block:
+                if verbose:
+                    print(f's={s_final} for keypoint {b}')
+                s_finals.append(s_final)
+
+    s_finals = np.array(s_finals)
+    # Final smooth with optimized s
+    ms, Vs, nlls = final_forwards_backwards_pass(
+        cov_mats, s_finals, ys, m0s, S0s, Cs, As, Rs, ensemble_vars,
+    )
+
+    return s_finals, ms, Vs, nlls
+
+
+def inner_smooth_min_routine(y, m0, S0, A, Q, C, R, ensemble_vars):
+    # Run filtering with the current smooth_param
+    _, _, nll = jax_forward_pass(y, m0, S0, A, Q, C, R, ensemble_vars)
+    return nll
+
+
+inner_smooth_min_routine_vmap = vmap(inner_smooth_min_routine, in_axes=(0, 0, 0, 0, 0, 0, 0))
+
+
+def singlecam_smooth_min(smooth_param, cov_mats, ys, m0s, S0s, Cs, As, Rs, ensemble_vars):
+    """
+    Smooths once using the given smooth_param. Returns only the nll, which is the parameter to
+    be minimized using the scipy.minimize() function.
+
+    Parameters:
+    smooth_param (float): Smoothing parameter.
+    block (list): List of blocks.
+    cov_mats (np.ndarray): Covariance matrices.
+    ys (np.ndarray): Observations.
+    m0s (np.ndarray): Initial mean state.
+    S0s (np.ndarray): Initial state covariance.
+    Cs (np.ndarray): Measurement function.
+    As (np.ndarray): State-transition matrix.
+    Rs (np.ndarray): Measurement noise covariance.
+
+    Returns:
+    float: Negative log-likelihood.
+    """
+    # Adjust Q based on smooth_param and cov_matrix
+    Qs = smooth_param * cov_mats
+    nlls = jnp.sum(inner_smooth_min_routine_vmap(ys, m0s, S0s, As, Qs, Cs, Rs))
+    return nlls
+
+
+def final_forwards_backwards_pass(process_cov, s, ys, m0s, S0s, Cs, As, Rs, ensemble_vars):
+    """
+    Perform final smoothing with the optimized smoothing parameters.
+
+    Parameters:
+        process_cov: Shape (keypoints, state_coords, state_coords). Process noise covariance matrix
+        s: Shape (keypoints,). We scale the process noise covariance by this value at each keypoint
+        ys: Shape (keypoints, frames, observation_coordinates). Observations for all keypoints.
+        m0s: Shape (keypoints, state_coords). Initial ensembled mean state for each keypoint.
+        S0s: Shape (keypoints, state_coords, state_coords). Initial ensembled state covars fek.
+        Cs: Shape (keypoints, obs_coords, state_coords). Observation measurement coeff matrix.
+        As: Shape (keypoints, state_coords, state_coords). Process matrix for each keypoint.
+        Rs: Shape (keypoints, obs_coords, obs_coords). Measurement noise covariance.
+
+    Returns:
+        smoothed means: Shape (keypoints, timepoints, coords).
+            Kalman smoother state estimates outputs for all frames/all keypoints.
+        smoothed covariances: Shape (num_keypoints, num_state_coordinates, num_state_coordinates)
+    """
+
+    # Initialize
+    n_keypoints = ys.shape[0]
+    ms_array = []
+    Vs_array = []
+    nlls_array = []
+    Qs = s[:, None, None] * process_cov
+    # Run forward and backward pass for each keypoint
+    for k in range(n_keypoints):
+        mf, Vf, nll, nll_array = jax_forward_pass_nlls(
+            ys[k], m0s[k], S0s[k], As[k], Qs[k], Cs[k], Rs[k], ensemble_vars[:, k, :])
+        ms, Vs = jax_backward_pass(mf, Vf, As[k], Qs[k])
+        ms_array.append(np.array(ms))
+        Vs_array.append(np.array(Vs))
+        nlls_array.append(np.array(nll_array))
+
+    smoothed_means = np.stack(ms_array, axis=0)
+    smoothed_covariances = np.stack(Vs_array, axis=0)
+
+    return smoothed_means, smoothed_covariances, nlls_array