Source code for democratic_detrender.helper_functions

""" This module contains helper functions for statistical calculations and data processing. """

import numpy as np



[docs]
def durbin_watson(residuals):
    """
    Calculate the Durbin-Watson statistic for a given set of residuals.

    Parameters:
        residuals: array-like, residuals from a model

    Returns:
        float: Durbin-Watson statistic
    """

    residual_terms = np.diff(residuals)
    numerator = np.nansum(residual_terms ** 2)
    denominator = np.nansum(residuals ** 2)
    assert denominator != 0.0
    return numerator / denominator




[docs]
def get_detrended_lc(y, detrending_model):
    """
    Get detrended light curve (LC).

    Parameters:
        y (array): Light curve (LC).
        detrending_model (array): Stellar detrending model evaluated at the same time as LC.

    Returns:
        array: Detrended light curve evaluated at the same time as input LC.
    """
    detrended_lc = ((y + 1) / (detrending_model + 1)) - 1

    return np.array(detrended_lc)




[docs]
def determine_cadence(times):
    time_gaps = {}
    for ii in range(1, len(times)):
        time_gap = np.round(times[ii] - times[ii - 1], 4)
        if time_gap in time_gaps.keys():
            time_gaps[time_gap] += 1
        else:
            time_gaps[time_gap] = 1

    # find the key that corresponds to the most data gaps, this is the cadence
    cadence = max(time_gaps, key=time_gaps.get)
    return cadence




[docs]
def find_nearest(array, value):
    # returns the value in an array closest to another input value

    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return array[idx]




[docs]
def bin_data(xs, ys, window):

    import warnings

    xmin = np.min(xs)
    xmax = np.max(xs)

    x_bin = np.arange(xmin - window, xmax + window, window)

    y_bin = []
    for ii in range(0, len(x_bin)):
        y_bin.append([])

    for jj in range(1, len(x_bin)):
        x_bin_jj = x_bin[jj]
        x_bin_jj_minus_1 = x_bin[jj - 1]

    for ii in range(0, len(xs)):
        found_bin = False
        x_ii = xs[ii]
        y_ii = ys[ii]

        for jj in range(1, len(x_bin)):
            x_bin_jj = x_bin[jj]
            x_bin_jj_minus_1 = x_bin[jj - 1]

            if not found_bin:
                if x_bin_jj_minus_1 <= x_ii <= x_bin_jj:
                    found_bin = True
                    y_bin[jj].append(y_ii)

        if not found_bin:
            print("careful, the time " + str(x_ii) + " didn't find a bin!")

    y_bin_mean = []
    for ii in range(0, len(y_bin)):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            y_bin_mean.append(np.nanmean(np.array(y_bin[ii])))

    return x_bin, y_bin_mean



import numpy as np
from scipy.stats import median_abs_deviation


[docs]
def ensemble_step(
    y_detrended,
    yerr_detrended=None,
    method="median",
):
    """
    Combine multiple detrending methods into a single flux time series
    (method-marginalized light curve) and propagate uncertainties.

    Parameters
    ----------
    y_detrended : array-like, shape (n_times, n_methods)
        Detrended flux values. Each column is the SAME target light curve
        after a different detrending method. Rows are timestamps.
        Can include NaNs.

    yerr_detrended : array-like, shape (n_times,)
        The per-point uncertainty for the target light curve BEFORE
        method-marginalization. If None, it's treated as zeros.
        (This matches how your snippet uses `yerr_detrended` as a 1D array.)

    method : {"median", "mean"}
        How to combine across methods at each timestamp:
        - "median": use np.nanmedian across methods,
                    and use MAD as the method-scatter term.
        - "mean":   use np.nanmean across methods,
                    and use standard deviation as the method-scatter term.

    Returns
    -------
    flux_mm : ndarray, shape (n_times,)
        Method-marginalized flux (the final combined light curve).

    flux_err_mm : ndarray, shape (n_times,)
        Updated per-point uncertainty after marginalizing over method.
        This is:
            sqrt( yerr_detrended^2 + scatter_term^2 )
        where scatter_term is MAD (for median) or std (for mean),
        both computed across methods at that timestamp.

    Notes
    -----
    - We transpose internally only to mirror your original code, where you
      did y_detrended.T then axis=1. After transpose we have
      shape (n_methods, n_times), so axis=0 would also work, but we'll
      stick to axis=0/axis=1 choices that reproduce your math exactly.
    - MAD uses scale=1/1.4826 to match your exact call, i.e. the *raw*
      unscaled MAD, not the Gaussian-scaled MAD.
    """

    # ensure ndarray, shape (n_times, n_methods)
    y_detrended = np.array(y_detrended, dtype=float)
    if y_detrended.ndim != 2:
        raise ValueError("y_detrended must be 2D: shape (n_times, n_methods)")

    n_times, n_methods = y_detrended.shape

    # handle input per-point errors
    if yerr_detrended is None:
        yerr_detrended_arr = np.zeros(n_times, dtype=float)
    else:
        yerr_detrended_arr = np.array(yerr_detrended, dtype=float)
        if yerr_detrended_arr.shape != (n_times,):
            raise ValueError(
                "yerr_detrended must be 1D with shape (n_times,)"
            )

    # transpose to match your axis usage (methods, times)
    y_T = y_detrended.T  # shape (n_methods, n_times)

    if method.lower() == "median":
        # method-marginalized flux = nanmedian across methods
        flux_mm = np.nanmedian(y_T, axis=0)  # (n_times,)

        # scatter between methods at each timestamp via MAD
        scatter_term = median_abs_deviation(
            y_T,
            axis=0,
            scale=1 / 1.4826,
            nan_policy="omit",
        )  # (n_times,)

    elif method.lower() == "mean":
        # method-marginalized flux = nanmean across methods
        flux_mm = np.nanmean(y_T, axis=0)  # (n_times,)

        # scatter between methods via std
        scatter_term = np.nanstd(y_T, axis=0)  # (n_times,)

    else:
        raise ValueError("method must be 'median' or 'mean'")

    # final marginalized errors:
    # sqrt( original_error^2 + method-scatter^2 )
    flux_err_mm = np.sqrt(yerr_detrended_arr.astype(float) ** 2 + scatter_term ** 2)

    return flux_mm, flux_err_mm