Source code for democratic_detrender.outlier_rejection
""" This module contains functions to perform outlier rejection on light curve data. """
import numpy as np
[docs]
def reject_outliers_out_of_transit(
time, flux, flux_err, mask, mask_fitted_planet, time_window, sigma_window
):
"""
rejects outliers via moving median and sigma clipping outside of transit mask
input:
time = array of time values
flux = array of flux values
flux_err = array of flux error values
mask = array of all mask values
mask = array of mask values only for the planet we are fitting
time_window = int, how much time around which to determine median on
sigma_window = int, how many sigmas to clip
returns:
flux_out = array of flux values with outliers value changed to np.nan
flux_err_out = array of flux error values with outliers value changed to np.nan
"""
if len(time) != len(flux):
print("error, mismatched time and flux data length")
# find the time right before and after data gaps, this is used to only do outlier rejection after 1 full time_window
# data gap is considered anything greater than 1 time window wide
time_gap_adjacent = [time[0]]
for ii in range(1, len(time)):
if time[ii] - time[ii - 1] > time_window:
time_gap_adjacent.append(time[ii])
time_gap_adjacent.append(time[ii - 1])
# also add in right before and after transits to time_gap_adjacent
for ii in range(1, len(time)):
if mask[ii] and not mask[ii - 1]:
time_gap_adjacent.append(time[ii - 1])
if not mask[ii] and mask[ii - 1]:
time_gap_adjacent.append(time[ii])
time_out = []
flux_out = []
flux_err_out = []
mask_out = []
mask_fitted_planet_out = []
moving_median = []
for ii in range(0, len(time)):
current_time = time[ii]
current_flux = flux[ii]
current_flux_err = flux_err[ii]
current_mask = mask[ii]
current_mask_fitted_planet = mask_fitted_planet[ii]
# find indices that we want to include in the moving median
# include if greater than minimum window value
# and smaller than maximum window value
# and not during a transit
indices = np.where(
np.logical_and(
np.logical_and(
time >= current_time - time_window / 2.0,
time <= current_time + time_window / 2.0,
),
~mask,
)
)
current_flux_median = np.median(flux[indices])
# only do outlier rejection outside of transits
if not current_mask:
# only do outlier rejection if not within 1 time window of a data gap or transit
near_time_gaps = False
for time_gap in time_gap_adjacent:
if np.abs(current_time - time_gap) < time_window:
near_time_gaps = True
if not near_time_gaps:
if (
current_flux + sigma_window * current_flux_err
>= current_flux_median
and current_flux - sigma_window * current_flux_err
<= current_flux_median
):
time_out.append(current_time)
flux_out.append(current_flux)
flux_err_out.append(current_flux_err)
mask_out.append(current_mask)
mask_fitted_planet_out.append(current_mask_fitted_planet)
moving_median.append(current_flux_median)
else:
moving_median.append(current_flux_median)
else:
time_out.append(current_time)
flux_out.append(current_flux)
flux_err_out.append(current_flux_err)
mask_out.append(current_mask)
mask_fitted_planet_out.append(current_mask_fitted_planet)
moving_median.append(np.nan)
else:
time_out.append(current_time)
flux_out.append(current_flux)
flux_err_out.append(current_flux_err)
mask_out.append(current_mask)
mask_fitted_planet_out.append(current_mask_fitted_planet)
moving_median.append(np.nan)
time_out = np.array(time_out)
flux_out = np.array(flux_out)
flux_err_out = np.array(flux_err_out)
mask_out = np.array(mask_out)
mask_fitted_planet_out = np.array(mask_fitted_planet_out)
return (
time_out,
flux_out,
flux_err_out,
mask_out,
mask_fitted_planet_out,
moving_median,
)
[docs]
def reject_outliers_everywhere(
time, flux, flux_err, time_window, npoints_window, sigma_window
):
"""
rejects outliers via moving median and sigma clipping outside of transit mask
input:
time = array of time values
flux = array of flux values
flux_err = array of flux error values
time_window = float, much time before and after data gaps (of length >time_window) for which we don't do outlier rejection
npoints_window = int, how points around which to determine median on
sigma_window = int, how many sigmas to clip
returns:
time_out = array of time values with outliers removed
flux_out = array of flux values with outliers removed
"""
if len(time) != len(flux):
print("error, mismatched time and flux data length")
# find the time right before and after data gaps, this is used to only do outlier rejection after 1 full time_window
# data gap is considered anything greater than 1 time window wide
time_gap_adjacent = [time[0], time[len(time) - 1]]
for ii in range(1, len(time)):
if time[ii] - time[ii - 1] > time_window:
time_gap_adjacent.append(time[ii])
time_gap_adjacent.append(time[ii - 1])
time_out = []
flux_out = []
moving_median = []
for ii in range(0, len(time)):
current_time = time[ii]
current_flux = flux[ii]
current_flux_err = flux_err[ii]
# only do outlier rejection if not within 1 time window of a data gap or transit
near_time_gaps = False
for time_gap in time_gap_adjacent:
if np.abs(current_time - time_gap) < time_window:
near_time_gaps = True
if not near_time_gaps:
# find indices that we want to include in the moving median
# include if greater than minimum window value
# and smaller than maximum window value
n_around = int(np.floor(npoints_window / 2.0))
indices = np.arange(ii - n_around, ii + n_around + 1)
current_flux_median = np.nanmedian(flux[indices])
if (
current_flux + sigma_window * current_flux_err >= current_flux_median
and current_flux - sigma_window * current_flux_err
<= current_flux_median
):
time_out.append(current_time)
flux_out.append(current_flux)
else:
if not np.isnan(current_flux):
moving_median.append(current_flux_median)
else:
time_out.append(current_time)
flux_out.append(current_flux)
else:
time_out.append(current_time)
flux_out.append(current_flux)
time_out = np.array(time_out)
flux_out = np.array(flux_out)
return time_out, flux_out