Source code for democratic_detrender.get_lc

""" This module contains functions to obtain light curve data and transit information from the NASA Exoplanet Archive. """

import numpy as np
import pandas as pd

from democratic_detrender.helper_functions import determine_cadence, find_nearest

import lightkurve as lk



[docs]
def tic_id_from_simbad(other_id):
    """
    Queries Simbad to obtain the TIC ID corresponding to the given identifier.

    Note: This function requires the astroquery and astropy packages to be installed.

    Parameters:
        other_id (str): Identifier for which TIC ID needs to be obtained.

    Returns:
        str: TIC ID obtained from Simbad. Returns None if no TIC ID is found.

    Examples:
        >>> tic_id = tic_id_from_simbad('HD 12345')
        >>> print(tic_id)

    """

    # Import necessary libraries
    from astroquery.simbad import Simbad
    import astropy

    # Query Simbad to obtain the object IDs
    ID_table = Simbad.query_objectids(other_id)

    # Check if the result is a valid table
    if type(ID_table) is not astropy.table.table.Table:
        return None

    # Convert the 'ID' column to string
    ID_table["id"] = ID_table["id"].astype(str)

    # Convert the table to a Pandas DataFrame
    ID_pandas = ID_table.to_pandas()

    # Filter for TIC IDs
    tic_id = ID_pandas[ID_pandas["id"].str.contains("TIC")]

    # Return the TIC ID, if found
    return tic_id["id"].values[0]




[docs]
def tic_id_from_exoplanet_archive(other_id):
    """
    Parameters:
        other_id (str): Identifier for which the TIC ID needs to be obtained.

    Returns:
        str: TIC ID obtained from the Exoplanet Archive. Returns None if no TIC ID is found.
    """

    # if SIMBAD can't get TIC ID, looks for it in exoplanet archive
    # most of this is from transit_info_from_exoplanet_archive tbh; hesitant to try to merge functions

    # SIMBAD should be able to grab data for already confirmed systems
    # this is primarily for TOI candidates

    # Exoplanet Archive URL for TOI data
    a = "https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI?table=toi&select=toipfx,tid,pl_tranmid,pl_orbper,pl_trandurh&format=csv"

    # Read TOI data from the Exoplanet Archive
    exoplanets = pd.read_csv(a)

    # Rename columns for consistency
    column_dict = {
        "toipfx": "toi_host",
        "tid": "tic_id",
        "pl_tranmid": "t0 [BJD]",
        "pl_orbper": "period [days]",
        "pl_trandurh": "duration [hours]",
    }

    exoplanets.rename(columns=column_dict, inplace=True)

    exoplanets["toi_host"] = "TOI-" + exoplanets["toi_host"].astype(str)
    exoplanets["tic_id"] = "TIC " + exoplanets["tic_id"].astype(str)

    # replacing any chars that don't match with ones that do; standardizes user input

    other_id = other_id.lstrip("toi- TOI")
    other_id = "TOI-" + other_id  # there's probably a smarter way to do this

    tic_id = exoplanets["tic_id"].loc[exoplanets["toi_host"] == other_id]

    return tic_id.values[0]




[docs]
def transit_info_from_exoplanet_archive(tic_id):

    """
    Queries the Exoplanet Archive to obtain transit information (t0, period, and duration) for a given TIC ID.

    Parameters:
        tic_id (str): TIC ID for which transit information needs to be obtained.

    Returns:
        result: DataFrame containing transit information (t0, period, and duration) for the specified TIC ID.
        Returns an empty DataFrame if no information is found.

    """
    import pyvo as vo

    # Connect to the Exoplanet Archive TAP service
    service = vo.dal.TAPService("https://exoplanetarchive.ipac.caltech.edu/TAP")

    # Query the planetary comparison table for transit information
    a = service.search(
        "SELECT \
                       tic_id, pl_tranmid, pl_orbper, pl_trandur\
                       FROM pscomppars"
    )

    exoplanets = a.to_table()

    exoplanets = exoplanets.to_pandas()

    # Rename columns for consistency
    column_dict = {
        "pl_tranmid": "t0 [BJD]",
        "pl_orbper": "period [days]",
        "pl_trandur": "duration [hours]",
    }

    exoplanets.rename(columns=column_dict, inplace=True)

    # Filter for the specified TIC ID
    result = exoplanets[exoplanets["tic_id"] == tic_id]

    # if there's no row in the planetary comparison table, check TOI table
    if result.empty:
        print("Exoplanet Archive: TOI Table")
        print("----------------------------")
        a = "https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI?table=toi&select=tid,pl_tranmid,pl_orbper,pl_trandurh&format=csv"

        exoplanets = pd.read_csv(a)

        # Rename columns for consistency
        column_dict = {
            "tid": "tic_id",
            "pl_tranmid": "t0 [BJD]",
            "pl_orbper": "period [days]",
            "pl_trandurh": "duration [hours]",
        }

        exoplanets.rename(columns=column_dict, inplace=True)
        exoplanets["tic_id"] = "TIC " + exoplanets["tic_id"].astype(str)

        # Filter for the specified TIC ID in the TOI table
        result = exoplanets[exoplanets["tic_id"] == tic_id]

    else:
        print("Exoplanet Archive: Planet Comparison Table")
        print("------------------------------------------")

    return result




[docs]
def get_transit_info(object_id):

    """
    Takes an object ID, queries Simbad to get the matching TIC ID, then queries the Exoplanet Archive
    to extract transit information including t0, period, and duration.
    
    if no Simbad match found, then returns None and prints error message
    if no exoplanet archive match found, then returns None and prints error message
    
    Parameters:
        object_id (str): Object ID for which transit information needs to be obtained.

    Returns:
        pandas.DataFrame or str or None:
            Returns a DataFrame containing transit information if found.
            Returns the TIC ID as a string if no transit information is found but a TIC ID is retrieved.
            Returns None if neither a TIC ID nor transit information is found.

    """

    tic_id = tic_id_from_simbad(object_id)

    if tic_id == None:
        # search on exoplanet archive
        print("No TIC ID match found on Simbad")
        print("Checking on exoplanet archive")

        tic_id = tic_id_from_exoplanet_archive(object_id)

        if tic_id == None:
            print("No TIC ID match found on exoplanet archive either.")
            return None

        else:
            print("Found " + tic_id)
            transit_info = transit_info_from_exoplanet_archive(tic_id)

            if transit_info.empty:
                print("No PC found with that TIC ID match on exoplanet archive")
                return tic_id

            else:
                return transit_info

    else:  # Simbad does find TIC ID
        transit_info = transit_info_from_exoplanet_archive(tic_id)

        if transit_info.empty:
            print("No PC found with that TIC ID match on exoplanet archive")
            return tic_id

        else:
            return transit_info




[docs]
def get_light_curve(
    object_id,
    flux_type,
    TESS=False,
    Kepler=False,
    user_period=None,
    user_t0=None,
    user_duration=None,
    planet_number=1,
    mask_width=1.3,
    remove_PDCSAP_blend=True
):
    """
    Obtains light curve data based on the object ID, flux type, and optional user-provided transit parameters.

    Parameters:
        object_id (str): Object ID for which the light curve needs to be obtained.
        flux_type (str): Type of flux data to retrieve (e.g., 'sap_flux', 'pdcsap_flux').
        TESS (bool, optional): Whether the object is observed by TESS. Defaults to False.
        Kepler (bool, optional): Whether the object is observed by Kepler. Defaults to False.
        user_period (float, optional): User-provided period for the transit. Defaults to None.
        user_t0 (float, optional): User-provided transit midpoint. Defaults to None.
        user_duration (float, optional): User-provided transit duration. Defaults to None.
        planet_number (int, optional): Number of the planet in the system. Defaults to 1.
        mask_width (float, optional): Width multiplier for creating transit masks. Defaults to 1.3.
        remove_PDCSAP_blend (bool, optional). Whether to remove the assumed blend factor from PDCSAP data. Defaults to True.

    Returns:
        tuple: A tuple containing the light curve data:
            np.array: Time values (xs).
            np.array: Flux values (ys).
            np.array: Flux error values (ys_err).
            np.array: Transit mask (mask).
            np.array: Fitted planet mask (mask_fitted_planet).
            np.array: Transit midpoints (t0s_in_data).
            np.array: Transit periods (period).
            np.array: Transit durations (duration).
            list: Quarters of observation (quarters).
            list: Crowding information (crowding).
            list: Flux fraction information (flux_fraction).

    """

    transit_info = get_transit_info(object_id)

    if transit_info is None:
        print("no TIC ID found")
        return None

    # if transit_info is a string then we just returned tic id bc no planet info found
    if isinstance(transit_info, str):

        if user_period == None or user_t0 == None or user_duration == None:
            print(
                "no transit info found, so you must enter all 3 of period, t0, and duration"
            )
            return None

        else:
            transit_dic = {
                "tic_id": [transit_info],
                "t0 [BJD]": [float(user_t0)],
                "period [days]": [float(user_period)],
                "duration [hours]": [float(user_duration)],
            }

            transit_info = pd.DataFrame(transit_dic)

    print("NASA Exoplanet Archive planet parameters:")
    print("planet #, [    tic_id      ,    t0 [BJD]    ,  P [days]   , tdur [hrs]]")
    transit_info_list = transit_info.astype(str).values.tolist()
    for ii in range(0, len(transit_info_list)):
        print("planet " + str(ii + 1) + ", " + str(transit_info_list[ii]))

    tic_id = str(transit_info["tic_id"].values[0])

    if user_period != None:
        periods = np.array(transit_info["period [days]"].values, dtype=float)
        periods[planet_number - 1] = user_period

        print("using periods = " + str(periods))

    else:
        periods = np.array(transit_info["period [days]"].values, dtype=float)

    if user_t0 != None:
        t0s = np.array(transit_info["t0 [BJD]"].values, dtype=float)
        t0s[planet_number - 1] = user_t0

        print("using t0s = " + str(t0s))

    else:
        t0s = np.array(transit_info["t0 [BJD]"].values, dtype=float)

    if user_duration != None:
        durations = np.array(transit_info["duration [hours]"].values, dtype=float)
        durations[planet_number - 1] = user_duration

        print("using durations = " + str(durations))

    else:
        durations = np.array(transit_info["duration [hours]"].values, dtype=float)

    # if no duration values input, just assume a 2 hour duration
    for ii in range(0, len(durations)):
        if np.isnan(durations[ii]):
            print(
                "no duration information on exoplanet archive for the "
                + str(ii + 1)
                + "th planet"
            )
            print("assuming 2 hours for " + str(ii + 1) + "th planet duration!")
            durations[ii] = 2.0

    print("")
    print("")

    nplanets = len(periods)

    if TESS:
        # switch to TESS BJD
        t0s = t0s - 2457000

        if flux_type == "qlp":
            lc_files = lk.search_lightcurve(
                tic_id, mission="TESS", author="qlp"
            ).download_all(quality_bitmask="default")

        else:
            # pull in short cadence TESS SPOC LC
            lc_files_short_cadence = lk.search_lightcurve(
                tic_id, mission="TESS", author="SPOC", cadence="short"
            ).download_all(quality_bitmask="default", flux_column=flux_type)

            # pull in long cadence TESS SPOC LC
            lc_files_long_cadence = lk.search_lightcurve(
                tic_id, mission="TESS", author="SPOC", cadence="long"
            ).download_all(quality_bitmask="default", flux_column=flux_type)

            # use short cadence TESS data if if exists, else use long cadence
            if lc_files_short_cadence == []:
                lc_files = lc_files_long_cadence
            else:
                lc_files = lc_files_short_cadence

    if Kepler:
        # switch to Kepler BJD
        t0s = t0s - 2454833

        # pull in Kepler LC
        # CURRENTLY ONLY LOADS LONG CADENCE DATA BY DEFAULT
        # ISSUE IS WITH RUN TIME IN OUTLIER REJECTION, BUT WILL LIKELY NEED TO MAKE OTHER SPEED UPS....
        lc_files = lk.search_lightcurve(
            tic_id, mission="Kepler", author="Kepler", cadence="long"
        ).download_all(quality_bitmask="default", flux_column=flux_type)

    quarters = []
    crowding = []
    flux_fraction = []

    try:
        for file in lc_files:
            quarters.append([np.min(file.time.value), np.max(file.time.value)])

            if flux_type != "qlp":
                crowding.append(file.CROWDSAP)
                flux_fraction.append(file.FLFRCSAP)

    except TypeError:
        if TESS:
            mission = "TESS"
        else:
            mission = "Kepler"
        error_message = (
            "no "
            + mission
            + " "
            + flux_type
            + " data found for "
            + object_id
            + ", so the code will break..."
        )

        print("")
        print("")
        print("")
        print("")
        print(error_message)
        print("")
        print("")
        print("")
        print("")

        return None


    #remove blend factor from PDCSAP data
    if remove_PDCSAP_blend:

        if flux_type == 'pdcsap_flux':
            xs = []
            ys = []
            ys_err = []
            for ii in range(0, len(lc_files)):
                #normalize flux value so you can add the blend factors
                lc = lc_files[ii].normalize().remove_nans() 
                f = flux_fraction[ii]
                c = crowding[ii]

                # Retrieve flux for the individual light curve
                x = lc.time.value
                y = lc.flux.value
                y_err = lc.flux_err.value


                # Apply the transformation to ys (flux)
                y_transformed = y / c


                # Append the modified light curve to the list
                xs.extend(x)
                ys.extend(y_transformed)
                ys_err.extend(y_err)


            # make the lists np arrays
            xs = np.array(xs)
            ys = np.array(ys)
            ys_err = np.array(ys_err)


        else:
            lc = lc_files.stitch().remove_nans()

            xs = lc.time.value
            ys = lc.flux.value
            ys_err = lc.flux_err.value


    else:
        lc = lc_files.stitch().remove_nans()

        xs = lc.time.value
        ys = lc.flux
        ys_err = lc.flux_err


    #define lc_mask for mask determination 
    lc_mask = lc_files.stitch().remove_nans() 
    mask = np.zeros(np.shape(xs), dtype=bool)
    for ii in range(0, nplanets):
        masks = lc_mask.create_transit_mask(
            period=periods[ii],
            duration=durations[ii] / 24.0 * mask_width,
            transit_time=t0s[ii],
        )
        mask += masks

    mask_fitted_planet = lc_mask.create_transit_mask(
        period=periods[planet_number - 1],
        duration=durations[planet_number - 1] / 24.0 * mask_width,
        transit_time=t0s[planet_number - 1],
    )

    # save the period, duration, and t0 for the planet we are fitting for...
    period = np.array([periods[planet_number - 1]])
    t0 = t0s[planet_number - 1]
    duration = np.array([durations[planet_number - 1]])

    nan_values = []
    if np.isnan(period[0]):
        nan_values.append("period")
    if np.isnan(t0):
        nan_values.append("t0")
    if np.isnan(duration[0]):
        nan_values.append("duration")

    if nan_values != []:
        print("")
        print("")
        print("")
        print("")
        print(
            str(nan_values)
            + " input is (are) not a number(s), so the code will break..."
        )
        print("")
        print("")
        print("")
        print("")

        return None

    print("using the following params for the planet we are fitting")
    print("--------------------------------------------------------")

    if TESS:
        print("[  t0 [TESS BJD]  , P [days], tdur [hrs]")
    if Kepler:
        print("[ t0 [Kepler BJD] , P [days], tdur [hrs]")
    print("[" + str(t0) + ", " + str(period[0]) + ", " + str(duration[0]) + "]")

    min_time = xs.min()
    max_time = xs.max()
    t0s_all = []
    while t0 > min_time:
        t0 -= period[0]

    while t0 < max_time:
        t0s_all.append(t0)
        t0 += period[0]

    cadence = determine_cadence(xs)

    t0s_in_data = []
    for t0 in t0s_all:
        nearest_lc_time = find_nearest(xs, t0)

        # if there is a data point within the cadence (times some uncertainty lets say 3) of
        # expected transit then there should be transit data
        if np.abs(t0 - nearest_lc_time) < 3 * cadence:
            t0s_in_data.append(t0)

    print("")
    print(str(len(t0s_in_data)) + " transits (or epochs) in total")
    print("")

    mu = np.median(ys)
    ys = ys / mu - 1
    ys_err = ys_err / mu

    return (
        np.array(xs),
        np.array(ys),
        np.array(ys_err),
        mask,
        mask_fitted_planet,
        np.array(t0s_in_data),
        period,
        duration,
        quarters,
        crowding,
        flux_fraction,
    )