In [1]:
from astropy.io import fits
import numpy as np
import pandas as pd
import seaborn as sns
import math

import requests
from astroquery.mast import Tesscut

from astropy.table import Table
from astropy.coordinates import SkyCoord

import re

# For matplotlib plotting
import matplotlib
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.animation as animation

# For animation display
from matplotlib import rc
from IPython.display import HTML
rc('animation', html='jshtml')

# For bokeh plotting
from bokeh import plotting
plotting.output_notebook()

In [2]:
# Load the full TOI catalogue once and preview it.
testing = pd.read_csv('tois_latest.csv')
display(testing)

# filtering out only the TIC ID for the time being.
# The column is taken from the frame already in memory instead of parsing the
# CSV a second time; .copy() keeps filtered_tois independent of `testing`.
filtered_tois = testing[['TIC ID']].copy()
display(filtered_tois)

Unnamed: 0,TIC ID,TOI,Previous CTOI,Master,SG1A,SG1B,SG2,SG3,SG4,SG5,...,Stellar Radius (R_Sun) err,Stellar Metallicity,Stellar Metallicity err,Stellar Mass (M_Sun),Stellar Mass (M_Sun) err,Sectors,Date TOI Alerted (UTC),Date TOI Updated (UTC),Date Modified,Comments
0,231663901,101.01,,5,5,5,5,5,5,5,...,0.043847,,,1.050,0.129454,127,2018-09-05,2021-10-07,2021-10-29 00:00:00,WASP-46 b
1,149603524,102.01,,5,5,5,5,5,5,5,...,0.050000,0.24000,0.050000,1.280,0.190812,"1,2,3,4,6,7,8,9,10,11,12,13,27,28,29,30,31,32,...",2019-05-07,2021-08-24,2021-10-29 00:00:00,WASP 62 b
2,336732616,103.01,,5,5,5,5,5,5,5,...,,,,1.270,0.196969,1,2018-09-05,2020-10-27,2021-10-29 00:00:00,HATS-3 b
3,231670397,104.01,,5,5,5,5,5,5,5,...,0.102573,,,1.160,0.166129,127,2018-09-05,2021-12-01,2021-12-07 12:08:34,WASP-73 b
4,144065872,105.01,,5,5,5,5,5,5,5,...,0.059699,,,1.030,0.127209,128,2018-09-05,2021-12-08,2021-12-21 12:05:17,WASP-95; epoch kept from qlp-s28-tois
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5762,158022899,5664.01,,3,4,3,3,1,4,4,...,0.090000,,,0.950,0.117553,2450,2022-06-02,2022-06-02,2022-06-03 12:08:33,found in faint-star QLP search
5763,274223601,5665.01,,3,4,3,3,1,4,4,...,,,,1.130,,161723242550,2022-06-02,2022-06-02,2022-06-03 12:08:33,found in faint-star QLP search
5764,235684964,5666.01,,3,4,3,3,1,4,4,...,,,,0.970,,"14,15,16,17,18,19,20,21,22,23,24,40,41,47,48,4...",2022-06-02,2022-06-02,2022-06-03 12:08:33,found in faint-star QLP search
5765,28242530,5667.01,,3,4,3,3,1,4,4,...,,-0.06858,0.060966,0.820,,232450,2022-06-02,2022-06-02,2022-06-03 12:08:33,found in faint-star QLP search


Unnamed: 0,TIC ID
0,231663901
1,149603524
2,336732616
3,231670397
4,144065872
...,...
5762,158022899
5763,274223601
5764,235684964
5765,28242530


In [3]:
# testing purposes
# import random

# count = 0
# for i in range(0, 29):
#     first_tic_id = filtered_tois.iloc[random.randint(0, 5766)]['TIC ID'].item()
#     exomast_url = "https://exo.mast.stsci.edu/api/v0.1/"
#     list_tce_query = f"{exomast_url}dvdata/tess/{first_tic_id}/tces/"
#     response = requests.get(list_tce_query)
#     tce_dict = response.json()
#     if len(tce_dict['TCE']) == 0:
#         count += 1
# print(count)

In [4]:
def grab_data(tois, index, timeout=30):
    """
    Iterates through given number of TOIs from the filtered tois file, and creates synthetic data from the even numbered candidates.

    For every visited row the list of TCEs for its TIC ID is requested from
    exo.MAST. Rows without a TCE are skipped. Otherwise the data-validation
    light-curve table of the first TCE is downloaded, NaN samples are removed
    from its 'LC_INIT' column and the remaining values are stored in a
    one-column dataframe ('light_flux'). Frames coming from even row positions
    are passed through `synthesize_data`; frames from odd row positions are
    kept unchanged.

    Parameters
    ----------
    tois : pandas dataframe file of planet candidates that will be used
        Must contain a 'TIC ID' column.
    index : at what number the function should stop grabbing candidates
        Clamped to the range [0, len(tois)], so asking for more rows than
        exist simply processes every row.
    timeout : seconds to wait for each HTTP request (default 30)

    Returns
    -------
    synthesized_data : list of `pandas.core.frame.DataFrame`
        Output of `synthesize_data` for the even row positions.
    planet_candidates : list of `pandas.core.frame.DataFrame`
        Unmodified light-flux frames for the odd row positions.
    """
    exomast_url = "https://exo.mast.stsci.edu/api/v0.1/"
    synthesized_data = []
    planet_candidates = []

    # Clamp the row count: a value past the end of the frame used to raise an
    # IndexError, and a negative value must not wrap around.
    n_rows = max(0, min(index, len(tois)))

    for i in range(n_rows):
        tic_id = int(tois['TIC ID'].iloc[i])
        list_tce_query = f"{exomast_url}dvdata/tess/{tic_id}/tces/"

        response = requests.get(list_tce_query, timeout=timeout)
        tce_dict = response.json()

        # No TCE is associated with this TIC ID, so there is no light curve to fetch.
        if len(tce_dict['TCE']) == 0:
            continue

        first_sector, first_tce = tce_dict['TCE'][0].split(":")

        # Request the light-curve table itself: this is the payload that carries
        # the 'fields' and 'data' entries consumed by json_to_table.
        dv_lightcurve_query = f"{exomast_url}dvdata/tess/{tic_id}/table/?tce={first_tce}&sector={first_sector}"
        response = requests.get(dv_lightcurve_query, timeout=timeout)
        data_dict = response.json()

        # turning the json file into an astropy table
        first_lightcurve = json_to_table(data_dict['fields'], data_dict['data'])

        detrended_light_curve = first_lightcurve['LC_INIT'].data.tolist()
        detrended_light_curve = [flux for flux in detrended_light_curve if not math.isnan(flux)]
        df = pd.DataFrame(detrended_light_curve, columns=['light_flux'])

        # Even row positions become synthetic data, odd ones stay as candidates.
        if i % 2 == 0:
            synthesized_data.append(synthesize_data(df))
        else:
            planet_candidates.append(df)

    return synthesized_data, planet_candidates

In [5]:
def synthesize_data(light_curve):
    """
    Mask the outlying samples of a detrended light curve (the planet-transit signal).

    The 1.5 * IQR fences are computed per column. Values strictly inside the
    fences are kept; because the frame is indexed with a boolean frame, every
    other value is replaced by NaN and the shape of the input is preserved.

    Parameters
    ----------
    light_curve : pandas dataframe of light_flux without time stamps
        Of the form [{light_flux:}, ...]

    Returns
    -------
    response : `pandas.core.frame.DataFrame`
    """
    first_quartile = light_curve.quantile(0.25)
    third_quartile = light_curve.quantile(0.75)

    # Upper and lower fences of the 1.5 * IQR outlier rule.
    upper_fence = third_quartile + 1.5 * (third_quartile - first_quartile)
    lower_fence = first_quartile - 1.5 * (third_quartile - first_quartile)

    below_upper = light_curve < upper_fence
    above_lower = light_curve > lower_fence
    return light_curve[below_upper & above_lower]

In [None]:
def json_to_table(fields, data):
    """
    Convert a JSON payload (column specifications plus rows) into an astropy table.

    Parameters
    ----------
    fields : list of dicts
        Of the form [{colname:,datatype:,description:}, ...]
    data : list of dicts
       Of the form [{col1:, col2:, ...},{col1:, col2:, ...}, ...]

    Returns
    -------
    response : `astropy.table.Table`
        One column per entry of `fields`, in the same order.
    """

    varchar_pattern = re.compile(r"varchar\((\d+)\)")

    table = Table()

    column_specs = [(spec['colname'], spec['datatype']) for spec in fields]
    for raw_name, type_name in column_specs:
        name = raw_name.strip()

        if "varchar" in type_name:
            # "varchar(N)" becomes the fixed-width unicode dtype "UN".
            type_name = "U" + varchar_pattern.search(type_name).group(1)
        elif type_name == "real":
            type_name = "float"

        # Rows that lack the column contribute None.
        values = [row.get(name, None) for row in data]
        table[name] = np.array(values, dtype=type_name)

    return table