In [2]:
import argparse
import os
import pickle
import random
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from astropy import units as u
from astropy.coordinates import SkyCoord
from tensorflow import keras
from tensorflow.keras.utils import pad_sequences
from tqdm import tqdm

from astroOracle.dataloader import LSSTSourceDataSet, load, get_augmented_data, get_static_features, ts_length, get_ts_upto_days_since_trigger, ts_flag_value, static_flag_value, augment_ts_length_to_days_since_trigger
from astroOracle.loss import WHXE_Loss
from astroOracle.taxonomy import get_taxonomy_tree, source_node_label
from astroOracle.vizualizations import make_gif, plot_reliability_diagram, plot_data_set_composition, plot_day_vs_class_score, plot_lc, make_z_plots
from astroOracle.interpret_results import get_conditional_probabilites, save_all_cf_and_rocs, save_leaf_cf_and_rocs, save_all_phase_vs_accuracy_plot
from astroOracle.train_RNN import default_batch_size
from astroOracle.LSST_Source import LSST_Source

In [3]:
# Heads-up: both pickles are read from disc into memory here, so this cell is slow.
print("Loading data from disc...")
test_dir, train_dir = Path("../processed/test"), Path("../processed/train")

# Each split directory holds its time-series pickle under the same file name.
X_ts_test = load(f"{test_dir}/x_ts.pkl")
X_ts_train = load(f"{train_dir}/x_ts.pkl")

Loading data from disc...


In [4]:
# Display the first element of the test set exactly as loaded; at this point
# it still carries the scaled_* columns and the numeric band_label.
X_ts_test[0]

Unnamed: 0,scaled_time_since_first_obs,detection_flag,scaled_FLUXCAL,scaled_FLUXCALERR,band_label
0,0.000000,0,-0.016419,0.014178,0.8700
1,0.000249,0,0.016658,0.030055,1.0150
2,0.049960,0,-0.012748,0.004795,0.6215
3,0.050392,0,0.000265,0.007020,0.7545
4,0.079403,0,-0.001897,0.002851,0.4760
...,...,...,...,...,...
363,8.677405,0,0.025315,0.019436,1.0150
364,8.717478,0,0.003752,0.004272,0.6215
365,8.717718,0,0.004470,0.002783,0.4760
366,8.717998,0,-0.004031,0.002925,0.4760


In [5]:
# (filter letter, lower edge, upper edge) for each band, in the order u -> y.
# NOTE(review): edges look like passband limits in nm, with the /1000 turning
# the midpoint into microns -- confirm against the preprocessing code.
# NOTE(review): the y lower edge (950) does not abut the z upper edge (922),
# unlike every other neighbouring pair -- confirm this is intended.
_band_edges = [
    ('u', 320, 400),
    ('g', 400, 552),
    ('r', 552, 691),
    ('i', 691, 818),
    ('z', 818, 922),
    ('y', 950, 1080),
]

# Lookup from a band's midpoint value (the number stored in `band_label`)
# to its filter letter. Keys are floats, so lookups need an exact match.
wavelengths_to_filter = {
    (lower + upper) / (2 * 1000): letter
    for letter, lower, upper in _band_edges
}

In [6]:
# Convert every test frame in place: multiply the scaled flux columns by 1000
# and the scaled time column by 100, replace the numeric band_label with its
# filter letter, then drop the columns that were converted.
#
# The cell is safe to re-run: frames that were already converted (for example
# before an interrupted run) are skipped instead of raising KeyError on the
# dropped scaled_* columns.
scaled_columns = ['scaled_FLUXCAL', 'scaled_FLUXCALERR', 'scaled_time_since_first_obs', 'band_label']
converted_columns = ['FLUXCAL', 'FLUXCALERR', 'days_since_first_observation', 'filter']

for i in range(len(X_ts_test)):
    frame = X_ts_test[i]
    present = set(frame.columns)

    # Skip only frames that are unambiguously done: every converted column is
    # there and none of the scaled ones remain. Anything else goes through the
    # normal path so a genuinely malformed frame still raises.
    if present.issuperset(converted_columns) and present.isdisjoint(scaled_columns):
        continue

    # Reverse the scale
    frame['FLUXCAL'] = frame['scaled_FLUXCAL'] * 1000
    frame['FLUXCALERR'] = frame['scaled_FLUXCALERR'] * 1000
    frame['days_since_first_observation'] = frame['scaled_time_since_first_obs'] * 100

    # Replace the wavelength with the filter label. This is an exact float
    # lookup, so an unknown band_label raises KeyError rather than being
    # silently mapped to NaN.
    frame['filter'] = [wavelengths_to_filter[n] for n in frame['band_label']]

    # Remove redundant columns (in place, so the object held in the list is
    # the one that gets updated).
    frame.drop(columns=scaled_columns, inplace=True)

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x110e11d20>>
Traceback (most recent call last):
  File "/Users/vedshah/anaconda3/envs/temp/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


In [7]:
# Display the first test element again, after the conversion cell, to check
# the un-scaled FLUXCAL / FLUXCALERR / days columns and the filter letters.
X_ts_test[0]

Unnamed: 0,detection_flag,FLUXCAL,FLUXCALERR,days_since_first_observation,filter
0,0,-16.418797,14.177922,0.0000,z
1,0,16.658297,30.054749,0.0249,y
2,0,-12.748434,4.794619,4.9960,r
3,0,0.264926,7.020427,5.0392,i
4,0,-1.897147,2.851411,7.9403,g
...,...,...,...,...,...
363,0,25.315403,19.435930,867.7405,y
364,0,3.751634,4.271937,871.7478,r
365,0,4.469953,2.783310,871.7718,g
366,0,-4.031081,2.925218,871.7998,g


In [None]:
# Convert every training frame in place: multiply the scaled flux columns by
# 1000 and the scaled time column by 100, replace the numeric band_label with
# its filter letter, then drop the columns that were converted.
#
# The cell is safe to re-run: frames that were already converted (for example
# before an interrupted run) are skipped instead of raising KeyError on the
# dropped scaled_* columns.
scaled_columns = ['scaled_FLUXCAL', 'scaled_FLUXCALERR', 'scaled_time_since_first_obs', 'band_label']
converted_columns = ['FLUXCAL', 'FLUXCALERR', 'days_since_first_observation', 'filter']

for i in range(len(X_ts_train)):
    frame = X_ts_train[i]
    present = set(frame.columns)

    # Skip only frames that are unambiguously done: every converted column is
    # there and none of the scaled ones remain. Anything else goes through the
    # normal path so a genuinely malformed frame still raises.
    if present.issuperset(converted_columns) and present.isdisjoint(scaled_columns):
        continue

    # Reverse the scale
    frame['FLUXCAL'] = frame['scaled_FLUXCAL'] * 1000
    frame['FLUXCALERR'] = frame['scaled_FLUXCALERR'] * 1000
    frame['days_since_first_observation'] = frame['scaled_time_since_first_obs'] * 100

    # Replace the wavelength with the filter label. This is an exact float
    # lookup, so an unknown band_label raises KeyError rather than being
    # silently mapped to NaN.
    frame['filter'] = [wavelengths_to_filter[n] for n in frame['band_label']]

    # Remove redundant columns (in place, so the object held in the list is
    # the one that gets updated).
    frame.drop(columns=scaled_columns, inplace=True)

In [None]:
def save(save_path, obj) -> None:
    """Pickle ``obj`` to the file at ``save_path``.

    Args:
        save_path: Destination file path; opened in binary write mode, so an
            existing file at that path is overwritten.
        obj: Any picklable Python object.

    Raises:
        OSError: If the file cannot be opened for writing.
        pickle.PicklingError: If ``obj`` cannot be pickled.
    """
    # Relies on the module-level `import pickle` in the notebook's import cell.
    with open(save_path, 'wb') as f:
        pickle.dump(obj, f)

In [None]:
# Display the first training element; run after the training conversion cell
# to check its columns before the frames are written out.
X_ts_train[0]

In [None]:
# Write both converted collections as pickles into the current working directory.
save(save_path="x_ts_train.pkl", obj=X_ts_train)
save(save_path="x_ts_test.pkl", obj=X_ts_test)