In [1]:
# Import packages and modules
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Probe a patch of the sky

Get light curves from a specific location in the sky and a specific time window. Set observation filter and other parameters. The size of path in sky is 100 arc seconds in radius

In [2]:
# specify API parameters to filter the data
circle_ra = 255.9302 # right acession of the object in the sky
circle_dec = 11.8654 # declination of  the object in  the sky
circle_radius = 0.028 # circle radius in degrees, ~100 arcseconds
filter_band = "r" # observation filterm
observation_nums_min = 3 # minimum number of observations
time_range_start = 58194.0 # time in modified julian date
time_range_end = 58483.0
flag_mask = 32768
table_format = "ipac_table"

circle = "POS=CIRCLE"+"+"+str(circle_ra)+"+"+str(circle_dec)+"+"+str(circle_radius)
band = "BANDNAME="+ filter_band
obs = "NOBS_MIN="+str(observation_nums_min)
time_range = "TIME=" + str(time_range_start)+ "+" + str(time_range_end)
mask = "BAD_CATFLAGS_MASK=" + str(flag_mask)
table_format = "FORMAT=" + str(table_format)
params = circle+ "&" + band + "&" + obs + "&" + time_range + "&" + mask + "&" + table_format

url= "https://irsa.ipac.caltech.edu/cgi-bin/ZTF/nph_light_curves?" + params

header = pd.read_csv(url, header=None, sep='|', skiprows=50,usecols=range(1,25), nrows=1) # extract column names
data = pd.read_csv(url, header=None, delim_whitespace=True, skiprows=55) # extract data
data.columns = header.iloc[0].str.strip()

# Find unique objects in the dataset

Use the information to find all unique objects in that patch of sky. Then extract the whole light curve of the objects over the whole time range. We will use this data to start calculating features.

In [3]:
oid_unique = data.oid.unique()

In [4]:
# get complete light curve for each object
lightcurves = {} # empty dictionary to hold dataframe for each light curve
for obj in oid_unique:
    url = 'https://irsa.ipac.caltech.edu/cgi-bin/ZTF/nph_light_curves?ID=' + str(obj) + '&BAD_CATFLAGS_MASK=32768&FORMAT=ipac_table'
    data = pd.read_csv(url, header=None, delim_whitespace=True, skiprows=55) # extract data
    header = pd.read_csv(url, header=None, sep='|', skiprows=50,usecols=range(1,25), nrows=1)
    data.columns = header.iloc[0].str.strip()
    lightcurves [str(obj)] = data

# Calculate features

In [5]:
# weighted mean
def weighted_mean(mag,mag_err):
    mag2 = (mag_err*mag_err) # mag err square
    mag2_inv = 1/mag2.values; # take inverse of the values
    w = pd.Series(mag2_inv) # covert it back to s series
    sw = w.sum() # sum of weights
    wmag = mag*w # multiply magnitude with weights
    wmean = wmag.sum()/sw # weighted mean
    return wmean

In [6]:
# welsh J, K statistics
def welsh_staton(mag_series,wmean):
    N = len(mag_series)
    d_i = N/(N-1)*(mag_series - wmean) # replace mean by weighted mean
    d_i1 = d_i.shift(periods=-1)
    d_i1.fillna(0, inplace = True)
    Pi = d_i*d_i1
    Pi_val = Pi.values
    Psign = np.sign(Pi_val)
    Jval = Psign*np.sqrt(np.abs(Pi_val))
    J = np.sum(Jval) 
    K1 = abs(d_i.values)/N
    K2 = np.sqrt(1/N*np.sum(d_i.values*d_i.values))
    K = np.sum(K1*K2)
    return J, K 

In [7]:
mean = []
wmean = [] # weighted mean
MAD = []
IQR = []
f60 = []
f70 = []
f80 = []
f90 = []
skew = []
kurtosis = []
welsh_K = []
welsh_J = []

for lc in lightcurves:
    df = lightcurves[lc]
    N = len(df)
    wmean_temp = weighted_mean(df.mag,df.magerr)
    K_temp, J_temp =  welsh_staton(df.mag, wmean_temp )
    mean.append(df.mag.mean())
    wmean.append(wmean_temp) 
    deviation = abs(df.mag - df.mag.median())
    MAD.append(deviation.median())
    IQR.append(df.mag.quantile(0.75) - df.mag.quantile(0.25))
    f60.append(df.mag.quantile(0.80) - df.mag.quantile(0.2))
    f70.append(df.mag.quantile(0.85) - df.mag.quantile(0.15))
    f80.append(df.mag.quantile(0.9) - df.mag.quantile(0.10))
    f90.append(df.mag.quantile(0.95) - df.mag.quantile(0.05))
    skew.append(df.mag.skew())
    kurtosis.append(df.mag.kurtosis())
    welsh_J.append(J_temp)
    welsh_K.append(K_temp)
    
features = pd.DataFrame()
features['mean'] = mean
features['wmean'] = wmean
features['MAD'] = MAD
features['IQR'] = IQR
features['f60'] = f60
features['f70'] = f70
features['f80'] = f80
features['f90'] = f90
features['skew'] = skew
features['kurtosis'] = kurtosis
features['oid'] = lightcurves.keys()
features['welsh_J'] = welsh_J
features['welsh_K'] = welsh_K
features.set_index('oid')

Unnamed: 0_level_0,mean,wmean,MAD,IQR,f60,f70,f80,f90,skew,kurtosis,welsh_J,welsh_K
oid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
535211300013119,20.933098,20.906107,0.114201,0.228526,0.285935,0.364945,0.457659,0.574936,0.122916,-0.09138,0.026219,1.912423
535211300013156,17.010256,17.010104,0.00856,0.017048,0.021558,0.026835,0.032895,0.04294,0.098838,0.94102,0.000148,0.047717
535211300013160,16.683809,16.683682,0.00823,0.016586,0.019975,0.025735,0.034491,0.043954,0.202629,1.750657,0.000146,0.38301
535211300013193,18.936782,18.932775,0.030067,0.060543,0.069927,0.089354,0.117518,0.175412,-0.528533,1.493851,0.002058,0.704188
535211300013235,16.517925,16.517776,0.00856,0.01688,0.021508,0.026185,0.034168,0.048931,0.464462,2.824437,0.000181,0.119191
535211300013236,16.142187,16.142098,0.006546,0.013322,0.018642,0.023455,0.030096,0.038361,0.514588,4.103155,0.000137,0.131599
535211300013259,17.635542,17.63491,0.016413,0.032641,0.040597,0.0498,0.060711,0.076,-0.373403,0.861683,0.000472,0.290856
535211300021592,21.420837,21.383264,0.149454,0.349274,0.382861,0.455369,0.58587,0.685739,0.437024,0.135239,0.047644,2.484659
535211300021699,17.241562,17.241244,0.010302,0.020893,0.026758,0.032352,0.042602,0.059191,-0.223447,2.596,0.000261,0.279795
535211300021721,19.560204,19.545855,0.047465,0.094015,0.122031,0.166518,0.219423,0.297722,-0.270217,4.352604,0.00684,2.188841


# Labeled Data set
Now lets check the labeled data set from http://variables.cn:88/ztf/ . The label is column 24 of the dataset

In [8]:
labeled_data = pd.read_csv('Labeled_data.txt', header=None, delim_whitespace=True, skiprows=37) # extract data
# header_pd = pd.read_csv('Labeled_data.txt', header=None, sep=' ', skiprows=9, nrows=26, error_bad_lines=False )
labeled_data.head()
# header_pd.head(26)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
0,ZTFJ000000.14+721413.7,2,0.00061,72.23716,0.29915,0.263,6.308,58388.255579,19.613,18.804,...,6.308,0.624,0.873,0.54,0.438,-13.49,-27.331,EW,0.19,0.078
1,ZTFJ000000.19+320847.2,3,0.0008,32.14645,0.287059,0.01,8.024,58280.478081,15.311,14.61,...,8.024,0.94,0.977,0.219,0.197,-7.506,-10.079,EW,0.02,0.017
2,ZTFJ000000.26+311206.3,4,0.00109,31.20176,0.362217,0.132,6.281,58283.461994,16.35,15.844,...,6.281,0.951,0.96,0.233,0.226,-7.83,-9.245,EW,0.013,0.02
3,ZTFJ000000.30+711634.1,5,0.00125,71.27616,0.268515,0.16,5.236,58657.423517,19.144,17.875,...,5.236,0.363,0.623,0.173,0.154,-9.865,-22.037,EW,0.0,0.005
4,ZTFJ000000.30+233400.5,6,0.00125,23.56682,0.269874,0.193,6.302,58437.268664,17.89,16.944,...,6.302,0.91,0.976,0.373,0.352,-7.075,-8.819,EW,0.098,0.034


In [9]:
label = pd.Series(labeled_data.loc[:,24])
label.unique()

array(['EW', 'BYDra', 'SR', 'RSCVN', 'RR', 'DSCT', 'EA', 'Mira', 'RRc',
       'CEP', 'CEPII'], dtype=object)