In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time
from sklearn import datasets

In [None]:
# The catalogue file is read twice: a fixed-width block near the top (rows
# 7..33) whose 4th field supplies the column names, and the whitespace-separated
# data rows that start after the first 37 lines.
# NOTE(review): presumably the fixed-width block is a byte-by-byte column
# description; confirm against Labeled_data.txt.
widths = (8, 7, 4, 13, 43)
header_pd = pd.read_fwf('Labeled_data.txt', widths=widths, skiprows=7, nrows=27)
# sep=r'\s+' is the documented replacement for the deprecated delim_whitespace=True
labeled_data = pd.read_csv('Labeled_data.txt', header=None, sep=r'\s+', skiprows=37) # extract data
labeled_data.columns = header_pd.iloc[:, 3]
# Reproducible random subset of at most 1000 sources (capped so that a smaller
# catalogue does not make sample() raise).
labeled_data_sampled = labeled_data.sample(n=min(1000, len(labeled_data)), random_state=1)

In [None]:
def weighted_mean(mag, mag_err):
    """Return the inverse-variance weighted mean of ``mag``.

    Each value is weighted by ``1 / mag_err**2``. Values and errors are paired
    by position, so the result does not depend on the index of the inputs
    (e.g. a Series obtained by filtering a larger frame).

    Parameters
    ----------
    mag : array-like
        Measured values.
    mag_err : array-like
        One-sigma uncertainties, same length as ``mag``.

    Returns
    -------
    float
        The weighted mean, or NaN when the inputs are empty.

    Raises
    ------
    ValueError
        If ``mag`` and ``mag_err`` differ in shape.
    """
    values = np.asarray(mag, dtype=float)
    errors = np.asarray(mag_err, dtype=float)
    if values.shape != errors.shape:
        raise ValueError("mag and mag_err must have the same shape")
    if values.size == 0:
        return np.nan
    # Plain arrays on purpose: multiplying two Series aligns on index labels,
    # which silently mispairs values and weights for non-default indexes.
    weights = 1.0 / (errors * errors)
    return np.sum(values * weights) / np.sum(weights)

In [None]:
# Welch-Stetson J and K variability statistics
def welsh_staton(mag_series, wmean):
    """Compute Stetson-style J and K statistics of a magnitude sequence.

    Residuals are ``delta_i = sqrt(N / (N - 1)) * (mag_i - wmean)``. They are
    not divided by per-point errors (none are passed in), so they are in
    magnitude units.

    * ``J`` is the sum over consecutive pairs of
      ``sign(P) * sqrt(|P|)`` with ``P = delta_i * delta_{i+1}``. It is the raw
      sum, not divided by the number of pairs.
    * ``K`` is ``mean(|delta|) / sqrt(mean(delta**2))``.

    Parameters
    ----------
    mag_series : array-like
        Magnitudes in observation order.
    wmean : float
        Reference (weighted) mean subtracted from every magnitude.

    Returns
    -------
    (J, K) : tuple of float
        Both NaN when fewer than two points are given; ``K`` is NaN when all
        residuals are zero.
    """
    resid = np.asarray(mag_series, dtype=float) - wmean
    N = resid.size
    if N < 2:
        # N - 1 == 0 would divide by zero and there is no consecutive pair.
        return np.nan, np.nan
    d_i = np.sqrt(N / (N - 1)) * resid
    # Products of each residual with its successor; the last point has no
    # successor and therefore contributes nothing to J.
    Pi_val = d_i[:-1] * d_i[1:]
    J = np.sum(np.sign(Pi_val) * np.sqrt(np.abs(Pi_val)))
    rms = np.sqrt(np.mean(d_i * d_i))
    # K is a ratio (mean absolute residual over rms residual), not a product.
    K = np.mean(np.abs(d_i)) / rms if rms > 0 else np.nan
    return J, K

In [None]:
# Accumulators for the g-band features: every name is bound to its own empty
# list, and the download loop appends one entry per processed source.
(g_mean, g_wmean, g_MAD, g_IQR,
 g_f60, g_f70, g_f80, g_f90,
 g_skew, g_kurtosis, g_welsh_K, g_welsh_J) = ([] for _ in range(12))

# Same set of accumulators for the r band.
(r_mean, r_wmean, r_MAD, r_IQR,
 r_f60, r_f70, r_f80, r_f90,
 r_skew, r_kurtosis, r_welsh_K, r_welsh_J) = ([] for _ in range(12))

In [None]:
# Query parameters for the IRSA ZTF light-curve service.
circle_radius = 0.00028 # 1 arcsec = 0.00028 degrees
t_format = "ipac_table"
table_format = "FORMAT=" + str(t_format)
flag_mask = 32768
mask = "BAD_CATFLAGS_MASK=" + str(flag_mask)
collect = "COLLECTION=" + "ztf_dr2"
numobs = "NOBS_MIN=20"
# Not sent to the service: the query fetches every band and the loop below
# splits the result on the `filtercode` column.
filter_band = "g"
label = []
SourceID = []

# filtercode value -> {feature name -> module-level accumulator list}
_band_sinks = {
    "zg": {
        "mean": g_mean, "wmean": g_wmean, "MAD": g_MAD, "IQR": g_IQR,
        "f60": g_f60, "f70": g_f70, "f80": g_f80, "f90": g_f90,
        "skew": g_skew, "kurtosis": g_kurtosis,
        "welsh_J": g_welsh_J, "welsh_K": g_welsh_K,
    },
    "zr": {
        "mean": r_mean, "wmean": r_wmean, "MAD": r_MAD, "IQR": r_IQR,
        "f60": r_f60, "f70": r_f70, "f80": r_f80, "f90": r_f90,
        "skew": r_skew, "kurtosis": r_kurtosis,
        "welsh_J": r_welsh_J, "welsh_K": r_welsh_K,
    },
}
_FEATURE_NAMES = tuple(_band_sinks["zg"])


def _fetch_light_curve(url):
    """Download one IPAC-table light curve and return it as a DataFrame.

    Data rows are read from line 55 onward and the '|'-delimited column names
    from line 50 (0-based counts of skipped lines).
    NOTE(review): the URL is downloaded once per read_csv call, i.e. twice.
    """
    try:
        # sep=r"\s+" is the documented replacement for delim_whitespace=True
        data = pd.read_csv(url, header=None, sep=r"\s+", skiprows=55)
    except pd.errors.EmptyDataError:
        # Nothing left after the skipped lines - presumably the query matched
        # no observations; treat it as an empty light curve.
        return pd.DataFrame(columns=["filtercode", "mag", "magerr"])
    header = pd.read_csv(url, header=None, sep='|', skiprows=50, usecols=range(1, 25), nrows=1)
    data.columns = header.iloc[0].str.strip()
    return data


def _band_features(band_df):
    """Return the summary statistics of one band's observations as a dict.

    Every feature is NaN when ``band_df`` has no rows.
    """
    if len(band_df) == 0:
        return dict.fromkeys(_FEATURE_NAMES, np.nan)
    mag = band_df.mag
    # Inverse-variance weights: precise points (small magerr) count more.
    wmean = np.average(mag, weights=1.0 / (band_df.magerr * band_df.magerr))
    # welsh_staton returns (J, K) in that order.
    welsh_J, welsh_K = welsh_staton(mag, wmean)
    q = mag.quantile
    return {
        "mean": mag.mean(),
        "wmean": wmean,
        "MAD": (mag - mag.median()).abs().median(),
        "IQR": q(0.75) - q(0.25),
        # fNN = width of the central NN % of the magnitude distribution
        "f60": q(0.80) - q(0.2),
        "f70": q(0.85) - q(0.15),
        "f80": q(0.9) - q(0.10),
        "f90": q(0.95) - q(0.05),
        "skew": mag.skew(),
        "kurtosis": mag.kurtosis(),
        "welsh_J": welsh_J,
        "welsh_K": welsh_K,
    }


start_time = time.time()
k = 0
for k, i in enumerate(labeled_data_sampled.index, start=1):
    ra = labeled_data_sampled.RAdeg[i]
    dec = labeled_data_sampled.DEdeg[i]
    circle = "POS=CIRCLE" + "+" + str(ra) + "+" + str(dec) + "+" + str(circle_radius)
    params = "&".join([circle, mask, numobs, collect, table_format])

    url = "https://irsa.ipac.caltech.edu/cgi-bin/ZTF/nph_light_curves?" + params
    lc = _fetch_light_curve(url)

    per_band = {
        code: _band_features(lc.loc[lc["filtercode"] == code])
        for code in _band_sinks
    }

    # Append only once everything for this source has been computed, so that
    # all accumulator lists stay the same length if a request fails mid-run.
    label.append(labeled_data_sampled.Type[i])
    SourceID.append(labeled_data_sampled.SourceID[i])
    for code, sinks in _band_sinks.items():
        for name, sink in sinks.items():
            sink.append(per_band[code][name])
    print(k)
end_time = time.time() # total time taken
print(f"Runtime of the program is {end_time - start_time}")


In [None]:
# Assemble the per-source feature table in one go. Column order: the g-band
# statistics, then the r-band statistics, then the class label and source id.
features = pd.DataFrame({
    # g filter data
    'g_mean': g_mean,
    'g_wmean': g_wmean,
    'g_MAD': g_MAD,
    'g_IQR': g_IQR,
    'g_f60': g_f60,
    'g_f70': g_f70,
    'g_f80': g_f80,
    'g_f90': g_f90,
    'g_skew': g_skew,
    'g_kurtosis': g_kurtosis,
    'g_welsh_J': g_welsh_J,
    'g_welsh_K': g_welsh_K,
    # r filter data
    'r_mean': r_mean,
    'r_wmean': r_wmean,
    'r_MAD': r_MAD,
    'r_IQR': r_IQR,
    'r_f60': r_f60,
    'r_f70': r_f70,
    'r_f80': r_f80,
    'r_f90': r_f90,
    'r_skew': r_skew,
    'r_kurtosis': r_kurtosis,
    'r_welsh_J': r_welsh_J,
    'r_welsh_K': r_welsh_K,
    'label': label,
    'sourceid': SourceID,
})
# Cell output only: shows the table indexed by source id. The result is not
# assigned, so `features` keeps `sourceid` as an ordinary column.
features.set_index('sourceid')

In [None]:
# Persist the feature table; the row index is not written to the file.
features.to_csv(path_or_buf='features2.csv', index=False)

In [None]:
# Reload the saved feature table. The first row of the CSV holds the column
# names, which read_csv's default header handling picks up, so a single read
# replaces the separate header/data passes.
df = pd.read_csv('features2.csv')

In [None]:
# Preview the first five rows of the reloaded table.
df.head(n=5)