In [64]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from astropy.io import fits
from astropy.time import Time
import glob
import aplpy
import sqlite3 
import gzip 
import io
import os

import copy
import json

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

ARCHIVAL_DIR = '/epyc/data/ztf/alerts/'

pd.options.display.max_columns = 150

In [95]:
from os import listdir
from os.path import isfile, join
data_files = [f for f in listdir(ARCHIVAL_DIR+'public/') if isfile(join(ARCHIVAL_DIR+'public/', f))]
csv_ts = [f.split('public_')[-1].split('.')[0][:8] for f in data_files]
alert_ts = list(set(csv_ts).remove(''))

In [99]:
set(csv_ts).remove('')

KeyError: '.'

## Create test dataset

In [66]:
conn = sqlite3.connect('/astro/users/ykwang/data/sqlite_archival.db')
df = pd.read_sql_query("SELECT * FROM ZTF_objects", conn).drop_duplicates()
df.columns = ['ztf_object_id', 'SIMBAD_otype', 'ra', 'dec', 'ROSAT_IAU_NAME',
       'SIMBAD_include', 'last_obs', 'seen_flag', 'interest_flag', 'notes']
len(df['ztf_object_id'].unique())

56000

In [67]:
df = df.loc[df['SIMBAD_include'] == 1].sort_values('last_obs',ascending=False).drop_duplicates(subset=['ztf_object_id'])

In [72]:
with open('archival_sources.txt', 'w') as f:
    for oid in df['ztf_object_id'].unique():
        f.write(f'{oid}\n')

In [52]:
cur = conn.cursor()
cur.execute("SELECT * FROM lightcurves")
dflc = pd.DataFrame(cur.fetchall()).drop_duplicates()
dflc.columns = ['ztf_object_id', 'jd', 'fid', 'magpsf', 'sigmapsf', 'diffmaglim', 'isdiffpos', 'magnr', 'sigmagnr', 'field', 'rcid']
dflc[['jd', 'magpsf', 'sigmapsf', 'diffmaglim', 'magnr', 'sigmagnr', 'field', 'rcid']] = dflc[['jd', 'magpsf', 'sigmapsf', 'diffmaglim', 'magnr', 'sigmagnr', 'field', 'rcid']].astype(float)
dflc['fid'] = dflc['fid'].astype(int)

In [53]:
len(dflc)

3099068

In [54]:
len(dflc['ztf_object_id'].unique())

52361

In [55]:
# df = df.query(f'last_obs > {now_jd - day_thres}')
dflc = dflc.loc[dflc['ztf_object_id'].isin(df['ztf_object_id'])]

In [56]:
%%time
dflc['sign'] = 2* (dflc['isdiffpos'] == 't') - 1

u = 10**(-0.4*dflc['magnr']) + dflc['sign'] * 10**(-0.4*dflc['magpsf'])
dflc['dc_mag'] = -2.5 * np.log10(u)
dflc['dc_sigmag'] = np.sqrt(
    (10**(-0.4*dflc['magnr'])* dflc['sigmagnr']) **2. + 
    (10**(-0.4*dflc['magpsf']) * dflc['sigmapsf'])**2.) / u
dflc['dc_mag_ulim'] = -2.5 * np.log10(10**(-0.4*dflc['magnr']) + 10**(-0.4*dflc['diffmaglim']))
dflc['dc_mag_llim'] = -2.5 * np.log10(10**(-0.4*dflc['magnr']) - 10**(-0.4*dflc['diffmaglim']))

CPU times: user 530 ms, sys: 14.8 ms, total: 545 ms
Wall time: 578 ms


In [57]:
top_oids = dflc.ztf_object_id.value_counts().index[:100]

In [58]:
data = dflc.loc[dflc['ztf_object_id'].isin(top_oids[:10])]

In [59]:
%%time
data['utc'] = pd.to_datetime(data['jd'], unit='D', origin='julian')
data.set_index(['ztf_object_id', 'fid'], inplace=True)
data = data.sort_values('utc').sort_index()

CPU times: user 239 ms, sys: 35.7 ms, total: 275 ms
Wall time: 424 ms


## Create von neuman metric

In [60]:
vn = data.groupby(["ztf_object_id", "fid"])['dc_mag'].apply(lambda x: (x[1:] - x[:-1])**2 / ((len(x) - 1)*x.std()))



In [61]:
vn

ztf_object_id  fid  ztf_object_id  fid
ZTF17aaaikoz   1    ZTF17aaaikoz   1      0.000257
                                   1      0.003801
                                   1      0.000002
                                   1      0.007117
                                   1      0.000119
                                            ...   
ZTF18abhhnjk   3    ZTF18abhhnjk   3      0.000011
                                   3      0.001363
                                   3      0.000769
                                   3      0.000505
                                   3      0.001257
Name: dc_mag, Length: 14263, dtype: float64

In [None]:
%time
data['expd_mean']=data.groupby(["ztf_object_id", "fid"])['dc_mag'].apply(lambda x: x.expanding().apply(np.mean))
data['expd_std']=data.groupby(["ztf_object_id", "fid"])['dc_mag'].apply(lambda x: x.expanding().apply(np.std))

In [63]:
test = data.groupby(["ztf_object_id", "fid"])[['dc_mag', 'expd_mean']].apply(lambda x: print(((x['expd_mean'] - x['dc_mag'])>0).rolling(4).sum()))



ztf_object_id  fid
ZTF17aaaikoz   1      NaN
               1      NaN
               1      NaN
               1      3.0
               1      3.0
                     ... 
               1      3.0
               1      3.0
               1      2.0
               1      2.0
               1      3.0
Length: 353, dtype: float64
ztf_object_id  fid
ZTF17aaaikoz   2      NaN
               2      NaN
               2      NaN
               2      2.0
               2      3.0
                     ... 
               2      3.0
               2      3.0
               2      3.0
               2      4.0
               2      4.0
Length: 877, dtype: float64
ztf_object_id  fid
ZTF17aaaikoz   3      NaN
               3      NaN
               3      NaN
               3      1.0
               3      2.0
                     ... 
               3      4.0
               3      4.0
               3      4.0
               3      4.0
               3      3.0
Length: 74, dtype: float64
zt

In [38]:
data.groupby(["ztf_object_id", "fid"])[['dc_mag', 'expd_mean']].apply(lambda x: x.rolling(4).apply(lambda y: print(y)))


            dc_mag  expd_mean
11391    17.274313  17.274313
11392    17.340516  17.307414
14567    17.156626  17.257152
17758    18.042777  17.453558
38207    15.550437  17.072934
...            ...        ...
4685766  17.837621  16.793689
4698986  17.965584  16.797076
4699123  17.871179  16.800171
4699736  17.261071  16.801496
4699833  16.240828  16.799889

[353 rows x 2 columns]
            dc_mag  expd_mean
9047     15.371219  15.371219
11328    15.559349  15.465284
14675    15.610381  15.513650
16469    15.236446  15.444349
16473    15.345055  15.424490
...            ...        ...
4676186  17.252072  17.145695
4684901  16.326447  17.144749
4685243  16.376873  17.143863
4685284  17.424926  17.144187
4699571  16.468059  17.143409

[877 rows x 2 columns]
            dc_mag  expd_mean
2577721  16.604675  16.604675
2634917  16.094359  16.349517
2812398  16.735582  16.478206
2812401  16.703859  16.534619
2853183  17.085467  16.644789
...            ...        ...
4676129  16.935356  16

In [31]:
len(data)

14292

## Length of outburst