## This notebook collects light curves of each variable type from Xiaodian's ZTF labeled dataset

In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time
import sqlalchemy as db

Xiaodian's ZTF labeled dataset feature and label table

In [2]:
# Both reads below use the same catalogue file: the first rows describe the
# columns and the data rows follow.
LABELED_DATA_PATH = '../databases/Labeled_data.txt'

# Column-description table: skip the first 7 lines, then read 27 fixed-width rows.
widths = (8,7,4,13,43)
header_pd = pd.read_fwf(LABELED_DATA_PATH, widths=widths, skiprows=7, nrows=27)

# Data rows: skip the first 36 lines and split on runs of whitespace.
# sep=r'\s+' is the documented replacement for delim_whitespace=True, which is
# deprecated since pandas 2.2.
labeled_data = pd.read_csv(LABELED_DATA_PATH, header=None, sep=r'\s+', skiprows=36) # extract data

# The 4th field of the description table supplies the column labels.
labeled_data.columns = header_pd.iloc[:,3]

# Number of rows per variable type (displayed as the cell output).
labeled_data.groupby(['Type']).count()

-------------,ID,SourceID,RAdeg,DEdeg,Per,R21,phi21,T0,gmag,rmag,...,phi21_g,phi21_r,R2_g,R2_r,Amp_g,Amp_r,log(FAP_g),log(FAP_r),Dmin_g,Dmin_r
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BYDra,84697,84697,84697,84697,84697,84697,84697,84697,84697,84697,...,84697,84697,84697,84697,84697,84697,84697,84697,84697,84697
CEP,1262,1262,1262,1262,1262,1262,1262,1262,1262,1262,...,1262,1262,1262,1262,1262,1262,1262,1262,1262,1262
CEPII,358,358,358,358,358,358,358,358,358,358,...,358,358,358,358,358,358,358,358,358,358
DSCT,16709,16709,16709,16709,16709,16709,16709,16709,16709,16709,...,16709,16709,16709,16709,16709,16709,16709,16709,16709,16709
EA,49943,49943,49943,49943,49943,49943,49943,49943,49943,49943,...,49943,49943,49943,49943,49943,49943,49943,49943,49943,49943
EW,369707,369707,369707,369707,369707,369707,369707,369707,369707,369707,...,369707,369707,369707,369707,369707,369707,369707,369707,369707,369707
Mira,11879,11879,11879,11879,11879,11879,11879,11879,11879,11879,...,11879,11879,11879,11879,11879,11879,11879,11879,11879,11879
RR,32518,32518,32518,32518,32518,32518,32518,32518,32518,32518,...,32518,32518,32518,32518,32518,32518,32518,32518,32518,32518
RRc,13875,13875,13875,13875,13875,13875,13875,13875,13875,13875,...,13875,13875,13875,13875,13875,13875,13875,13875,13875,13875
RSCVN,81393,81393,81393,81393,81393,81393,81393,81393,81393,81393,...,81393,81393,81393,81393,81393,81393,81393,81393,81393,81393


In [3]:
# Variable types left out of the download: both Cepheid classes (CEPII and CEP)
# have very few examples, so they are ignored.
EXCLUDED_TYPES = ('CEPII', 'CEP')

all_types = labeled_data.Type.unique()

# Build a keep-mask that is False wherever the type matches an excluded name.
keep_mask = np.ones(len(all_types), dtype=bool)
for rare_type in EXCLUDED_TYPES:
    keep_mask &= (all_types != rare_type)
label = all_types[keep_mask]

print(label)

['BYDra' 'EW' 'SR' 'RSCVN' 'RR' 'DSCT' 'EA' 'Mira' 'RRc']


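Download the light-curve CSV file for each feature-and-label row, for 2000 objects of each label type, and store the light curves in an SQLite database. (Note: the loop below is currently limited to the first 2 objects of each type.)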

In [5]:
# Number of light curves fetched per variable type. The notebook text and the
# database file name mention 2000 per type, but the loop as committed fetched 2.
# NOTE(review): raise to 2000 for the full download -- confirm the intended value.
N_PER_TYPE = 2
LC_URL_PREFIX = 'http://variables.cn:88/seldataz.php?SourceID='

engine = db.create_engine('sqlite:///../databases/save_LC_2000each.db', echo=False)

# The context manager closes the connection even if the loop is interrupted.
# Open a new connection from `engine` for any later database access.
with engine.connect() as sqlite_connection:
    for variable_type in label:
        print(str(variable_type))
        df = labeled_data[labeled_data['Type']==variable_type]
        print(len(df))

        # head() never returns more rows than exist, so a type with fewer than
        # N_PER_TYPE members is taken in full instead of raising IndexError.
        sample = df.head(N_PER_TYPE)
        for i, (source_id, object_id) in enumerate(zip(sample['SourceID'], sample['ID'])):
            url = LC_URL_PREFIX + str(source_id)
            try:
                lc = pd.read_csv(url, header='infer')
            except Exception as err:
                # Best effort: a failed download or unparsable response skips
                # this object, but the failure is reported instead of being
                # silently dropped. KeyboardInterrupt is no longer swallowed.
                print('skipping ' + str(object_id) + ': ' + repr(err))
                continue

            # Tag every light-curve row with its label and object ID.
            lc['Type'] = variable_type
            lc['ID'] = object_id

            # One table per object, named after the value in its ID column;
            # an existing table with the same name is replaced.
            lc.to_sql(str(object_id), sqlite_connection, if_exists='replace')
            print(i)

BYDra
84697
0
1
EW
369707
0
1
SR
119261
0
1
RSCVN
81393
0
1
RR
32518
0
1
DSCT
16709
0
1
EA
49943
0
1
Mira
11879
0
1
RRc
13875
0
1


In [6]:
# Completion marker: printed once every cell above has finished running.
print('end')

end
