In [2]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as mp
import pandas as pd
import seaborn as sns
import markdown
#import scipy.stats as stats
import sys
sys.path.append('/Users/vs/Dropbox/Python')
sys.path.append('/Users/vs/Dropbox/Python/gloess/')
import shutil
import glob
import re
import os
import linecache


# Font sizes reused by the rcParams overrides below.
bigfontsize = 20
labelfontsize = 16
tickfontsize = 16

# Apply the seaborn 'talk' context first, then override the individual
# matplotlib font settings so the explicit sizes above win.
sns.set_context('talk')
_font_overrides = {
    'font.size': bigfontsize,
    'axes.labelsize': labelfontsize,
    'xtick.labelsize': tickfontsize,
    'ytick.labelsize': tickfontsize,
    'legend.fontsize': tickfontsize,
}
mp.rcParams.update(_font_overrides)

Adding data to Gloess database files
---
* Notebook for adding new data to existing Gloess database files
* Uses the data from Monson et al. (2012) as a worked example


In [3]:
### Set working directory
# All relative file names used below (the input tables and the '.gloess_in'
# files that get written) are resolved against this directory.
# The location defaults to the original hard-coded path but can be overridden
# by setting the GLOESS_WORK_DIR environment variable before starting the kernel.
work_dir = os.environ.get('GLOESS_WORK_DIR', '/Users/vs/Dropbox/CHP/Galactic_Cepheids/')
os.chdir(work_dir)

In [4]:
# Character ranges of the six fixed-width fields; pandas reads each pair as a
# half-open [start, end) interval.
# NOTE(review): (13, 24) and (23, 29) share character 23 -- confirm against the
# table's byte-by-byte description that this character is padding.
colspecs = [(0,12), (13,24), (23,29), (30,35), (36,41), (42,47)]

# The first 20 lines of the file are skipped and the column names are supplied
# here instead of being read from the file.
mtr_df = pd.read_fwf(
    'apj447280t3_mrt.txt',
    colspecs=colspecs,
    skiprows=20,
    header=None,
    names=('Cepheid', 'MJD', 'mag_3p6', 'err_3p6', 'mag_4p5', 'err_4p5'),
)

In [5]:
# Whitespace-separated table: skip the 5 leading lines and the 3 trailing
# lines, and supply the column names here.
# `skipfooter` is not supported by the C parser, so the Python engine is
# requested explicitly instead of relying on pandas' warn-and-fall-back path.
# `sep=r'\s+'` is the documented equivalent of the deprecated
# `delim_whitespace=True`.
avs_df = pd.read_csv(
    'apj447280t4_ascii.txt',
    skiprows=5,
    skipfooter=3,
    sep=r'\s+',
    engine='python',
    header=None,
    names=('Cep1', 'Cep2', 'logP', 'mag_3p6', 'err_3p6', 'mag_4p5', 'err_4p5', 'color', 'err_color'),
)

  if __name__ == '__main__':


In [6]:
# The star name was read as two separate columns; concatenate them (no
# separator) into a single identifier, e.g. 'S' + 'Vul' -> 'SVul'.
avs_df['cepID'] = avs_df['Cep1'].str.cat(avs_df['Cep2'])

# Drop the two name fragments by keyword: the positional `axis` argument
# (`drop([...], 1)`) is no longer accepted by current pandas.
avs_df = avs_df.drop(columns=['Cep1', 'Cep2'])

In [7]:
# List the distinct star identifiers in the mean-magnitude table.
avs_df['cepID'].unique()

array(['SVul', 'GYSge', 'SVVul', 'UCar', 'ellCar', 'TMon', 'WZSgr',
       'RUSct', 'SZAql', 'YOph', 'CDCyg', 'XCyg', 'TTAql', 'V340Nor',
       'TWNor', 'zetaGem', 'betaDor', 'SNor', 'SSge', 'DLCas', 'UVul',
       'WSgr', 'etaAql', 'UAql', 'XSgr', 'USgr', 'V367Sct', 'YSgr', 'VCen',
       'CVMon', 'deltaCep', 'CEaCas', 'CFCas', 'CEbCas', 'FFAql', 'TVul',
       'RTAur'], dtype=object)

In [8]:
# Preview only the first rows instead of rendering the whole table.
mtr_df.head(10)

Unnamed: 0,Cepheid,MJD,mag_3p6,err_3p6,mag_4p5,err_4p5
0,{beta} Dor,55138.0119,1.809,0.019,1.793,0.010
1,{beta} Dor,55140.2499,1.779,0.021,1.819,0.004
2,{beta} Dor,55140.9730,1.805,0.018,1.864,0.006
3,{beta} Dor,55142.0659,1.876,0.019,1.939,0.007
4,{beta} Dor,55142.8811,1.933,0.009,1.969,0.008
5,{beta} Dor,55144.2599,1.959,0.018,1.953,0.013
6,{beta} Dor,55146.2149,1.890,0.016,1.872,0.009
7,{beta} Dor,55147.1152,1.823,0.020,1.805,0.005
8,{beta} Dor,55147.7998,1.810,0.021,1.791,0.010
9,{beta} Dor,55149.2967,1.763,0.020,1.782,0.010


In [9]:
### Normalise the Greek-letter star names
# In the light-curve table the Greek-letter prefixes are wrapped in braces
# ('{beta} Dor'); strip the braces, and write '{ell} Car' as 'l Car'.
# Series.replace with a dict matches whole values and assigns the result back
# to the column, avoiding the chained `.ix` assignment that raised
# SettingWithCopyWarning (`.ix` has also been removed from pandas).
greek_name_map = {
    '{beta} Dor': 'beta Dor',
    '{delta} Cep': 'delta Cep',
    '{eta} Aql': 'eta Aql',
    '{zeta} Gem': 'zeta Gem',
    '{ell} Car': 'l Car',
}
mtr_df['Cepheid'] = mtr_df['Cepheid'].replace(greek_name_map)

# Keep the mean-magnitude table consistent with the renamed 'l Car'.
avs_df['cepID'] = avs_df['cepID'].replace({'ellCar': 'lCar'})

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [10]:
# Distinct star names in the light-curve table, in order of first appearance;
# this is the list the processing loop below iterates over.
unique_names = mtr_df['Cepheid'].unique()


In [11]:
# Display the names to check that the brace clean-up above took effect.
unique_names

array(['beta Dor', 'CD Cyg', 'CEa Cas', 'CEb Cas', 'CF Cas', 'CV Mon',
       'delta Cep', 'DL Cas', 'eta Aql', 'FF Aql', 'GY Sge', 'RT Aur',
       'RU Sct', 'SV Vul', 'SZ Aql', 'S Nor', 'S Sge', 'S Vul', 'TT Aql',
       'TW Nor', 'T Mon', 'T Vul', 'U Aql', 'U Car', 'U Sgr', 'U Vul',
       'V340 Nor', 'V367 Sct', 'V Cen', 'WZ Sgr', 'W Sgr', 'X Cyg',
       'X Sgr', 'Y Oph', 'Y Sgr', 'zeta Gem', 'l Car'], dtype=object)

In [12]:
def gloess_setup(cepheid):
    """Make sure ``<cepID>.gloess_in`` exists in the working directory and load it.

    ``cepID`` is ``cepheid`` with the spaces removed. A master file named after
    the upper-cased ``cepheid`` is looked for in the hard-coded cepheids
    directory:

    * If no master file exists, a new four-line header is written to
      ``<cepID>.gloess_in`` (star name, period, ``0``, smoothing parameters),
      overwriting any file of that name. The period is ``10**logP`` with
      ``logP`` taken from the module-level ``avs_df`` row whose ``cepID``
      matches.
    * If a master file exists and there is no local ``<cepID>.gloess_in`` yet,
      the master file is copied into place. An existing local file is reused.

    Parameters
    ----------
    cepheid : str
        Star name as it appears in ``mtr_df['Cepheid']``, e.g. ``'RT Aur'``.

    Returns
    -------
    tuple
        ``(cepID, df, smooth)`` where ``cepID`` is the space-free name, ``df``
        holds everything after the four header lines parsed into the columns
        ``MJD``, ``mag_<band>``/``err_<band>`` for each band and ``Reference``,
        and ``smooth`` is the list of whitespace-separated tokens on the
        fourth header line.

    Raises
    ------
    ValueError
        If a new file has to be created but ``avs_df`` has no entry for the star.
    """
    bands = ['U', 'B', 'V', 'R', 'I', 'J', 'H', 'K', '3p6', '4p5', '5p8', '8p0']
    cols = ['MJD']
    for band in bands:
        cols.extend(['mag_' + band, 'err_' + band])
    cols.append('Reference')

    orig_file = '/Users/vs/Dropbox/All_Cepheids_ever/MilkyWay/cepheids/' + cepheid.upper()
    cepID = cepheid.replace(' ', '')
    new_file = cepID + '.gloess_in'

    if not os.path.exists(orig_file):
        log_periods = avs_df.loc[avs_df['cepID'] == cepID, 'logP'].values
        if len(log_periods) == 0:
            raise ValueError(
                "No logP entry for '{0}' in avs_df; cannot create {1}".format(cepID, new_file))
        # Take the scalar out of the array: formatting the array itself writes
        # its repr (brackets included) on Python 2 and raises on Python 3.
        period = 10 ** float(log_periods[0])
        smooth = '0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10'
        with open(new_file, 'w') as f:
            f.write("{0:s}\n".format(cepheid))
            f.write("{0:.6f}\n".format(period))
            f.write("0\n")
            f.write("{0:s}\n".format(smooth))
    elif not os.path.exists(new_file):
        shutil.copy(orig_file, new_file)

    # The file may just have been written or rewritten, so drop any cached
    # copy before reading the header line back.
    linecache.clearcache()
    smooth = linecache.getline(new_file, 4).split()

    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
    # NOTE(review): comment='-' makes the parser discard everything from the
    # first '-' on a line -- confirm no data value is expected to contain one.
    df = pd.read_csv(new_file, header=None, skiprows=4, names=cols, comment='-', sep=r'\s+')
    return (cepID, df, smooth)


In [13]:
def add_to_file(cepheid):
    """Append the ``mtr_df`` rows for one star to its ``.gloess_in`` file.

    The rows of the module-level ``mtr_df`` whose ``Cepheid`` equals
    ``cepheid`` are concatenated after the rows already in the file (as
    returned by ``gloess_setup``). The file is then rewritten: a four-line
    header (star name, period, number of data rows, smoothing parameters)
    followed by the data, space-separated, with missing values written as
    ``99.99`` and floats with four decimals.

    Nothing is de-duplicated, so calling this again for a star whose file
    already contains these rows appends them a second time.

    Parameters
    ----------
    cepheid : str
        Star name as it appears in ``mtr_df['Cepheid']``, e.g. ``'RT Aur'``.

    Returns
    -------
    pandas.DataFrame
        The combined table (existing rows followed by the new rows).
    """
    ### select rows from df
    new_df = mtr_df[mtr_df['Cepheid'] == cepheid].drop(columns=['Cepheid'])
    cepID, orig_df, smooth = gloess_setup(cepheid)
    complete_df = pd.concat([orig_df, new_df], ignore_index=True)
    outfile = cepID + '.gloess_in'

    # The header layout written below (and by gloess_setup) is: name, period,
    # row count, smoothing. The period therefore sits on line 2; reading
    # line 3 would pick up the row count and overwrite the period with it.
    period = linecache.getline(outfile, 2).strip()

    # gloess_setup returns the smoothing parameters as a list of tokens;
    # join them back into a single header line instead of writing a list repr.
    smooth_line = smooth if isinstance(smooth, str) else ' '.join(smooth)

    bands = ['U', 'B', 'V', 'R', 'I', 'J', 'H', 'K', '3p6', '4p5', '5p8', '8p0']
    printcols = ['MJD']
    for band in bands:
        printcols.extend(['mag_' + band, 'err_' + band])
    printcols.append('Reference')

    with open(outfile, 'w') as f:
        f.write("{0:s}\n".format(cepheid))
        f.write("{0:s}\n".format(period))
        f.write("{0:d}\n".format(len(complete_df)))
        f.write("{0:s}\n".format(smooth_line))
    # Append the data rows below the header in the fixed column order.
    complete_df.to_csv(outfile, na_rep='99.99', float_format='%3.4f', header=False,
                       index=False, mode='a', sep=' ', columns=printcols)
    return complete_df

In [15]:
#gal_df.apply(lambda line: all_the_gloess(line), axis=1)
# Update the .gloess_in file of every star present in the light-curve table.
for star_name in unique_names:
    add_to_file(star_name)

In [223]:
# Process a single star and keep the combined table for inspection.
# NOTE(review): 'RT Aur' is also in unique_names, so if the loop over
# unique_names has already run, this sends the same star through add_to_file
# a second time -- confirm that is intended.
rtaur_df = add_to_file('RT Aur')

In [224]:
# Scratch check: pull out the light-curve rows of a single star by name.
cepheid = 'beta Dor'
is_selected = mtr_df['Cepheid'] == cepheid
new_df = mtr_df.loc[is_selected]

In [151]:
# Look up logP for one star; the result is an array with one entry per matching row.
avs_df.loc[avs_df['cepID'] == 'betaDor', 'logP'].values

array([ 0.993])

In [138]:
# Define the name in this cell so it does not depend on leftover kernel state:
# the distinct star identifiers of the mean-magnitude table.
unique_avs_names = avs_df['cepID'].unique()
unique_avs_names

array(['SVul', 'GYSge', 'SVVul', 'UCar', 'lCar', 'TMon', 'WZSgr', 'RUSct',
       'SZAql', 'YOph', 'CDCyg', 'XCyg', 'TTAql', 'V340Nor', 'TWNor',
       'zetaGem', 'betaDor', 'SNor', 'SSge', 'DLCas', 'UVul', 'WSgr',
       'etaAql', 'UAql', 'XSgr', 'USgr', 'V367Sct', 'YSgr', 'VCen',
       'CVMon', 'deltaCep', 'CEaCas', 'CFCas', 'CEbCas', 'FFAql', 'TVul',
       'RTAur'], dtype=object)

In [174]:
# Preview only the first rows of the combined table instead of rendering all of it.
rtaur_df.head(10)

Unnamed: 0,MJD,Reference,err_3p6,err_4p5,err_5p8,err_8p0,err_B,err_H,err_I,err_J,...,mag_5p8,mag_8p0,mag_B,mag_H,mag_I,mag_J,mag_K,mag_R,mag_U,mag_V
0,55147.129,99.99,99.99,99.99,99.99,99.99,99.99,3.86,99.99,99.99,...,99.99,99.99,0.006,99.99,99.99,99.99,3.87,99.99,99.99,99.99
1,55149.24,99.99,99.99,99.99,99.99,99.99,99.99,3.79,99.99,99.99,...,99.99,99.99,0.005,99.99,99.99,99.99,3.77,99.99,99.99,99.99
2,55150.307,99.99,99.99,99.99,99.99,99.99,99.99,3.81,99.99,99.99,...,99.99,99.99,0.007,99.99,99.99,99.99,3.81,99.99,99.99,99.99
3,55150.853,99.99,99.99,99.99,99.99,99.99,99.99,3.86,99.99,99.99,...,99.99,99.99,0.007,99.99,99.99,99.99,3.87,99.99,99.99,99.99
4,55152.332,99.99,99.99,99.99,99.99,99.99,99.99,3.84,99.99,99.99,...,99.99,99.99,0.006,99.99,99.99,99.99,3.82,99.99,99.99,99.99
5,55153.447,99.99,99.99,99.99,99.99,99.99,99.99,3.79,99.99,99.99,...,99.99,99.99,0.008,99.99,99.99,99.99,3.78,99.99,99.99,99.99
6,55154.7,99.99,99.99,99.99,99.99,99.99,99.99,3.88,99.99,99.99,...,99.99,99.99,0.007,99.99,99.99,99.99,3.88,99.99,99.99,99.99
7,55156.478,99.99,99.99,99.99,99.99,99.99,99.99,3.79,99.99,99.99,...,99.99,99.99,0.007,99.99,99.99,99.99,3.78,99.99,99.99,99.99
8,55157.812,99.99,99.99,99.99,99.99,99.99,99.99,3.81,99.99,99.99,...,99.99,99.99,0.006,99.99,99.99,99.99,3.82,99.99,99.99,99.99
9,55158.969,99.99,99.99,99.99,99.99,99.99,99.99,3.95,99.99,99.99,...,99.99,99.99,0.008,99.99,99.99,99.99,3.95,99.99,99.99,99.99


In [178]:
    # Build the column list: 'MJD' followed by a mag_/err_ pair for each band,
    # in band order. Unlike the lists built inside the functions above, this one
    # has no trailing 'Reference' column.
    # Every line of the cell carries the same leading indent, which IPython
    # strips before execution.
    bands = ['U', 'B', 'V', 'R', 'I', 'J', 'H', 'K', '3p6', '4p5', '5p8', '8p0']
    cols = ['MJD']
    for band in bands:
        cols.extend(['mag_' + band, 'err_' + band])
    cols

['MJD',
 'mag_U',
 'err_U',
 'mag_B',
 'err_B',
 'mag_V',
 'err_V',
 'mag_R',
 'err_R',
 'mag_I',
 'err_I',
 'mag_J',
 'err_J',
 'mag_H',
 'err_H',
 'mag_K',
 'err_K',
 'mag_3p6',
 'err_3p6',
 'mag_4p5',
 'err_4p5',
 'mag_5p8',
 'err_5p8',
 'mag_8p0',
 'err_8p0']