# Reading tables from Berg et al. (2012)

[Source article on IOPscience](https://iopscience.iop.org/article/10.1088/0004-637X/754/2/98#apj433966t4)

In [119]:
import numpy as np
import pandas as pd
import astropy

# Table 1

In [130]:
# Load the raw ASCII export of Table 1.
# - the first 6 and last 6 lines of the file are skipped (presumably the
#   title/header block and the footnotes -- verify against the data file);
# - fields are separated by runs of one or more tab characters;
# - engine='python' is requested explicitly: the C parser supports neither
#   regex separators nor skipfooter, so pandas would otherwise fall back to
#   the python engine anyway and emit a ParserWarning.
raw_df = pd.read_csv(
    'data/apj433966t1_ascii.txt',
    header=None,
    skiprows=6,
    skipfooter=6,
    sep=r'\t+',
    engine='python',
)
# The file is read without a header row, so column names are assigned by hand.
columns = ['Galaxy', 'RA', 'DEC', 'F_4.5', 'F_K_s', 'D', 'Ref', 'M_B', 'M_4.5', 'M_K_s', 'B-V', 'log(L_4.5)', 'log(M_star)']
raw_df.columns = columns

  raw_df = pd.read_csv('data/apj433966t1_ascii.txt', header = None, skipfooter = 6, skiprows = 6, sep = '\t+')


In [131]:
# Start the cleaned table from the identifier / coordinate columns, copied unchanged.
clean_df = raw_df[['Galaxy', 'RA', 'DEC']].copy()

# Columns whose value field may be written as an upper limit ("<x").
upper_limit_cols = ['F_K_s']

for col in upper_limit_cols:
    # Space-separated tokens: token 0 is the value (or "<limit"), token 2 the
    # uncertainty; token 1 is ignored (presumably the +/- separator).
    tokens = raw_df[col].str.split(' ', expand=True)

    # Splitting token 0 on '<' leaves a plain measurement in column 0 and an
    # upper limit in column 1; empty strings become NaN before the float cast.
    value_and_limit = tokens[0].str.split('<', expand=True)
    value_and_limit = value_and_limit.replace(to_replace='', value=np.nan).astype(float)
    clean_df[col] = value_and_limit[0]
    clean_df[f'{col}_upper_limit'] = value_and_limit[1]

    uncertainty = tokens[2].replace(to_replace='', value=np.nan)
    clean_df[f'{col}_err'] = uncertainty.astype(float)

# Columns holding a plain "value <sep> error" entry: no upper limits and no
# reference marks to strip.
plain_cols = ['D', 'M_K_s', 'log(L_4.5)', 'log(M_star)']

for col in plain_cols:
    parts = raw_df[col].str.split(' ', expand=True)
    # Token 0 -> value column, token 2 -> "<col>_err" column (token 1 is skipped).
    for out_name, part_idx in ((col, 0), (f'{col}_err', 2)):
        # Empty strings are mapped to NaN so the float cast does not fail on them.
        clean_df[out_name] = parts[part_idx].replace(to_replace='', value=np.nan).astype(float)


# Columns whose error field carries an appended '^...' suffix (reference marks).
ref_in_err_cols = ['M_B', 'M_4.5']

for col in ref_in_err_cols:
    pieces = raw_df[col].str.split(' ', expand=True)

    # The value token is cast directly; no empty-string handling is applied here.
    clean_df[col] = pieces[0].astype(float)

    # Keep only the text before the first '^' in the error token, then cast.
    err_text = pieces[2].str.split('^', expand=True)[0]
    clean_df[f'{col}_err'] = err_text.replace(to_replace='', value=np.nan).astype(float)

# Columns where the '^...' suffix (reference marks) can be attached to the
# value token as well as to the error token.
ref_in_value_cols = ['B-V']

for col in ref_in_value_cols:
    pieces = raw_df[col].str.split(' ', expand=True)

    # For both the value (token 0) and the error (token 2): drop everything
    # from the first '^' onwards, turn empty strings into NaN, cast to float.
    value_text = pieces[0].str.split('^', expand=True)[0]
    clean_df[col] = value_text.replace(to_replace='', value=np.nan).astype(float)

    err_text = pieces[2].str.split('^', expand=True)[0]
    clean_df[f'{col}_err'] = err_text.replace(to_replace='', value=np.nan).astype(float)



Convert RA and DEC to decimal degrees

In [132]:
from astropy.coordinates import SkyCoord
import astropy.units as u 

In [142]:
# Parse the coordinate columns: RA is interpreted as an hour angle, DEC as degrees.
sc = SkyCoord(clean_df['RA'], clean_df['DEC'], unit=(u.hourangle, u.deg))
# Overwrite both columns with decimal degrees. `.deg` names the output unit
# explicitly instead of relying on whatever unit the angle object carries.
# NOTE: this cell is not safe to rerun on its own -- once RA holds degrees, a
# second pass would reinterpret those numbers as hours.
clean_df['RA'] = sc.ra.deg
clean_df['DEC'] = sc.dec.deg

In [143]:
# Write the cleaned Table 1 to disk; the DataFrame index is not written.
table1_out_path = 'cleaned/table1.csv'
clean_df.to_csv(table1_out_path, index=False)

# Table 3

In [110]:
# Load the raw ASCII export of Table 3.
# - the first 8 lines and the last line of the file are skipped (presumably
#   header and footnote text -- verify against the data file);
# - fields are separated by runs of one or more tab characters;
# - engine='python' is requested explicitly: the C parser supports neither
#   regex separators nor skipfooter, so pandas would otherwise fall back to
#   the python engine anyway and emit a ParserWarning.
raw_df = pd.read_csv(
    'data/apj433966t3_ascii.txt',
    header=None,
    skiprows=8,
    skipfooter=1,
    sep=r'\t+',
    engine='python',
)
# No header row is read; the first column is the ion label and the remaining
# columns are named by hand.
columns = ['Ion', 'UGC 521 A',  'UGC 695 E', 'UGC 1056 A', 'UGC 1056 B', 'UGC 1176 A', 
           'NGC 784 A', 'NGC 784 B', 'UGC 2716 A', 'KKH 037 A', 'NGC 2537 A']
raw_df.columns = columns


  raw_df = pd.read_csv('data/apj433966t3_ascii.txt', header = None, skipfooter = 1, skiprows = 8, sep = '\t+')


In [116]:
# The 'Ion' label column is carried over unchanged.
clean_df = raw_df[['Ion']].copy()

# Every remaining column is split into a value and an error column.
for region in columns[1:]:
    # Space-separated tokens: 0 = value, 2 = error (token 1 is skipped).
    fields = raw_df[region].str.split(' ', expand=True)
    measured = fields[0].replace(to_replace='', value=np.nan)
    uncertainty = fields[2].replace(to_replace='', value=np.nan)

    clean_df[region] = measured.astype(float)
    # The error column name uses a space (not an underscore) before 'err'.
    clean_df[f'{region} err'] = uncertainty.astype(float)


In [118]:
# Write the cleaned Table 3 to disk; the DataFrame index is not written.
# NOTE(review): the file name carries a '_test' suffix, unlike table1.csv --
# confirm whether that is intended.
table3_out_path = 'cleaned/table3_test.csv'
clean_df.to_csv(table3_out_path, index=False)

# Table 4

In [145]:
# Load the raw ASCII export of Table 4.
# - the first 6 lines and the last line of the file are skipped (presumably
#   header and footnote text -- verify against the data file);
# - fields are separated by runs of one or more tab characters;
# - engine='python' is requested explicitly: the C parser supports neither
#   regex separators nor skipfooter, so pandas would otherwise fall back to
#   the python engine anyway and emit a ParserWarning.
raw_df = pd.read_csv(
    'data/apj433966t4_ascii.txt',
    header=None,
    skiprows=6,
    skipfooter=1,
    sep=r'\t+',
    engine='python',
)
# No header row is read, so column names are assigned by hand.
columns = ['Galaxy','Halpha Region','t_2', 't_3', 'O+/H+', 'O++/H+', 'O/H', '12 + log(O/H)', 'N+/H+', 'log(N/O)', 'N/H']
raw_df.columns = columns

  raw_df = pd.read_csv('data/apj433966t4_ascii.txt', header = None, skipfooter = 1, skiprows = 6, sep = '\t+')


In [151]:
# The two identifier columns are carried over unchanged.
clean_df = raw_df[['Galaxy', 'Halpha Region']].copy()

# Every remaining column is split into a value and a "<name>_err" column.
for quantity in columns[2:]:
    # Space-separated tokens: 0 = value, 2 = error (token 1 is skipped).
    fields = raw_df[quantity].str.split(' ', expand=True)
    measured = fields[0].replace(to_replace='', value=np.nan)
    uncertainty = fields[2].replace(to_replace='', value=np.nan)

    # Empty strings were mapped to NaN above so the float casts accept them.
    clean_df[quantity] = measured.astype(float)
    clean_df[f'{quantity}_err'] = uncertainty.astype(float)

In [190]:
# Re-read the cleaned Table 1 written earlier and keep only the stellar-mass columns.
table1 = pd.read_csv('cleaned/table1.csv')
mass_by_galaxy = table1[['Galaxy', 'log(M_star)', 'log(M_star)_err']].set_index('Galaxy')

# Left join on the galaxy name: every row of clean_df is kept, and rows with
# no match in Table 1 get NaN masses. 'Galaxy' is then restored from the index
# to an ordinary column.
clean_df_with_mass = clean_df.set_index('Galaxy').join(mass_by_galaxy, how='left').reset_index()

In [194]:
# Write Table 4 with the joined stellar masses to disk; the index is not written.
table4_out_path = 'cleaned/table4_with_mass.csv'
clean_df_with_mass.to_csv(table4_out_path, index=False)