In [None]:
import pickle
import random
import os, sys, time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy.io import fits
from astropy.table import Table

import dask
import dask.dataframe as dd

import warnings
# Silence every DeprecationWarning for the rest of the session.
warnings.simplefilter("ignore", DeprecationWarning)
# Disable pandas' chained-assignment (SettingWithCopy) check.
pd.set_option("mode.chained_assignment", None)
# Text style dictionary (serif, dark red, normal weight, size 16);
# presumably meant to be passed as a matplotlib `fontdict` -- not used in the cells shown here.
font = dict(
    family='serif',
    color='darkred',
    weight='normal',
    size=16,
)

In [None]:
# Import the SC4K table cross-matched with COSMOS2020 to integrate the BB photometry.
dat = Table.read('lae_matched', format='fits')
# The previous `laes.reset_index()` call discarded its return value and therefore
# did nothing; it is dropped here. NOTE(review): to_pandas() is expected to give a
# default 0..N-1 index for a table without a primary key -- confirm if that matters.
laes = dat.to_pandas()
# Preview only the first rows instead of rendering the whole catalogue.
laes.head()

In [None]:
# Redshift assigned to each SC4K narrow-/medium-band filter
# (grouping of the SC4K sources by redshift follows Sobral et al. 2018).
SC4K_FILTER_REDSHIFT = {
    'NB392': 2.2,
    'NB501': 3.1,
    'NB711': 4.8,
    'NB816': 5.7,
    'IA427': 2.5,
    'IA464': 2.81,
    'IA484': 2.98,
    'IA505': 3.16,
    'IA527': 3.33,
    'IA574': 3.74,
    'IA624': 4.12,
    'IA679': 4.58,
    'IA709': 4.82,
    'IA738': 5.05,
    'IA767': 5.32,
    'IA827': 5.78,
}


def _filter_name(raw):
    """Return the filter name in `raw` as a stripped `str`.

    The original code compared 'col4' against bytes literals (e.g. b'NB392'),
    so bytes are decoded; values that are already `str` are accepted as well.
    Anything else is returned unchanged and will be reported as unknown below.
    """
    if isinstance(raw, bytes):
        raw = raw.decode('ascii', errors='replace')
    return raw.strip() if isinstance(raw, str) else raw


# Vectorised lookup: one pass over the column instead of a per-row `iloc`.
filter_names = laes['col4'].map(_filter_name)
redshift_by_row = filter_names.map(SC4K_FILTER_REDSHIFT)

# A filter missing from the table used to be skipped silently, which made the
# list shorter than the frame and failed later with an opaque length-mismatch
# error. Fail here instead, naming the offending filter(s).
unknown_filters = filter_names[redshift_by_row.isna()].unique()
if len(unknown_filters):
    raise ValueError(
        f"No redshift defined for filter(s): {sorted(map(str, unknown_filters))}"
    )

# Keep `redshift` as a plain list, as before; assignment is positional.
redshift = redshift_by_row.tolist()
laes['Redshift'] = redshift

In [None]:
# Write the `laes` DataFrame to the pickle file 'SC4K_COSMOS' in the working directory.
laes.to_pickle('SC4K_COSMOS') 

In [None]:
# Import the COSMOS2020 sample, already restricted to z = 2-6.
dat = Table.read('COSMOS2020_limited_redshift.fits')
cosmos = dat.to_pandas()
# Remove the SC4K sources from COSMOS2020: keep only rows whose ID does not
# appear in the LAE table.
mask = ~cosmos['ID'].isin(laes['ID'])
nolae = cosmos[mask]
# The previous `nolae.reset_index()` call discarded its result and had no
# effect, so it is dropped; `nolae` (and the pickle) keep the row labels of
# `cosmos`, exactly as before.
# NOTE(review): if a contiguous 0..N-1 index is wanted downstream, use
# `nolae = nolae.reset_index(drop=True)` -- confirm against the 2nd notebook.
nolae.to_pickle('NEW_COSMOS')

### The following two cells only produce plots comparing the LAE and non-LAE redshift and i-band distributions, which are used in the 2nd notebook to create the training/testing samples

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")

# Redshift columns compared in this figure.
nolae_z = nolae["lp_zPDF"]
lae_z = laes["Redshift"]

# Ten equal-width bins (11 edges) spanning both samples.
# Series.min()/max() are vectorised and skip NaN; the builtin min()/max() used
# before iterated element by element and gave order-dependent results if a NaN
# was present.
z_low = np.nanmin([nolae_z.min(), lae_z.min()])
z_high = np.nanmax([nolae_z.max(), lae_z.max()])
bin_edges = np.linspace(z_low, z_high, 11)

fig, ax = plt.subplots(figsize=(10, 6))

# density=True normalises each histogram to unit area, so samples of
# different sizes can be compared on the same axis.
for values, colour, name in ((nolae_z, 'r', 'Non-LAE'), (lae_z, 'b', 'LAE')):
    ax.hist(x=np.array(values), bins=bin_edges, color=colour,
            alpha=0.7, rwidth=0.85, density=True, label=name, edgecolor='black')

ax.grid(axis='y', alpha=0.5, linestyle='--', linewidth=0.7)
ax.set_xlabel('Redshift', fontsize=12)
ax.set_ylabel('Relative Frequency', fontsize=12)
ax.set_title('Redshift Distribution - Non-LAE and LAE Samples', fontsize=14)
ax.legend(fontsize=10)
ax.tick_params(axis='both', which='major', labelsize=10)
fig.tight_layout()

plt.show()

In [None]:
# Histogram of the non-LAE `lp_zBEST` values, saved as an EPS file.
fig, ax = plt.subplots()
# The label gives ax.legend() an entry to show; without it the legend was
# empty and matplotlib emitted a "no labelled artists" warning.
ax.hist(x=np.array(nolae["lp_zBEST"]), bins=8, color='b',
        alpha=0.7, rwidth=0.85, label='Non-LAE')
ax.set_xlabel('Redshift')
ax.set_ylabel('Frequency')
ax.set_title('Redshift Distribution Non-LAE Sample')
ax.set_xlim([0, 8])
ax.legend()
# Grid is switched off for this figure. The earlier grid(axis='y', alpha=0.75)
# call was immediately overridden by grid(False), so it has been removed.
ax.grid(False)
# NOTE(review): the PostScript/EPS backend does not store transparency, so
# alpha=0.7 affects the on-screen figure only -- confirm EPS is the wanted format.
fig.savefig('Redshift Dist Non-LAE Sample.eps', format='eps')