# URL fetcher
---
This notebook grabs the URLs from which the images can be downloaded.

It is meant to be run in the Jupyter Notebook interface of the Astro Data Lab (https://datalab.noirlab.edu/).

### Preamble

In [1]:
# Imports
from getpass import getpass
import numpy as np
import dl
from dl import authClient
from pyvo.dal import sia
from tqdm import tqdm
import pandas as pd

In [2]:
# Data source: SIA service URL for DES DR2, hosted by the Astro Data Lab
DEF_ACCESS_URL = "https://datalab.noirlab.edu/sia/des_dr2"

# Service handle; the image searches are issued through this object
svc = sia.SIAService(DEF_ACCESS_URL)

In [None]:
# Sign in: the user name is read with input() and the password with getpass().
# Both prompts are passed straight to login(), so the password is never bound
# to a notebook variable.
token = authClient.login(
    input("Enter user name: "),
    getpass("Enter password: "),
)

# Last expression of the cell, so its return value is displayed as the cell output
authClient.whoAmI()

### Main Code

In [4]:
def url_to_band(access_url):
    """
    Extract the band (g/r/i/z/Y) from an image URL.

    Relies on the SIA's filename convention: the band is taken to be the
    single character immediately before the first '.fits.fz' in the URL.
    If the URL does not contain '.fits.fz', the last character of the
    whole URL is returned instead.
    """
    stem, _, _ = access_url.partition('.fits.fz')
    return stem[-1]

In [9]:
# Field of view used for each image search
fov_as = 7.5            # ...in arcsec
fov = fov_as / 3600     # ...and the same value in degrees

# Input catalogue: .csv file containing the RA and Dec of the objects selected
# by the SQL query in selecting_data/loose_cuts.txt.
# NOTE(review): an earlier comment quoted 2e5 objects, whereas the file name
# suggests 2910 -- confirm which is current.
csv_path = 'objs_2910.csv'
df = pd.read_csv(csv_path, index_col=0)   # first CSV column is used as the index
df_small = df[['t1_ra', 't1_dec']]        # only the objects' RA and Dec are needed

# Flat list of image download URLs; each object's URLs are appended to it with +=
list_out = list()

In [11]:
# Start from an empty list every time this cell runs. Without this, re-running the
# cell would append a second copy of every URL to the list left over from the
# previous run, and the saved file would contain duplicates.
list_out = []

for index, row in tqdm(df_small.iterrows(), total=len(df_small)): # Iterating over every object in the catalogue
    ra = row['t1_ra']
    dec = row['t1_dec']
    # Converting the FoV to 2 dimensions on the celestial sphere: the first
    # component is widened by 1/cos(dec), the second is the FoV itself (degrees).
    fov_2d = (fov/np.cos(dec*np.pi/180),
              fov)

    img_table = svc.search((ra, dec),
                           fov_2d,
                           verbosity=2).to_table() # Yields a table of files pertaining to this object,
                                                   #  many of which (e.g. masks) are not needed/relevant

    obj_urls = [] # To store the URLs for this object
    for url_row in img_table:
        access_url = url_row['access_url']
        # Relevant URLs: keep 'extn=1' entries, drop 'nobkg' and '_det.fits' ones
        if ('extn=1' in access_url) and ('nobkg' not in access_url) and ('_det.fits' not in access_url):
            # Tag each URL with the object's index and the band parsed from the file name
            obj_urls.append(access_url + '?ID='+str(index) + ',BAND=' + str(url_to_band(access_url)))

    if len(obj_urls) == 5: # Checking nothing has gone wrong (e.g. files missing on the server,
                           #                                       too many files due to object being on edge of tile)
                           # Presumably 1 URL each for g,r,i,z,Y bands -- only the count is
                           # checked here, not that the five bands are distinct.
        list_out += obj_urls # Adding to the end of the big list of URLs

100%|██████████| 2910/2910 [04:40<00:00, 10.39it/s]


In [12]:
# NumPy copy of the URL list, so that it can be written out with a single numpy call
np_out = np.asarray(list_out)

In [13]:
# Saving the big list of URLs as plain text, one URL per line
np.savetxt(fname='img_url_list.txt', X=np_out, fmt='%s')