In [239]:
# give access to importing dwarfz
import os, sys
dwarfz_package_dir = os.getcwd().split("dwarfz")[0]
if dwarfz_package_dir not in sys.path:
    sys.path.insert(0, dwarfz_package_dir)

import dwarfz
    
# back to regular import statements

%matplotlib inline
from matplotlib import pyplot as plt

import seaborn as sns
sns.set(context="poster", style="ticks", font_scale=1.4)

import numpy as np
import pandas as pd
import glob
import shutil

from scipy.special import expit
import pathlib

import send2trash

In [21]:
COSMOS_filename = pathlib.Path(dwarfz.data_dir_default) / "COSMOS_reference.sqlite"
COSMOS = dwarfz.datasets.COSMOS(COSMOS_filename)

In [22]:
HSC_filename = pathlib.Path(dwarfz.data_dir_default) / "HSC_COSMOS_median_forced.sqlite3"
HSC = dwarfz.datasets.HSC(HSC_filename)

In [23]:
matches_filename = pathlib.Path(dwarfz.data_dir_default) / "matches.sqlite3"
matches_df = dwarfz.matching.Matches.load_from_filename(matches_filename)

In [5]:
combined = matches_df[matches_df.match].copy()
combined["ra"]       = COSMOS.df.loc[combined.index].ra
combined["dec"]      = COSMOS.df.loc[combined.index].dec
combined["photo_z"]  = COSMOS.df.loc[combined.index].photo_z
combined["log_mass"] = COSMOS.df.loc[combined.index].mass_med

photometry_cols = [
    "gcmodel_flux","gcmodel_flux_err","gcmodel_flux_flags", "gcmodel_mag",
    "rcmodel_flux","rcmodel_flux_err","rcmodel_flux_flags", "rcmodel_mag",
    "icmodel_flux","icmodel_flux_err","icmodel_flux_flags", "icmodel_mag",
    "zcmodel_flux","zcmodel_flux_err","zcmodel_flux_flags", "zcmodel_mag",
    "ycmodel_flux","ycmodel_flux_err","ycmodel_flux_flags", "ycmodel_mag",
]

for col in photometry_cols:
    combined[col] = HSC.df.loc[combined.catalog_2_ids][col].values

## Turn magnitudes into colors

In [6]:
combined["g_minus_r"] = combined.gcmodel_mag - combined.rcmodel_mag
combined["r_minus_i"] = combined.rcmodel_mag - combined.icmodel_mag
combined["i_minus_z"] = combined.icmodel_mag - combined.zcmodel_mag
combined["z_minus_y"] = combined.zcmodel_mag - combined.ycmodel_mag

## Filter out bad data

In [7]:
mask =    np.isfinite(combined["g_minus_r"]) & np.isfinite(combined["r_minus_i"]) \
        & np.isfinite(combined["i_minus_z"]) & np.isfinite(combined["z_minus_y"]) \
        & np.isfinite(combined["icmodel_mag"]) \
        & (~combined.gcmodel_flux_flags) & (~combined.rcmodel_flux_flags) \
        & (~combined.icmodel_flux_flags) & (~combined.zcmodel_flux_flags) \
        & (~combined.ycmodel_flux_flags)

combined = combined[mask]

# Create classification labels

In [9]:
low_z    = (combined.photo_z  < .15)
low_mass = (combined.log_mass > 8) & (combined.log_mass < 9)

In [10]:
combined["low_z_low_mass"] = (low_z & low_mass)
combined.low_z_low_mass.mean()

0.002185072976027315

# Load the IDs of the narrowband population

In [13]:
target_ids = pd.read_csv("../narrowband_deconfusion/target_galaxies-HSC_ids.csv")
target_ids.head()

Unnamed: 0,COSMOS_id,HSC_id
0,219965,43158447025312151
1,220286,43158584464277542
2,220319,43158584464268517
3,220421,43158447025292549
4,220691,43158584464268544


In [14]:
contaminant_ids = pd.read_csv("../narrowband_deconfusion/contaminant_galaxies-HSC_ids.csv")
contaminant_ids.head()

Unnamed: 0,COSMOS_id,HSC_id
0,220029,43158584464246406
1,220142,43158859342153228
2,220169,43158859342173270
3,220274,43158447025298878
4,220476,43158721903220831


# Setup locations of images

In [264]:
targets_path = pathlib.Path.home() / "dwarfz" \
                                   / "galaxies_narrowband" \
                                   / "target"
targets_path.mkdir(parents=True, exist_ok=True)

In [277]:
contaminants_path = pathlib.Path.home() / "dwarfz" \
                                        / "galaxies_narrowband" \
                                        / "contaminant"
contaminants_path.mkdir(parents=True, exist_ok=True)

# Copy over pre-downloaded images

In [199]:
google_dir = pathlib.Path.home() / "Google Drive" \
                                 / "HSC_COSMOS_galaxies" \
                                 / "galaxies"

predownloaded_ids = {int(image_dir.name)
                     for image_dir in google_dir.iterdir()
                     if image_dir.is_dir()}

In [266]:
i = 0

for (_, _, HSC_id) in target_ids.itertuples():
    if HSC_id in predownloaded_ids:
        image_dir = google_dir / str(HSC_id)
        new_dir = targets_path / image_dir.name
        if not new_dir.is_dir():
            new_dir.symlink_to(image_dir, target_is_directory=True)
        i += 1
        
print("symlinked {} galaxies".format(i))

symlinked 4162 galaxies


In [267]:
j = 0

for (_, _, HSC_id) in contaminant_ids.itertuples():
    if HSC_id in predownloaded_ids:
        image_dir = google_dir / str(HSC_id)
        new_dir = contaminants_path / image_dir.name
        if not new_dir.is_dir():
            new_dir.symlink_to(image_dir, target_is_directory=True)
        j += 1
        
print("symlinked {} galaxies".format(j))

symlinked 11256 galaxies


In [121]:
# num galaxies remaining to download
target_ids.shape[0] + contaminant_ids.shape[0] - i - j

11521

In [127]:
target_ids_to_download = set(target_ids.HSC_id) - predownloaded_ids

In [130]:
contaminant_ids_to_download = set(contaminant_ids.HSC_id) - predownloaded_ids

In [137]:
ids_to_download = target_ids_to_download |  contaminant_ids_to_download

# Get the images from the quarry

For technical details, see: https://hsc-release.mtk.nao.ac.jp/das_quarry/manual.html

I'll be downloading these directly into the google drive folder. Then, when everything is complete, I'll just symlink them into the appropriate project folders, split by target and contaminant.

## Create a coordinates list


In [141]:
galaxy_coords = combined[["catalog_2_ids", "ra", "dec"]]
galaxy_coords = galaxy_coords.rename(columns={"catalog_2_ids":"HSC_index"})
galaxy_coords = galaxy_coords.set_index("HSC_index")
galaxy_coords = galaxy_coords.loc[ids_to_download]
galaxy_coords.head()


Unnamed: 0_level_0,ra,dec
HSC_index,Unnamed: 1_level_1,Unnamed: 2_level_1
43158597349179408,150.235849,2.280739
43159013961007125,149.604142,2.505246
43158876522053655,149.78253,2.485514
43158597349179418,150.263496,2.281219
43158872227086362,149.864641,2.318558


In [142]:
width = "20asec"
filters = ["HSC-G", "HSC-R", "HSC-I", "HSC-Z", "HSC-Y"]
rerun = "pdr1_deep"

In [182]:
quarry_input_dir = pathlib.Path("galaxy_images_training") \
    / "quarry_input_files"
quarry_input_dir.mkdir(exist_ok=True)
quarry_name_format = "tmp_quarry_{:>04d}.txt"

batch_i = 0
files_in_batch = 0

max_files_per_batch = 1000

tmp_filename = quarry_input_dir / quarry_name_format.format(batch_i)
f = open(tmp_filename, mode="w")
print("#?     ra          dec       filter   sw      sh      rerun", file=f)


print_formatter = " {galaxy.ra:.6f}deg  {galaxy.dec:.6f}deg  {filter}  {width}  {width}  {rerun} # {galaxy.Index}"
for galaxy in galaxy_coords.itertuples():
    for filter in filters:            
        print(print_formatter.format(galaxy=galaxy,
                                     width=width,
                                     filter=filter,
                                     rerun=rerun),
              file=f)
        files_in_batch += 1
        if files_in_batch == max_files_per_batch:
            f.close()
            files_in_batch = 0
            batch_i += 1
            tmp_filename = quarry_input_dir / quarry_name_format.format(batch_i)
            f = open(tmp_filename, mode="w")
            print("#?     ra          dec       filter   sw      sh      rerun", file=f)


f.close()

In [157]:
!head -n 10 $tmp_filename

#?     ra          dec       filter   sw      sh      rerun
 150.358331deg  2.680556deg  HSC-G  20asec  20asec  pdr1_deep # 43158468500160068
 150.358331deg  2.680556deg  HSC-R  20asec  20asec  pdr1_deep # 43158468500160068
 150.358331deg  2.680556deg  HSC-I  20asec  20asec  pdr1_deep # 43158468500160068
 150.358331deg  2.680556deg  HSC-Z  20asec  20asec  pdr1_deep # 43158468500160068
 150.358331deg  2.680556deg  HSC-Y  20asec  20asec  pdr1_deep # 43158468500160068
 150.144028deg  2.269632deg  HSC-G  20asec  20asec  pdr1_deep # 43158734788132421
 150.144028deg  2.269632deg  HSC-R  20asec  20asec  pdr1_deep # 43158734788132421
 150.144028deg  2.269632deg  HSC-I  20asec  20asec  pdr1_deep # 43158734788132421
 150.144028deg  2.269632deg  HSC-Z  20asec  20asec  pdr1_deep # 43158734788132421


In [158]:
!wc -l $tmp_filename

     716 galaxy_images_training/quarry_input_files/tmp_quarry_0057.txt


In [159]:
!ls  galaxy_images_training/quarry_input_files/ | wc -l

      58


In [160]:
!ls -lh galaxy_images_training/quarry_input_files/ | head -n 10

total 9696
-rw-r--r--@ 1 egentry  staff    80K Apr  4 14:05 tmp_quarry_0000.txt
-rw-r--r--@ 1 egentry  staff    80K Apr  4 14:05 tmp_quarry_0001.txt
-rw-r--r--@ 1 egentry  staff    80K Apr  4 14:05 tmp_quarry_0002.txt
-rw-r--r--@ 1 egentry  staff    80K Apr  4 14:05 tmp_quarry_0003.txt
-rw-r--r--@ 1 egentry  staff    80K Apr  4 14:05 tmp_quarry_0004.txt
-rw-r--r--@ 1 egentry  staff    80K Apr  4 14:05 tmp_quarry_0005.txt
-rw-r--r--@ 1 egentry  staff    80K Apr  4 14:05 tmp_quarry_0006.txt
-rw-r--r--@ 1 egentry  staff    80K Apr  4 14:05 tmp_quarry_0007.txt
-rw-r--r--@ 1 egentry  staff    80K Apr  4 14:05 tmp_quarry_0008.txt


###  Make the request via curl

1)

First you need to setup you authentication information. Add it to a file like `galaxy_images_training/curl_netrc` which should look like:
```machine hsc-release.mtk.nao.ac.jp login <your username> password <your password>```
This allows you to script the curl calls, without being prompted for your password each time

2a) 

The `curl` call (in (2b)) will spit out files into a somewhat unpredicatably named directory, like `arch-170928-231223`. You should rename this to match the batch suffix. You *really should* do this right away, so you don't get confused. In general I add the rename onto the same line as the curl call:
``` curl ... | tar xvf - && mv arch-* quarry_files_a     ```

This only works if it finds one `arch-` directory, but you *really* shouldn't have multiple arch directories at any given time; that's a recipe for getting your galaxies mixed up.

2b) 

Here's the actual curl invocation:

``` curl --netrc-file galaxy_images_training/curl_netrc https://hsc-release.mtk.nao.ac.jp/das_quarry/cgi-bin/quarryImage --form list=@<coord list filename> | tar xvf -     ```



In [196]:
filenames = sorted(quarry_input_dir.iterdir())

min_batch_number_to_pull = 1
max_batch_number_to_pull = 100

new_data_dir = targets_path.parent / "staging"

new_data_dir.mkdir(exist_ok=True)

for i, filename in enumerate(filenames):
    if i < min_batch_number_to_pull: 
        continue
    if i >= max_batch_number_to_pull:
        break

    print("Currently processing file: {}".format(os.path.basename(filename)), end="\r", flush=True)

    os.system(("curl -k --netrc-file galaxy_images_training/curl_netrc "
               "https://hsc-release.mtk.nao.ac.jp/das_quarry/cgi-bin/quarryImage "
               "--form list=@{filename} "
               "| tar -xvf -").format(filename=filename))


    arch_dirs = list(pathlib.Path.cwd().glob("arch-*"))
    assert(len(arch_dirs)==1)
    arch_dir = arch_dirs[0]
    with open(filename, "r") as f:
        _ = f.readline() # skip header
        line_number = 1 # 1 indexed, and then also with header
        for line in f:
            line_number += 1
            HSC_id = int(line.split("#")[-1].strip())
            HSC_dir = new_data_dir / str(HSC_id)
            HSC_dir.mkdir(exist_ok=True)
            

            image_filenames = list(arch_dir.glob( 
                str(line_number) + "-cutout-HSC-?-????-pdr1_deep.fits"
            ))
            
            if len(image_filenames) == 0:
                continue
            elif len(image_filenames) >1:
                raise RuntimeError("Too many files for line {} id {}".format(
                    line_number, HSC_id,
                ))
            image_filename = image_filenames[0]
            
            # rename with HSC id and move to within `new_data_dir`
            image_filename.rename(
                HSC_dir / image_filename.name.replace(
                    "{}-cutout".format(line_number),
                    "{}-cutout".format(HSC_id),
                )
            )
            

    arch_dir.rmdir()



Currently processing file: tmp_quarry_0057.txt

# Remove incomplete dirs, then transfer to main google drive directory

In [240]:
staging_dir = google_dir.parent / "staging"

In [244]:
num_removed = 0
for staged_dir in staging_dir.iterdir():
    if not staged_dir.is_dir(): continue
    
    num_images = len({*staged_dir.glob("*.fits")})
    if num_images>5:
        raise ValueError("{} has {} fits files".format(staged_dir, num_images))
    elif num_images < 5:
        print("too few images in {} (n={}); removing".format(
            staged_dir,
            num_images,
        ))
        num_removed += 1
        send2trash.send2trash(str(staged_dir))
    else:
        staged_dir.rename(staged_dir.parent.parent / "galaxies" / staged_dir.name)

num_removed

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788101886 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788108305 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788132862 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349177253 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788102056 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349172177 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349179933 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349148648 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349177095 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349150280 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43159138515048473 (n=0); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788131471 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349158019 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349173492 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788102005 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788134567 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349174218 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788126855 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788125084 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788128334 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788135482 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349177237 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788133775 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158464205183157 (n=0); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349177005 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788105673 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788132839 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788130844 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349179225 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349150132 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349176094 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788107631 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349150556 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788103057 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788110440 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788104424 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788135157 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349181855 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349151220 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788105194 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158872227060682 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788127837 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349157916 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349180374 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349178757 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349151860 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788129774 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788132932 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788128652 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788102905 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349176641 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349156410 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788135770 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788125724 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349174341 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788106705 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158468500150469 (n=0); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788106198 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349175258 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349179495 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788102994 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788105526 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788129388 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349181214 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349174914 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788133472 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788111722 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788131573 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788133224 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349155604 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788104452 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788106761 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349171324 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788123531 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349172793 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788135030 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349156350 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788136487 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788130770 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349150268 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788100924 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349148808 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788125008 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349152768 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788127935 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788105857 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788106813 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349148057 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788130915 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788107599 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349152760 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788100723 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788132427 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349170292 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788106276 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349171516 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788128344 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349152935 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158872227059260 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788105410 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788100506 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788133516 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788102251 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788135271 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349174073 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349179102 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349147269 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349173894 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349154678 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349173268 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349179104 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158601644114149 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349149301 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349175957 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349154011 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349153650 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349175392 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349173913 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158872227080665 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349148736 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349182011 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349158134 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349181062 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349179441 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349174950 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788129193 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788131137 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788102127 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788132580 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788134648 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788131130 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349150351 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788110247 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788131792 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788127856 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349174141 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158747673024680 (n=0); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788130255 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788102151 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788105374 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349171291 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349175631 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788133040 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349150320 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349156022 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158863637141694 (n=0); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349177193 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349181085 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788108667 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349177158 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349176842 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788124459 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349170389 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788130523 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349154466 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349179914 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788135607 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349170638 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349178667 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158872227076840 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349150469 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788101855 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788108188 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349157082 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349152504 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788127157 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788128127 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788125638 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349181560 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349171578 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788125607 (n=3); removing
too few images in /Users/ege

too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788126242 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158885111978118 (n=0); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349151710 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158335356164124 (n=0); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349151146 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158597349171541 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788107900 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788131208 (n=3); removing
too few images in /Users/egentry/Google Drive/HSC_COSMOS_galaxies/staging/43158734788102218 (n=3); removing
too few images in /Users/ege

1655

# Now link these new images to the project data directory

In [293]:
pre_linked_ids  = {int(path.name) for path in contaminants_path.iterdir() if path.is_dir()} 
pre_linked_ids |= {int(path.name) for path in targets_path.iterdir() if path.is_dir()} 
len(pre_linked_ids)

15413

In [304]:
narrowband_ids = set(target_ids.HSC_id) | set(contaminant_ids.HSC_id)
len(narrowband_ids)

26934

In [299]:
all_downloaded_ids = {int(path.name) for path in google_dir.iterdir()
                      if path.is_dir()}
len(all_downloaded_ids)

183278

In [324]:
num_to_link = 0
already_linked = 0
missing = 0
for HSC_id in narrowband_ids:
    if HSC_id in pre_linked_ids:
        already_linked += 1
    if HSC_id not in all_downloaded_ids:
        missing += 1
        
    if HSC_id in target_ids.HSC_id.values:
        class_path = targets_path
    elif HSC_id in contaminant_ids.HSC_id.values:
        class_path = contaminants_path
    else:
        raise ValueError("HSC id {} in neither targets nor contaminants".format(HSC_id))
        
    image_dir = google_dir / str(HSC_id)
    new_dir = class_path / image_dir.name
    if not new_dir.is_dir():
#         new_dir.symlink_to(image_dir, target_is_directory=True)
        pass
        
    num_to_link += 1
    
    
print("just linked: ", num_to_link)
print("previously linked: ", already_linked)
print("missing: ", missing)

just linked:  26934
previously linked:  15413
missing:  1655


# Check that the directory contents are correct

In [326]:
for path in targets_path.iterdir():
    if not path.is_dir():
        continue
    
    HSC_id = int(path.name)
    if HSC_id not in target_ids.HSC_id.values:
        raise ValueError("HSC id {} should not be in target path".format(HSC_id))

In [327]:
for path in contaminants_path.iterdir():
    if not path.is_dir():
        continue
    
    HSC_id = int(path.name)
    if HSC_id not in contaminant_ids.HSC_id.values:
        raise ValueError("HSC id {} should not be in contaminant path".format(HSC_id))