# Prototype MAF

# XXX: It looks like using pandas at all causes a memory leak?

In [1]:
import copy
import time
import numpy as np
import healpy as hp
import sqlite3
import pandas as pd
import matplotlib.pylab as plt
%matplotlib inline

from slicer import Slicer, MeanMetric, CountMetric, CoaddM5Metric, PlotMoll, gen_summary_row
from rubin_sim.data import get_baseline


In [2]:
# Read the baseline visit history into a DataFrame.
baseline_file = get_baseline()

con = sqlite3.connect(baseline_file)
try:
    # Alternative queries kept for quick experiments with a smaller table:
    #df = pd.read_sql("select * from observations where night < 61;", con)
    #df = pd.read_sql("select observationID,fieldRA,fieldDec,night,fiveSigmaDepth,filter,rotSkyPos from observations;", con)
    df = pd.read_sql("select * from observations;", con)
finally:
    # Close the connection even when the query raises, so the database file
    # handle is not left open for the rest of the session.
    con.close()

# 6 depths

In [3]:
# Time running one CoaddM5Metric through a fresh Slicer for each filter.
start = time.time()
#summary_stats = []

# Positional row indices of the visits taken in each filter.
subsets = {}
for filtername in 'ugrizy':
    subsets[filtername] = np.where(df["filter"] == filtername)[0]
#subsets["all"] = np.arange(np.size(df["filter"]))

for key, indx in subsets.items():
    info = {"run_name": "baseline_v4.3.1_0yrs"}
    info["observations_subset"] = "filter=%s" % key
    # Select rows with the indices computed for *this* key. Re-deriving them
    # from `filtername` would always pick "y", the value left over from the
    # loop above, so every pass would process the same data.
    sub_data = df.iloc[indx]
    metric = CoaddM5Metric(unit="Coadd %s (mags)" % key)
    sl = Slicer(nside=128)
    hp_array, info = sl(sub_data, metric, info=info)
    #summary_stats.append(gen_summary_row(info, "mean", np.nanmean(hp_array)))
    #summary_stats.append(gen_summary_row(info, "median", np.nanmedian(hp_array)))


end = time.time()

In [4]:
# Report the wall-clock seconds between the `start` and `end` stamps taken
# by the timing cell above.
elapsed = end - start
print("runtime=", elapsed)

runtime= 118.73780012130737


# 6 depths, 6 counts, the slow way

In [5]:
# Time the "slow way": one Slicer call per metric per filter, i.e. the data
# for each filter is passed through a Slicer twice.
start = time.time()
#summary_stats = []

# Positional row indices of the visits taken in each filter.
subsets = {}
for filtername in 'ugrizy':
    subsets[filtername] = np.where(df["filter"] == filtername)[0]
#subsets["all"] = np.arange(np.size(df["filter"]))

# Pass 1: coadded depth per filter.
for key, indx in subsets.items():
    info = {"run_name": "baseline_v4.3.1_0yrs"}
    info["observations_subset"] = "filter=%s" % key
    # Use this key's indices; `filtername` is stuck at "y" after the loop
    # above and would select the same rows on every pass.
    sub_data = df.iloc[indx]
    metric = CoaddM5Metric(unit="Coadd %s (mags)" % key)
    sl = Slicer(nside=128)
    hp_array, info = sl(sub_data, metric, info=info)
    #summary_stats.append(gen_summary_row(info, "mean", np.nanmean(hp_array)))
    #summary_stats.append(gen_summary_row(info, "median", np.nanmedian(hp_array)))

# Pass 2: visit counts per filter.
for key, indx in subsets.items():
    info = {"run_name": "baseline_v4.3.1_0yrs"}
    info["observations_subset"] = "filter=%s" % key
    sub_data = df.iloc[indx]
    # NOTE(review): the unit text looks copy-pasted from the coadd metric;
    # confirm what unit a count should carry.
    metric = CountMetric(unit="Coadd %s (mags)" % key)
    sl = Slicer(nside=128)
    hp_array, info = sl(sub_data, metric, info=info)
    #summary_stats.append(gen_summary_row(info, "mean", np.nanmean(hp_array)))
    #summary_stats.append(gen_summary_row(info, "median", np.nanmedian(hp_array)))


end = time.time()

KeyboardInterrupt: 

In [None]:
# Report the wall-clock seconds between `start` and `end`.
# NOTE(review): the timing cell above shows a KeyboardInterrupt, so `end` may
# still hold a value from an earlier cell; re-run the timing before trusting
# this number.
elapsed = end - start
print("runtime=", elapsed)

# 6 depths, 6 counts the fast way

In [None]:
# Now run two metrics on the same slicer: each filter's data goes through a
# single Slicer call that receives both metrics.


start = time.time()
#summary_stats = []

# Positional row indices of the visits taken in each filter.
subsets = {}
for filtername in 'ugrizy':
    subsets[filtername] = np.where(df["filter"] == filtername)[0]
#subsets["all"] = np.arange(np.size(df["filter"]))

for key, indx in subsets.items():
    # Use this key's indices; `filtername` is stuck at "y" after the loop
    # above and would select the same rows on every pass.
    sub_data = df.iloc[indx]

    # NOTE(review): the CountMetric unit text looks copy-pasted from the
    # coadd metric; confirm what unit a count should carry.
    metrics = [
        CoaddM5Metric(unit="Coadd %s (mags)" % key),
        CountMetric(unit="Coadd %s (mags)" % key),
    ]
    # One independent info dict per metric, in the same order as `metrics`.
    infos = [
        {
            "run_name": "baseline_v4.3.1_0yrs",
            "observations_subset": "filter=%s" % key,
        }
        for _ in metrics
    ]

    sl = Slicer(nside=128)
    hp_array, info = sl(sub_data, metrics, info=infos)


end = time.time()


In [None]:
# Report the wall-clock seconds between the `start` and `end` stamps taken
# by the timing cell above.
elapsed = end - start
print("runtime=", elapsed)

# 6 depths, 6 counts, the fast way, with the cache turned off

In [None]:

# Same two-metrics-per-Slicer run as the "fast way", but constructing the
# Slicer with cache=False to compare timings.
start = time.time()
#summary_stats = []

# Positional row indices of the visits taken in each filter.
subsets = {}
for filtername in 'ugrizy':
    subsets[filtername] = np.where(df["filter"] == filtername)[0]
#subsets["all"] = np.arange(np.size(df["filter"]))

for key, indx in subsets.items():
    # Use this key's indices; `filtername` is stuck at "y" after the loop
    # above and would select the same rows on every pass.
    sub_data = df.iloc[indx]

    # NOTE(review): the CountMetric unit text looks copy-pasted from the
    # coadd metric; confirm what unit a count should carry.
    metrics = [
        CoaddM5Metric(unit="Coadd %s (mags)" % key),
        CountMetric(unit="Coadd %s (mags)" % key),
    ]
    # One independent info dict per metric, in the same order as `metrics`.
    infos = [
        {
            "run_name": "baseline_v4.3.1_0yrs",
            "observations_subset": "filter=%s" % key,
        }
        for _ in metrics
    ]

    sl = Slicer(nside=128, cache=False)
    hp_array, info = sl(sub_data, metrics, info=infos)


end = time.time()

In [None]:
# Report the wall-clock seconds between the `start` and `end` stamps taken
# by the timing cell above.
elapsed = end - start
print("runtime=", elapsed)

In [None]:
# I bet this is pandas indexing vs numpy indexing slowing things down