starting full integration

vargatn · Feb 28, 2020 · d8136dd · d8136dd
1 parent 0bbc597
commit d8136dd
Show file tree

Hide file tree

Showing 6 changed files with 340 additions and 32 deletions.
diff --git a/bin/epsilon_concentric_sampler_v01.py b/bin/epsilon_concentric_sampler_v01.py
@@ -0,0 +1,178 @@
+"""Draw and score emulator samples in concentric radial bins.
+
+For every radial bin defined by the LOGR_* lists below, this script builds the
+deep and wide containers with ``skysampler.emulator``, draws samples with
+``make_naive_infodicts`` and writes three files under ``root_path``: the drawn
+samples (``*_samples.fits``), the settings used (``*.p``) and the output of
+``run_scores`` (``*_scores.fits``).
+"""
+from __future__ import print_function, division
+import fitsio as fio
+import numpy as np
+
+import skysampler.emulator as emulator
+
+try:
+    import cPickle as pickle
+except ImportError:
+    # cPickle is Python 2 only; any other failure should not be swallowed.
+    import pickle
+
+# Prefix of every output file name.
+tag = "epsilon_concentric_sample_v03-test"
+
+# Forwarded to make_naive_infodicts as nsamples / nchunks / bandwidth.
+NSAMPLES = 2000000
+NCHUNKS = 150
+BANDWIDTH = 0.05
+
+# One entry per radial bin. The DRAW values are forwarded to
+# make_naive_infodicts as rmin / rmax; the CAT values replace the last ("LOGR")
+# limit of the wide settings with (10**-3, 10**value).
+LOGR_DRAW_RMINS = [-3, -0.5,]
+LOGR_DRAW_RMAXS = [-0.5, 0.,]
+LOGR_CAT_RMAXS = [0., 0.5,]
+
+root_path = "/e/ocean1/users/vargatn/EMULATOR/EPSILON/resamples/"
+wide_data_path = "/e/ocean1/users/vargatn/EMULATOR/EPSILON/multi-indexer-gamma_v001_clust__z0_l1_py2.p"
+deep_data_path = "/e/ocean1/users/vargatn/EMULATOR/EPSILON/run-ugriz-mof02_naive-cleaned.fits"
+
+# Settings for construct_deep_container, called with drop="MAG_I".
+deep_c_settings = {
+    "columns": [
+        ("MAG_I", ("bdf_mag", 3)),
+        ("COLOR_G_R", (("bdf_mag", 1), ("bdf_mag", 2), "-")),
+        # ("COLOR_R_I", (("bdf_mag", 2), ("bdf_mag", 3), "-")),
+        # ("COLOR_I_Z", (("bdf_mag", 3), ("bdf_mag", 4), "-")),
+    ],
+    "logs": [False, False,],
+    "limits": [(17, 22.5), (-1, 3),],
+    # "logs": [False, False, False, False],
+    # "limits": [(17, 25.5), (-1, 3), (-1, 3), (-1, 3)],
+}
+# Settings for construct_deep_container, called without dropping a column.
+deep_smc_settings = {
+    "columns": [
+        ("GABS", (("bdf_g", 0), ("bdf_g", 1), "SQSUM")),
+        ("SIZE", ("bdf_T", 1, "+")),
+        ("FRACDEV", "bdf_fracdev"),
+        ("MAG_I", ("bdf_mag", 3)),
+        ("COLOR_G_R", (("bdf_mag", 1), ("bdf_mag", 2), "-")),
+        # ("COLOR_R_I", (("bdf_mag", 2), ("bdf_mag", 3), "-")),
+        # ("COLOR_I_Z", (("bdf_mag", 3), ("bdf_mag", 4), "-")),
+    ],
+    "logs": [False, True, False, False, False, ],
+    "limits": [(0., 1.), (-1, 5), (-3, 4), (17, 25.5), (-1, 3), ],
+    # "logs": [False, True, False, False, False, False, False, ],
+    # "limits": [(0., 1.), (-1, 5), (-3, 4), (17, 25.5), (-1, 3), (-1, 3), (-1, 3)],
+}
+# Settings for construct_wide_container (colour and "LOGR" columns).
+wide_cr_settings = {
+    "columns": [
+        ("MAG_I", "MOF_CM_MAG_CORRECTED_I"),
+        ("COLOR_G_R", ("MOF_CM_MAG_CORRECTED_G", "MOF_CM_MAG_CORRECTED_R", "-")),
+        # ("COLOR_R_I", ("MOF_CM_MAG_CORRECTED_R", "MOF_CM_MAG_CORRECTED_I", "-")),
+        # ("COLOR_I_Z", ("MOF_CM_MAG_CORRECTED_I", "MOF_CM_MAG_CORRECTED_Z", "-")),
+        ("LOGR", "DIST"),
+    ],
+    "logs": [False, False, True],
+    "limits": [(17, 22.5), (-1, 3), (1e-3, 50.), ],
+    # "logs": [False, False, False, False, True],
+    # "limits": [(17, 22.5), (-1, 3), (-1, 3), (-1, 3), (1e-3, 50.),],
+}
+# Settings for construct_wide_container ("MAG_I" and "LOGR" columns only).
+wide_r_settings = {
+    "columns": [
+        ("MAG_I", "MOF_CM_MAG_CORRECTED_I"),
+        ("LOGR", "DIST"),
+    ],
+    "logs": [False, True,],
+    "limits": [(17, 22.5), (1e-3, 50.),],
+}
+# Column-name groups forwarded to make_naive_infodicts; the commented-out
+# variant goes with the commented-out colour columns above.
+# columns = {
+#     "cols_dc": ["COLOR_G_R", "COLOR_R_I", "COLOR_I_Z",],
+#     "cols_wr": ["LOGR",],
+#     "cols_wcr": ["COLOR_G_R", "COLOR_R_I", "COLOR_I_Z", "LOGR",],
+# }
+columns = {
+    "cols_dc": ["COLOR_G_R",],
+    "cols_wr": ["LOGR",],
+    "cols_wcr": ["COLOR_G_R", "LOGR"],
+}
+
+if __name__ == "__main__":
+
+    # Each radial bin consumes four seeds, one per container built below.
+    nseeds_per_rbin = 4
+    nrbins = len(LOGR_DRAW_RMINS)
+    print("started reading")
+    with open(wide_data_path, "rb") as wide_file:
+        mdl = pickle.load(wide_file)
+    deep = fio.read(deep_data_path)
+    print("finished reading")
+
+    # The master seed ends up in the output file names (see outname).
+    master_seed = np.random.randint(0, np.iinfo(np.int32).max, 1)[0]
+    rng = np.random.RandomState(seed=master_seed)
+    seeds = rng.randint(0, np.iinfo(np.int32).max, nrbins * nseeds_per_rbin)
+
+    print("starting concentric shell resampling")
+    for i in range(nrbins):
+        print("rbin", i)
+
+        outname = root_path + tag + "_{:1d}".format(master_seed) + "_rbin{:d}".format(i)
+        print(outname)
+
+        # Stride by nseeds_per_rbin: a stride of nrbins reuses seeds between
+        # bins when nrbins < 4 and overruns the seed array when nrbins > 4.
+        offset = nseeds_per_rbin * i
+        cat_limits = (10**-3, 10**LOGR_CAT_RMAXS[i])
+
+        # update configs
+        _deep_c_settings = emulator.construct_deep_container(deep, deep_c_settings, drop="MAG_I", seed=seeds[offset + 0])
+        _deep_smc_settings = emulator.construct_deep_container(deep, deep_smc_settings, seed=seeds[offset + 1])
+
+        # dict.copy() is shallow, so give the copy its own "limits" list
+        # instead of overwriting the entry of the module-level settings.
+        tmp_wide_cr_settings = wide_cr_settings.copy()
+        tmp_wide_cr_settings["limits"] = list(wide_cr_settings["limits"])
+        tmp_wide_cr_settings["limits"][-1] = cat_limits
+        _wide_cr_settings = emulator.construct_wide_container(mdl, tmp_wide_cr_settings, drop="MAG_I", seed=seeds[offset + 2])
+
+        tmp_wide_r_settings = wide_r_settings.copy()
+        tmp_wide_r_settings["limits"] = list(wide_r_settings["limits"])
+        tmp_wide_r_settings["limits"][-1] = cat_limits
+        _wide_r_settings = emulator.construct_wide_container(mdl, tmp_wide_r_settings, drop="MAG_I", seed=seeds[offset + 3])
+
+        # create infodicts
+        infodicts, samples = emulator.make_naive_infodicts(_wide_cr_settings, _wide_r_settings, _deep_c_settings,
+                                                           _deep_smc_settings,
+                                                           columns, nsamples=NSAMPLES, nchunks=NCHUNKS, bandwidth=BANDWIDTH,
+                                                           rmin=LOGR_DRAW_RMINS[i],
+                                                           rmax=LOGR_DRAW_RMAXS[i])
+
+        fname = outname + "_samples.fits"
+        print(fname)
+        fio.write(fname, samples.to_records(), clobber=True)
+        # NOTE(review): assumes the upper edge is stored under "rmax" just like
+        # "rmin"; falls back to the value passed to make_naive_infodicts.
+        master_dict = {
+            "columns": infodicts[0]["columns"],
+            "bandwidth": infodicts[0]["bandwidth"],
+            "deep_c_settings": deep_c_settings,
+            "deep_smc_settings": deep_smc_settings,
+            "wide_r_settings": tmp_wide_r_settings,
+            "wide_cr_settings": tmp_wide_cr_settings,
+            "rmin": infodicts[0]["rmin"],
+            "rmax": infodicts[0].get("rmax", LOGR_DRAW_RMAXS[i]),
+        }
+        with open(outname + ".p", "wb") as settings_file:
+            pickle.dump(master_dict, settings_file)
+        print("calculating scores")
+        result = emulator.run_scores(infodicts)
+        print("finished calculating scores")
+        fname = outname + "_scores.fits"
+        print(fname)
+        fio.write(fname, result.to_records(), clobber=True)
diff --git a/bin/epsilon_concentric_sampler_v02.py b/bin/epsilon_concentric_sampler_v02.py
@@ -0,0 +1,178 @@
+"""Repeatedly draw and score emulator samples in concentric radial bins.
+
+The script runs NREPEATS repeats, each with a fresh master seed. Per repeat and
+radial bin it builds the deep and wide containers with ``skysampler.emulator``,
+draws samples with ``make_naive_infodicts`` and writes three files under
+``root_path``: the drawn samples (``*_samples.fits``), the settings used
+(``*.p``) and the output of ``run_scores`` (``*_scores.fits``).
+"""
+from __future__ import print_function, division
+import fitsio as fio
+import numpy as np
+
+import skysampler.emulator as emulator
+
+try:
+    import cPickle as pickle
+except ImportError:
+    # cPickle is Python 2 only; any other failure should not be swallowed.
+    import pickle
+
+# Output names start with tag_root + "_run<repeat index>".
+tag_root = "epsilon_concentric_sample_v06"
+# Number of passes of the outer loop; each pass draws its own master seed.
+NREPEATS = 10
+# Forwarded to make_naive_infodicts as nsamples / nchunks / bandwidth.
+NSAMPLES = 1500000
+NCHUNKS = 150
+BANDWIDTH = 0.05
+
+# One entry per radial bin. The DRAW values are forwarded to
+# make_naive_infodicts as rmin / rmax; the CAT values replace the last ("LOGR")
+# limit of the wide settings with (10**-3, 10**value).
+LOGR_DRAW_RMINS = [-3, -0.5, 0., 0.5]
+LOGR_DRAW_RMAXS = [-0.5, 0., 0.5, 1.2]
+LOGR_CAT_RMAXS = [0., 0.5, 1.2, 2.]
+
+root_path = "/e/ocean1/users/vargatn/EMULATOR/EPSILON/resamples/"
+wide_data_path = "/e/ocean1/users/vargatn/EMULATOR/EPSILON/multi-indexer-gamma_v001_clust__z0_l1_py2.p"
+deep_data_path = "/e/ocean1/users/vargatn/EMULATOR/EPSILON/run-ugriz-mof02_naive-cleaned.fits"
+
+# Settings for construct_deep_container, called with drop="MAG_I".
+deep_c_settings = {
+    "columns": [
+        ("MAG_I", ("bdf_mag", 3)),
+        ("COLOR_G_R", (("bdf_mag", 1), ("bdf_mag", 2), "-")),
+        ("COLOR_R_I", (("bdf_mag", 2), ("bdf_mag", 3), "-")),
+#         ("COLOR_I_Z", (("bdf_mag", 3), ("bdf_mag", 4), "-")),
+    ],
+    "logs": [False, False, False],
+    "limits": [(17, 22.5), (-1, 3), (-1, 3),],
+#     "logs": [False, False, False, False],
+#     "limits": [(17, 25.5), (-1, 3), (-1, 3), (-1, 3)],
+}
+# Settings for construct_deep_container, called without dropping a column.
+deep_smc_settings = {
+    "columns": [
+        ("GABS", (("bdf_g", 0), ("bdf_g", 1), "SQSUM")),
+        ("SIZE", ("bdf_T", 1, "+")),
+        ("FRACDEV", "bdf_fracdev"),
+        ("MAG_I", ("bdf_mag", 3)),
+        ("COLOR_G_R", (("bdf_mag", 1), ("bdf_mag", 2), "-")),
+        ("COLOR_R_I", (("bdf_mag", 2), ("bdf_mag", 3), "-")),
+        ("COLOR_I_Z", (("bdf_mag", 3), ("bdf_mag", 4), "-")),
+    ],
+    # "logs": [False, True, False, False, False, False],
+    # "limits": [(0., 1.), (-1, 5), (-3, 4), (17, 25.5), (-1, 3), (-1, 3)],
+    "logs": [False, True, False, False, False, False, False, ],
+    "limits": [(0., 1.), (-1, 5), (-3, 4), (17, 25.5), (-1, 3), (-1, 3), (-1, 3)],
+}
+# Settings for construct_wide_container (colour and "LOGR" columns).
+wide_cr_settings = {
+    "columns": [
+        ("MAG_I", "MOF_CM_MAG_CORRECTED_I"),
+        ("COLOR_G_R", ("MOF_CM_MAG_CORRECTED_G", "MOF_CM_MAG_CORRECTED_R", "-")),
+        ("COLOR_R_I", ("MOF_CM_MAG_CORRECTED_R", "MOF_CM_MAG_CORRECTED_I", "-")),
+#         ("COLOR_I_Z", ("MOF_CM_MAG_CORRECTED_I", "MOF_CM_MAG_CORRECTED_Z", "-")),
+        ("LOGR", "DIST"),
+    ],
+    "logs": [False, False, False, True],
+    "limits": [(17, 22.5), (-1, 3), (-1, 3), (1e-3, 50.), ],
+#     "logs": [False, False, False, False, True],
+#     "limits": [(17, 22.5), (-1, 3), (-1, 3), (-1, 3), (1e-3, 50.),],
+}
+# Settings for construct_wide_container ("MAG_I" and "LOGR" columns only).
+wide_r_settings = {
+    "columns": [
+        ("MAG_I", "MOF_CM_MAG_CORRECTED_I"),
+        ("LOGR", "DIST"),
+    ],
+    "logs": [False, True,],
+    "limits": [(17, 22.5), (1e-3, 50.),],
+}
+# Column-name groups forwarded to make_naive_infodicts.
+columns = {
+    "cols_dc": ["COLOR_G_R", "COLOR_R_I",],
+    "cols_wr": ["LOGR",],
+    "cols_wcr": ["COLOR_G_R", "COLOR_R_I", "LOGR",],
+}
+
+if __name__ == "__main__":
+
+    # Each radial bin consumes four seeds, one per container built below.
+    nseeds_per_rbin = 4
+    nrbins = len(LOGR_DRAW_RMINS)
+    print("started reading")
+    with open(wide_data_path, "rb") as wide_file:
+        mdl = pickle.load(wide_file)
+    deep = fio.read(deep_data_path)
+    print("finished reading")
+
+    for nrep in range(NREPEATS):
+        tag = tag_root + "_run" + str(nrep)
+        print("running repeat", nrep, "out of", NREPEATS)
+        print(tag)
+
+        # The master seed ends up in the output file names (see outname).
+        master_seed = np.random.randint(0, np.iinfo(np.int32).max, 1)[0]
+        rng = np.random.RandomState(seed=master_seed)
+        seeds = rng.randint(0, np.iinfo(np.int32).max, nrbins * nseeds_per_rbin)
+
+        print("starting concentric shell resampling")
+        for i in range(nrbins):
+            print("rbin", i)
+
+            outname = root_path + tag + "_{:1d}".format(master_seed) + "_rbin{:d}".format(i)
+            print(outname)
+
+            # Stride by nseeds_per_rbin: a stride of nrbins is only correct
+            # while exactly four radial bins are configured.
+            offset = nseeds_per_rbin * i
+            cat_limits = (10**-3, 10**LOGR_CAT_RMAXS[i])
+
+            # update configs
+            _deep_c_settings = emulator.construct_deep_container(deep, deep_c_settings, drop="MAG_I", seed=seeds[offset + 0])
+            _deep_smc_settings = emulator.construct_deep_container(deep, deep_smc_settings, seed=seeds[offset + 1])
+
+            # dict.copy() is shallow, so give the copy its own "limits" list
+            # instead of overwriting the entry of the module-level settings.
+            tmp_wide_cr_settings = wide_cr_settings.copy()
+            tmp_wide_cr_settings["limits"] = list(wide_cr_settings["limits"])
+            tmp_wide_cr_settings["limits"][-1] = cat_limits
+            _wide_cr_settings = emulator.construct_wide_container(mdl, tmp_wide_cr_settings, drop="MAG_I", seed=seeds[offset + 2])
+
+            tmp_wide_r_settings = wide_r_settings.copy()
+            tmp_wide_r_settings["limits"] = list(wide_r_settings["limits"])
+            tmp_wide_r_settings["limits"][-1] = cat_limits
+            _wide_r_settings = emulator.construct_wide_container(mdl, tmp_wide_r_settings, drop="MAG_I", seed=seeds[offset + 3])
+
+            # create infodicts
+            infodicts, samples = emulator.make_naive_infodicts(_wide_cr_settings, _wide_r_settings, _deep_c_settings,
+                                                               _deep_smc_settings,
+                                                               columns, nsamples=NSAMPLES, nchunks=NCHUNKS, bandwidth=BANDWIDTH,
+                                                               rmin=LOGR_DRAW_RMINS[i],
+                                                               rmax=LOGR_DRAW_RMAXS[i])
+
+            fname = outname + "_samples.fits"
+            print(fname)
+            fio.write(fname, samples.to_records(), clobber=True)
+            # NOTE(review): assumes the upper edge is stored under "rmax" just
+            # like "rmin"; falls back to the value passed to make_naive_infodicts.
+            master_dict = {
+                "columns": infodicts[0]["columns"],
+                "bandwidth": infodicts[0]["bandwidth"],
+                "deep_c_settings": deep_c_settings,
+                "deep_smc_settings": deep_smc_settings,
+                "wide_r_settings": tmp_wide_r_settings,
+                "wide_cr_settings": tmp_wide_cr_settings,
+                "rmin": infodicts[0]["rmin"],
+                "rmax": infodicts[0].get("rmax", LOGR_DRAW_RMAXS[i]),
+            }
+            with open(outname + ".p", "wb") as settings_file:
+                pickle.dump(master_dict, settings_file)
+            print("calculating scores")
+            result = emulator.run_scores(infodicts)
+            print("finished calculating scores")
+            fname = outname + "_scores.fits"
+            print(fname)
+            fio.write(fname, result.to_records(), clobber=True)
diff --git a/bin/radial_indexer.py b/bin/radial_indexer.py
@@ -10,53 +10,68 @@
 parser.add_argument('--noclust', action="store_true")
 parser.add_argument('--norands', action="store_true")
 
-survey_fnames = np.sort(glob.glob(paths.config["catalogs"]["survey"]["wide_data_expr"]))
-clust_path = paths.config["catalogs"]["targets"]["clust"]
-rands_path = paths.config["catalogs"]["targets"]["rands"]
+_survey_fnames_expr = "/e/ocean1/users/vargatn/DES/Y3_DATA/DES_Y3_GOLD_MOF_base*h5"
+survey_fnames = np.sort(glob.glob(_survey_fnames_expr))
+
+clust_path = "/e/ocean1/users/vargatn/EMULATOR/DELTA/y3_gold_2.2.1_wide_sofcol_run2_redmapper_v6.4.22+2_lgt20_vl02_catalog.fit"
+rands_path = "/e/ocean1/users/vargatn/EMULATOR/DELTA/y3_gold_2.2.1_wide_sofcol_run2_redmapper_v6.4.22+2_randcat_z0.10-0.95_lgt020_vl02.fit"
+
+tag = "multi-indexer-epsilon_narrow-z_v001"
+
+# Forwarded to MultiIndexer.run as nprocess.
+NPROC = 150
+
+# NOTE(review): the previous tags were joined with a leading "_" ("_clust_");
+# without it the tag and the suffix run together in the output names - confirm.
+clust_tag = tag + "clust_"
+rands_tag = tag + "rands_"
+work_dir = "/e/eser2/vargatn/EMULATOR/EPSILON/indexer/"
+
+# Bin edges; each pair is passed as a limit to TargetData.select_range.
+redshift_bins = [[0.3, 0.35], [0.45, 0.5], [0.6, 0.65]]
+lambda_bins = [[55, 60],]
 
-clust_tag = paths.config["tag"] + "_clust_"
-rands_tag = paths.config["tag"] + "_rands_"
-fname_root = paths.config["work_dir"]

 if __name__ == "__main__":
     args = parser.parse_args()
     if not args.noclust:
         print("starting clusters")
         i = 0
-        for z, zbin in enumerate(paths.config["parameter_bins"]["redshift_bins"]):
-            for l, lbin in enumerate(paths.config["parameter_bins"]["lambda_bins"]):
+        for z, zbin in enumerate(redshift_bins):
+            for l, lbin in enumerate(lambda_bins):
                 if args.ibin == -1 or args.ibin == i:
                     print("z", z, "lambda", l)
-                    fname_root = fname_root + clust_tag + "z" + str(z) + "_l" + str(l)
+                    fname_root = work_dir + clust_tag + "z" + str(z) + "_l" + str(l)
+                    # print(fname_root)
                     print(fname_root)
                     target = indexer.TargetData(clust_path, mode="clust")
                     pars = ["redshift", "richness"]
                     limits = [zbin, lbin]
                     target.select_range(pars, limits)
 
                     survey = indexer.SurveyData(survey_fnames)
-                    print("HERE")
+
                     imaker = indexer.MultiIndexer(survey, target, fname_root)
-                    imaker.run(nprocess=paths.config["nproc"])
+                    imaker.run(nprocess=NPROC)
 
                 i += 1
 
     # if not args.norands:
     #     print("starting randoms")
     #     i = 0
-    #     for z, zbin in enumerate(paths.config["parameter_bins"]["redshift_bins"]):
-    #         for l, lbin in enumerate(paths.config["parameter_bins"]["lambda_bins"]):
+    #     for z, zbin in enumerate(redshift_bins):
+    #         for l, lbin in enumerate(lambda_bins):
     #             if args.ibin == -1 or args.ibin == i:
     #                 print("z", z, "lambda", l)
-    #                 fname_root = fname_root + rands_tag + "_z" + str(z) + "_l" + str(l)
+    #                 fname_root = work_dir + rands_tag + "_z" + str(z) + "_l" + str(l)
     #                 print(fname_root)
     #                 random = indexer.TargetData(rands_path, mode="rands")
     #                 pars = ["redshift", "richness"]
     #                 limits = [zbin, lbin]
     #                 random.select_range(pars, limits)
-    #                 random.draw_subset(2000)
+    #                 random.draw_subset(3000)
     #
     #                 survey = indexer.SurveyData(survey_fnames)
     #                 imaker = indexer.MultiIndexer(survey, random, fname_root)
-    #                 imaker.run(nprocess=paths.config["nproc"])
+    #                 imaker.run(nprocess=NPROC)
     #             i += 1