## Separate isolated vs. repeating moonquakes

A moonquake is considered "repeating" if it has a correlation coefficient of at least 0.9 with one or more other moonquakes on one or more geophones. All other moonquakes (i.e., those whose correlation coefficients with every other event are below 0.9 on all geophones) are considered "isolated."

### Import libraries

In [6]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import os
import glob
import sys

# Import functions
fxndir = '../functions/'
sys.path.insert(0,fxndir)
from moon2data import *

### Load moonquake catalog

In [7]:
# Read the moonquake catalog CSV from the quality-control directory
mqdir1 = '../catalogs/quality_control/'
mqcat = pd.read_csv(f'{mqdir1}A17_moonquakes_catalog_nodupes_HQ.csv')

# Discard every column whose name matches 'Unnamed' or 'index'
mqcat = mqcat.drop(columns=mqcat.filter(regex='Unnamed|index').columns)

### Load moonquake files

In [8]:
# Correlation-coefficient threshold; it is also part of the directory name
cc_thresh = 0.9
catdir = f'/data/ytamama/Apollo17/catalogs/cc_ABCD/select_high_cc_nodupes_{round(cc_thresh, 2)}/'

# glob returns matches in arbitrary (filesystem-dependent) order; sort them so
# that every catalog built from this list has a reproducible row order.
fnames = sorted(glob.glob(f'{catdir}*.csv'))

### Define directories for isolated and repeating moonquakes

In [9]:
# Destination directories for the per-file copies of each class; both live
# under the same parent, differing only in the final path component.
isolated_dir, repeating_dir = (
    f'/data/ytamama/Apollo17/catalogs/cc_ABCD/{label}/'
    for label in ('isolated', 'repeating')
)

### Iterate through individual families and classify them as isolated or repeating

In [10]:
import shutil

# Columns kept from every per-file catalog
family_columns = ['evid','evid_ref','geophone','mod_arrival_time','corr_coeffs',
                  'dt_values','minfreq','maxfreq','grade','grade_new']

# Per-file frames are collected in lists and concatenated once after the loop,
# instead of re-concatenating the growing catalog on every iteration.
isolated_frames = []
repeating_frames = []
evids_repeating = []

# Make sure the destination directories exist before copying into them
os.makedirs(isolated_dir, exist_ok=True)
os.makedirs(repeating_dir, exist_ok=True)

for fname in fnames:

    # Read catalog, keep only the columns of interest, and write the trimmed
    # table back over the input file
    cat = pd.read_csv(fname)
    cat = cat[family_columns]
    cat.to_csv(fname)

    # Number of distinct events listed in this file
    numevts = len(np.unique(cat.evid.tolist()))

    # Isolated: the file contains fewer than two distinct events
    if numevts < 2:
        # shutil.copy avoids building a shell command from the path and
        # raises if the copy fails instead of failing silently
        shutil.copy(fname, isolated_dir)
        isolated_frames.append(cat)

    # Repeating: the file contains two or more distinct events
    else:
        shutil.copy(fname, repeating_dir)
        repeating_frames.append(cat)

# Always produce DataFrames, even when a class has no files, so that later
# cells can access columns such as .evid without special-casing
if isolated_frames:
    isolated_cat = pd.concat(isolated_frames)
else:
    isolated_cat = pd.DataFrame(columns=family_columns)

if repeating_frames:
    repeating_cat = pd.concat(repeating_frames)
else:
    repeating_cat = pd.DataFrame(columns=family_columns)

### Remove duplicates (e.g. repeating events classified as "isolated")

In [11]:
# Distinct event IDs present in each class
evids_isolated = np.unique(isolated_cat.evid.tolist())
evids_repeating = np.unique(repeating_cat.evid.tolist())

# Remove duplicates: drop from the isolated catalog every event that also
# appears in the repeating catalog. Take an explicit copy so that adding
# columns to isolated_cat2 later does not operate on a slice of isolated_cat
# (which triggers pandas' SettingWithCopyWarning).
isolated_cat2 = isolated_cat[~isolated_cat.evid.isin(evids_repeating)].copy()

In [13]:
# Total number of distinct event IDs: repeating plus de-duplicated isolated
sum(len(np.unique(c.evid.tolist())) for c in (repeating_cat, isolated_cat2))

8101

### Add a column classifying events as "isolated" or "repeating"

In [14]:
# Isolated
classifiers = []
for r in np.arange(0,len(isolated_cat2)):
    classifiers.append('isolated')
isolated_cat2['isol_or_rpt'] = classifiers

# Repeating
classifiers = []
for r in np.arange(0,len(repeating_cat)):
    classifiers.append('repeating')
repeating_cat['isol_or_rpt'] = classifiers

### Combine dataframes

In [None]:
# Stack the repeating and isolated catalogs, renumbering rows from 0
combined_cat = pd.concat([repeating_cat, isolated_cat2], ignore_index=True)

# Keep only the catalog columns, in a fixed order
combined_cat = combined_cat.loc[:, [
    'evid', 'evid_ref', 'geophone', 'mod_arrival_time', 'corr_coeffs',
    'dt_values', 'minfreq', 'maxfreq', 'grade', 'grade_new', 'isol_or_rpt',
]]

### Count # of events per family (1 if isolated)

In [16]:
# Family size = number of distinct evid values among the rows sharing an
# evid_ref. Compute it once per family with a groupby rather than re-scanning
# the whole catalog for every row (which is quadratic in the number of rows).
family_sizes = combined_cat.groupby('evid_ref')['evid'].nunique(dropna=False)

# Look up each row's family size. A row whose evid_ref is missing matches no
# family and gets 0, the same count an equality comparison would yield.
numevts_col = (
    combined_cat['evid_ref']
    .map(family_sizes)
    .fillna(0)
    .astype(int)
    .tolist()
)

In [20]:
# Attach the per-row family size computed above
combined_cat['num_per_family'] = numevts_col

# Write the final catalog to disk
mqdir = '../catalogs/final_catalogs/'
combined_cat.to_csv(f'{mqdir}A17_moonquakes_catalog_HQ_final.csv')