## Flag and remove duplicate events in the Grade A-D catalog

### Import libraries

In [4]:
# Imports
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import os
import glob
import sys
import random

# Import functions
sys.path.insert(0,'../functions/')
from moon2data import *

### Load catalog of Grade A through D moonquakes

In [5]:
# Read the moonquake catalog, then discard every column whose name contains
# 'Unnamed' or 'index'.
mqdir = '../catalogs/quality_control/'
cat = pd.read_csv(mqdir + 'A17_moonquakes_catalog.csv')
stale_cols = list(cat.filter(regex='Unnamed|index'))
cat = cat.drop(columns=stale_cols)

### Flag detections with pick times within 30 seconds of each other

In [6]:
# For every catalog row, record 1 when at least one row of a *different* event
# has a pick time inside [picktime - 30 s, picktime + 30 s], otherwise 0.
time_fmt = '%Y-%m-%d %H:%M:%S.%f'
half_window = timedelta(seconds=30)
has_overlap = []
for evid, picktime_str in zip(cat.evid, cat.picktime):
    picktime = datetime.strptime(picktime_str, time_fmt)
    window_start = (picktime - half_window).strftime(time_fmt)
    window_end = (picktime + half_window).strftime(time_fmt)
    # Pick times are compared as strings, not as datetimes.
    # NOTE(review): this presumably relies on every picktime string in the
    # catalog using the same fixed-width format - confirm.
    in_window = (cat.picktime >= window_start) & (cat.picktime <= window_end)
    other_event = cat.evid != evid
    has_overlap.append(1 if (in_window & other_event).any() else 0)

In [None]:
# Disabled: attach the overlap flags to the catalog and overwrite the catalog CSV.
# NOTE(review): later cells read cat.has_overlap, so with these lines disabled the
# column presumably already exists in the loaded CSV - confirm before re-running.
#cat['has_overlap'] = has_overlap
#cat.to_csv(mqdir + 'A17_moonquakes_catalog.csv',index=False)

In [33]:
# Number of distinct event IDs among the rows flagged as overlapping
flagged = cat['has_overlap'] == 1
cat_overlap = cat[flagged]
np.unique(cat_overlap['evid'].tolist()).size

1178

### Assign group numbers

In [7]:
# Overlapping rows only, ordered by pick time
flagged_rows = cat.loc[cat.has_overlap == 1]
rows_overlap = flagged_rows.sort_values(by=['picktime']).reset_index()

# Walk consecutive pairs of rows: a row shares the previous row's group number
# unless its pick time is more than 60 s later, in which case a new group starts.
# NOTE(review): the flagging step uses a 30 s window while this threshold is
# 60 s - confirm the difference is intended.
time_fmt = '%Y-%m-%d %H:%M:%S.%f'
groupnum = 1
groupnums = [groupnum]  # group number of the first row
pick_strings = rows_overlap.picktime.tolist()
for earlier_str, later_str in zip(pick_strings, pick_strings[1:]):
    earlier = datetime.strptime(earlier_str, time_fmt)
    later = datetime.strptime(later_str, time_fmt)
    if (later - earlier).total_seconds() > 60:
        groupnum += 1
    groupnums.append(groupnum)

In [10]:
# Attach the group numbers to the overlapping rows, then look up the group of
# every catalog row by its (evid, geophone) pair. Rows without a match get 0;
# when several overlapping rows match, the first one's group is used.
rows_overlap['group'] = groupnums
groupnums_all = []
for evid, geonum in zip(cat.evid, cat.geophone):
    is_same_pick = (rows_overlap.evid == evid) & (rows_overlap.geophone == geonum)
    matched = rows_overlap.loc[is_same_pick]
    groupnums_all.append(matched.iloc[0].group if len(matched) else 0)

In [10]:
# Store the per-row group numbers on the catalog (0 = row matched no overlapping row)
cat['group'] = groupnums_all
# Disabled: writing the updated catalog back to the catalog CSV
#cat.to_csv(mqdir + 'A17_moonquakes_catalog.csv',index=False)

### Remove duplicates, keeping higher-grade events

In [75]:
# For each overlap group, keep the event whose grade sorts first in ascending
# order (grade A before grade D) and mark every other event in that group for
# removal.
evids_keep = []
evids_toss = []
# groupnums holds one entry per overlapping row, so de-duplicate it (keeping
# first-seen order) to process every group exactly once.
for groupnum in dict.fromkeys(groupnums):
    rows = cat.loc[cat.group == groupnum]
    if rows.empty:
        # No catalog row carries this group number: nothing to keep or toss.
        continue
    rows = rows.sort_values(by=['grade'])
    evid_keep = rows.evid.tolist()[0]
    rows_toss = rows.loc[rows.evid != evid_keep]
    evids_keep.append(evid_keep)
    toss = rows_toss.evid.tolist()
    # Grow a plain list instead of rebuilding a NumPy array on every iteration.
    evids_toss.extend(toss)
# Downstream cells treat evids_toss as an array, so convert once at the end.
evids_toss = np.asarray(evids_toss)

In [76]:
# Distinct event IDs that appear in the toss list, the keep list, or both
combined_list = np.append(evids_toss, evids_keep)
np.unique(combined_list).size

1178

In [77]:
# NOTE(review): 11791 matches the unique-event count printed for cat_nodupes;
# 12085 is presumably the unique-event count before duplicate removal - confirm.
12085-11791

294

In [78]:
# NOTE(review): presumably the fraction of unique events retained after duplicate
# removal (11791 matches the cat_nodupes count; 12085 is unverified) - confirm.
11791/12085

0.9756723210591642

In [63]:
# Re-count the distinct event IDs among rows flagged as overlapping
is_flagged = cat['has_overlap'] == 1
cat_overlap = cat[is_flagged]
np.unique(cat_overlap['evid'].tolist()).size

1178

In [61]:
# Manual check of a single group: which event is kept, and how many distinct
# events would be tossed
groupnum = 5
rows = cat[cat['group'] == groupnum].sort_values(by=['grade'])
evid_keep = rows['evid'].iloc[0]
rows_toss = rows[rows['evid'] != evid_keep]
toss = rows_toss['evid'].tolist()
np.unique(toss).size

2

In [46]:
# Display the event ID currently held in evid_keep
evid_keep

'770424-15-N3'

In [48]:
# Display the event IDs currently held in toss (one entry per catalog row, so an ID can repeat)
toss

['770424-15-N1', '770424-15-N1', '770424-15-N1', '770424-15-N1']

In [41]:
# Largest group number; numbering starts at 1 and rises by 1 per new group, so this is the group count
max(groupnums)

581

In [38]:
# Number of distinct event IDs marked for removal
np.unique(evids_toss).size

606

In [39]:
# Distinct tossed IDs plus distinct kept IDs. If this sum exceeds the number of
# distinct IDs in the two lists combined, some events are kept in one group and
# tossed in another.
n_toss = np.unique(evids_toss).size
n_keep = np.unique(evids_keep).size
n_toss + n_keep

1185

In [18]:
# Retain rows whose event was chosen as a keeper or that never overlapped, then
# remove the helper columns (any name containing 'Unnamed', 'index',
# 'has_overlap' or 'group').
keep_mask = cat.evid.isin(evids_keep) | (cat.has_overlap == 0)
cat_nodupes = cat.loc[keep_mask].reset_index()
helper_cols = list(cat_nodupes.filter(regex='Unnamed|index|has_overlap|group'))
cat_nodupes = cat_nodupes.drop(columns=helper_cols)
# Disabled: writing the de-duplicated catalog to its own CSV
#cat_nodupes.to_csv(mqdir + 'A17_moonquakes_catalog_nodupes.csv',index=False)

In [19]:
# Number of distinct event IDs left in the de-duplicated catalog
np.unique(cat_nodupes['evid'].tolist()).size

11791

In [20]:
# NOTE(review): 11791 is the unique-event count printed for cat_nodupes; 12085 is
# presumably the count before duplicate removal, making this the number removed - confirm.
12085-11791

294

In [24]:
# Distinct event IDs remaining after duplicate removal (repeat check)
remaining_ids = cat_nodupes['evid'].tolist()
np.unique(remaining_ids).size

11791