## Goal
- Set up pandas dataframe of RV + MJD data to use in PCA - done!

## Steps
- Sort plates by n visits per target
- Pick a plate with n > 20 visits/target for the initial PCA

## Questions
- ...

In [49]:
import numpy as np
import pandas as pd
import statistics as stat
from astropy.io import fits
from collections import Counter

In [50]:
# Open the allVisit FITS file; the resulting HDU list is kept in `visit_hdus`.

visit_path = 'allVisit-r12-l33.fits'
visit_hdus = fits.open(visit_path)

# print(visit_hdus.info())

In [51]:
# Take the header and the table data from HDU 1, then close the file handle.

table_hdu = visit_hdus[1]
visit_header, visit_data = table_hdu.header, table_hdu.data

visit_hdus.close()

# print(visit_header)

In [52]:
# Copy the FITS table columns used below into plain Python lists.
# OBSVHELIO: heliocentric relative RV from 'observed spectrum template matching'.

all_targets, all_mjd, all_obsvhelio, all_plates = (
    list(visit_data[column])
    for column in ('TARGET_ID', 'MJD', 'OBSVHELIO', 'PLATE')
)

In [53]:
# Remove leading/trailing whitespace from every plate ID.
# Slice assignment keeps the update in place on the existing list object.

all_plates[:] = [plate.strip() for plate in all_plates]

# print(all_plates[:7])

In [95]:
# Create pandas dataframe for all data

# Build the frame column-by-column from a dict. Passing a list of four lists
# and transposing would first create a 4-row frame with one column per visit
# and leave every column as dtype `object`; a dict of columns lets pandas
# infer a dtype per column and avoids the transpose.
df = pd.DataFrame({
    'Target ID': all_targets,
    'Plate ID': all_plates,
    'MJD': all_mjd,
    'OBSVHELIO (km/s)': all_obsvhelio,
})

In [96]:
# Display the combined visit table: one row per visit, with the target ID,
# plate ID, MJD and OBSVHELIO columns. The bare expression on the last line is
# what makes the notebook render the frame.

df

Unnamed: 0,Target ID,Plate ID,MJD,OBSVHELIO (km/s)
0,apo1m.1.Bestars.,Bestars,58017,-523.166
1,apo1m.1.Bestars.,Bestars,58022,-526.084
2,apo1m.1.calibration.VESTA,calibration,56398,25.5533
3,apo1m.1.calibration.VESTA,calibration,56778,10.5618
4,apo1m.1.calibration.,calibration,57743,-36.2044
...,...,...,...,...
1778787,apo25m.4424.116-04.2M23595886+5726058,5583,56261,-60.9357
1778788,apo25m.4424.116-04.2M23595886+5726058,5583,56284,-60.901
1778789,apo25m.4548.105-45.2M23595980+1528407,6560,56584,-5.20138
1778790,apo25m.4548.105-45.2M23595980+1528407,6560,56588,-5.03455


In [55]:
# Get list of unique plates

# dict.fromkeys keeps one entry per plate ID in order of first appearance,
# so no per-item membership check against the growing list is needed.
unique_plates = list(dict.fromkeys(df['Plate ID']))

# print("Number of unique plates:", len(unique_plates))

Number of unique plates: 2383


In [67]:
# Get mode number of visits per target on each plate

# Split the table by plate in a single groupby pass instead of re-filtering
# the whole frame once per plate. Rows keep their original order inside each
# group, so the per-target visit counts, and therefore the value stat.mode
# returns for them, are the same as with per-plate boolean filtering.
mode_by_plate = {
    plate: stat.mode(Counter(plate_rows['Target ID']).values())
    for plate, plate_rows in df.groupby('Plate ID', sort=False)
}

# Align the results with unique_plates so the two lists pair up by index.
nvisits_mode = [mode_by_plate[p] for p in unique_plates]

# Optional filter kept from an earlier iteration:
# filtered_plates = [p for p, m in zip(unique_plates, nvisits_mode) if m > 19]

In [71]:
# Create pandas df of mode number of visits per target on each plate

# Build from a dict of columns so pandas infers a dtype for the visit counts;
# a list-of-lists followed by transpose() would leave both columns as dtype
# `object`.
visitcount_df = pd.DataFrame({
    'Plate ID': unique_plates,
    'Mode Visit Count/Target': nvisits_mode,
})

In [93]:
# Show the plates whose mode visit count per target is greater than 10.
# (Earlier variant appended: ['Mode Visit Count/Target'].sort_values(ascending=False))

exceeds_ten_visits = visitcount_df['Mode Visit Count/Target'] > 10
visitcount_df.loc[exceeds_ten_visits]

Unnamed: 0,Plate ID,Mode Visit Count/Target
70,9518,11
154,9244,11
234,9290,16
634,8907,14
1269,5631,11
1372,8112,12
1981,9860,12


In [97]:
# Select every visit recorded on plate 9290 and display the result.

on_plate_9290 = df['Plate ID'] == '9290'
plate9290_df = df.loc[on_plate_9290]
plate9290_df

Unnamed: 0,Target ID,Plate ID,MJD,OBSVHELIO (km/s)
164071,apo25m.5226.150-08-RV.2M03252400+4614203,9290,57706,-86.5657
164072,apo25m.5226.150-08-RV.2M03252400+4614203,9290,57732,-86.3834
164073,apo25m.5226.150-08-RV.2M03252400+4614203,9290,57734,-86.3869
164074,apo25m.5226.150-08-RV.2M03252400+4614203,9290,57735,-86.5328
164075,apo25m.5226.150-08-RV.2M03252400+4614203,9290,57760,-86.3666
...,...,...,...,...
192942,apo25m.5226.150-08-RV.2M03415658+4626067,9290,58068,-26.4901
192943,apo25m.5226.150-08-RV.2M03415658+4626067,9290,58085,-26.5953
192944,apo25m.5226.150-08-RV.2M03415658+4626067,9290,58087,-26.604
192945,apo25m.5226.150-08-RV.2M03415658+4626067,9290,58114,-26.5992


In [98]:
# No longer relevant; retained for reference only. This cell built a pandas df
# of visit counts per target, keeping targets with more than 19 visits.

# target_count = Counter(df['Target ID'])
# target_count = Counter(all_targets)

# filtered_targets = []
# filtered_targets_count = []

# for tc in target_count:
#     if target_count[tc] > 19:
#         filtered_targets.append(tc)
#         filtered_targets_count.append(target_count[tc])
        
# filtered_targets_data = [filtered_targets, filtered_targets_count]
# targets_df = pd.DataFrame(filtered_targets_data).transpose()
# targets_df.columns = ['Target ID', 'Visit Count']

# targets_df

# df[df['Target ID'] == 'apo25m.4230.M15.2M21342357+1215247']