## to do:
- work out how to show number of visits per target for a plate
- sort plates by number of visits (ideally N > 20)
- make plate data into pandas df
- run pca to analyse the N RV measurements of those ~200 stars together

## questions:
- what does 'bestars' represent?
- what is observed spectrum template matching?
- how is field location id determined?
- what does tmass-style object name mean?

In [1]:
import numpy as np
from matplotlib import pyplot as plt
from astropy.io import fits
import fitsio
import statistics as stat
import pandas as pd

## using fitsio
### figuring out plate id data

In [2]:
# Read in fits file data using fitsio

data = fitsio.read('allVisit-r12-l33.fits')

In [3]:
# Define variables for working with PLATE data

plates = data['PLATE']
plateslist = list(plates) #[5:]) # only numerical plate ids

print(plates.size) # Same as total number of visits

1778792


In [4]:
# Strip whitespace in plates list

for i, s in enumerate(plateslist):
    plateslist[i] = s.strip()

print(plateslist[:7]) # Check if strip was successful

['Bestars', 'Bestars', 'calibration', 'calibration', 'calibration', '7545', '7545']


In [5]:
# Plate with greatest number of visits

stat.multimode(plateslist)

['9290']

In [6]:
# Make list of unique plates

unique_plates = []
total_up = 0

for i in plateslist:
    if i not in unique_plates:
        total_up = total_up + 1
        unique_plates.append(i)

print('Number of unique plates:', total_up)

Number of unique plates: 2383


In [7]:
# Count the times that each plate appears; i.e. visits per plate

visit_count = []
for u in unique_plates:
    nvisits = plateslist.count(u)
    visit_count.append(nvisits)

In [8]:
# Create pandas df for visits per plate

plates_mdlist = [unique_plates, visit_count]

df = pd.DataFrame(plates_mdlist).transpose()
df.columns = ['Plate ID', 'Visit Count']

print(df)

         Plate ID Visit Count
0         Bestars          48
1     calibration         237
2            7545         792
3            7917         500
4            5583         794
...           ...         ...
2378         9260         794
2379         5582         794
2380         7540         792
2381        11039         265
2382         8655        1056

[2383 rows x 2 columns]


In [9]:
# Figure out number of unique visits per unique target per plate!

targetlist = list(data['TARGET_ID'])

In [10]:
unique_targets = []
total_ut = 0

for t in targetlist:
    if t not in unique_targets:
        total_ut = total_ut + 1
        unique_targets.append(t)
        
print('Number of unique targets:', total_ut)

KeyboardInterrupt: 

In [None]:
target_visit_count = []
for c in unique_targets:
    nvisits_t = targetlist.count(c)
    target_visit_count.append(nvisits_t)

## using astropy
### figuring out rv data
- Want to pick plate which has >20 visits per target!

In [None]:
# HDU stuff

hdulist = fits.open('allVisit-r12-l33.fits')

hdulist.info()

header = hdulist[1].header
data = hdulist[1].data

hdulist.close()

In [None]:
# Set up data parameters

allplates = list(data['PLATE'])

for i, s in enumerate(allplates):
    allplates[i] = s.strip() # Remove whitespace in plate ids

allmjd = list(data['MJD'])
alltargets = list(data['TARGET_ID'])
allrvs = list(data['OBSVHELIO']) # Heliocentric relative RV from 'observed spectrum template matching'

In [None]:
# Create pandas df

alldata = [allplates, allmjd, alltargets, allrvs]

df = pd.DataFrame(alldata).transpose()
df.columns = ['Plate ID', 'MJD', 'Target ID', 'OBSVHELIO (km/s)']

print(df.to_markdown())

In [None]:
print(df['Target ID'].value_counts())

In [None]:
df['Target ID'].describe()

In [None]:
df.groupby('Target ID')

In [None]:
# Show data only if target was visited 20+ times

dff = df[df['Target ID'].value_counts() > 20]

In [None]:
print(dff)

In [None]:
dff['MJD'].median()