# Data Review
A set of plots intended to give a quick look at a set of RDP and ADS-B data to review and ensure it is as expected, or highlight potential issues.

1. RDP General 
    1. Plot Frequency: bucketed count of plots over the full range of the dataset
    1. Scatter Plot: radial scatter plot of the full dataset, color coded by radial velocity, if CAT 48. Gets really busy for long datasets
    1. Range-Azimuth: pretty much the radial scatter plot unwound to an x-y plot with azimuth on the x and range as y
    1. SNR, CLM, PAM combo plot - not sure what to do with this yet
1. ADS-B General 
    1. ADS-B: radial scatter plot of ADS-B data
    1. ADS-B: altitude histogram
1. Accuracy - match RDP to ADS-B and use it as a proxy for truth data to calculate Pd, range and azimuth accuracy
    1. Pd heat-map by range and azimuth - create range/azimuth cells and calculate Pd in each cell
    1. Range accuracy - heat-map by range and azimuth
    1. Azimuth accuracy - heat-map by range and azimuth

In [None]:
# add tracking functions
from importlib import reload  # Python 3.4+
import pandas as pd
import numpy as np
from statsmodels.nonparametric.smoothers_lowess import lowess

import sys
import os
import datetime
# sys.path.append('C:/Users/ttrinter/git_repo/tracking')
sys.path.append('C:/Users/ttrinter/git_repo/cspeed/data_common')

import visualizations as v

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

sys.path.append("../")
import data_functions as dfunc
import plot_analysis_fcns as paf
import file_fcns as ffunc

# Global switch: passed as save_plot= to most of the plotting calls in this notebook.
save_plots = True
METERS_in_NM = 1852  # metres per nautical mile


# Earlier run configurations, kept for reference:
# # Travis
# location = 'Travis'
# test_date = datetime.datetime(2024,6,20)
# flight_path = '2b'
# sortie_id=49
# save_plots=True

# # Rio
# location = 'Rio_Vista'
# test_date = datetime.datetime(2024,6,20)
# flight_path = '2b'
# sortie_id=48

# YPG

# Sortie id -> mode label; the label is used as `flight_path` in titles and file names.
modes = {66: 'Mode 1',
         67: 'Mode 3',
         68: 'Mode B',
         69: 'Mode F',
         70: 'Mode 3T',
         71: 'Mode F-Repeat'}

test_date = datetime.datetime(2024, 7, 20)
location = 'YPG'
sortie_id = 71
# Sorties outside the table (e.g. 64/65, which are handled just below) used to
# raise KeyError here; fall back to a generic label instead.
flight_path = modes.get(sortie_id, f'sortie_{sortie_id}')

# Date renderings: file names, plot titles, and directory naming respectively.
short_date = test_date.strftime("%Y%m%d")
long_date = test_date.strftime('%m/%d/%Y')
query_date = test_date.strftime('%Y-%m-%d')
name_base = f'{location}_{short_date}_{flight_path}'

if sortie_id in [64, 65]:
    name_base = f'{name_base}_reprocessed'

# All CSV/XLSX/PNG outputs of this notebook are written under this directory.
data_dir = f'C:/Users/ttrinter/OneDrive - cspeed.com (1)/Documents/Data/{location}/{query_date}_{sortie_id}'

ffunc.check_create_folder(data_dir)

## Get Data

In [None]:
# MySQL
# import MySQLdb
# sql_host= 'localhost'
# sql_user='root'
# sql_db='flight_tests'
# sql_pwd='CSpeedMySQL'

In [None]:
# Row filter for this sortie/date; the positions query further down reuses it.
where_clause = f" WHERE sortie_id={sortie_id} AND test_date = '{test_date}' "

# One-day window starting at the test date (only referenced by the disabled filter below).
start_timestamp = str(test_date.date())
end_timestamp = str(test_date.date() + datetime.timedelta(days=1))

# where_clause = f"{where_clause} AND timestamp between '{start_timestamp}' and '{end_timestamp}'"

# Prefer the local CSV cache; otherwise query the database table.
file_w_path = f'{data_dir}/rdp_data_{sortie_id}.csv'
if not os.path.exists(file_w_path):
    print('Reading from BigQuery')
    rdp_data = dfunc.get_db_table_data(where_clause, 'radar_data.rdp')
else:
    print(f'Reading from csv: {file_w_path}')
    rdp_data = pd.read_csv(file_w_path)
    # read_csv leaves timestamps as text, so convert them back to datetimes
    rdp_data['timestamp'] = pd.to_datetime(rdp_data['timestamp'])

# Read from MySQL
# db=MySQLdb.connect(host=sql_host, user=sql_user, passwd=sql_pwd, db=sql_db)
# query = f"SELECT * FROM rdp {where_clause}"

# rdp_data = pd.read_sql(query, con=db)
#fix dates
# for date_field in ['test_date','timestamp']:
#     rdp_data[date_field] = pd.to_datetime(rdp_data[date_field])

print(f'Sortie id: {sortie_id}')
print(dfunc.data_summary(rdp_data))

In [None]:
# Write the RDP data to the CSV cache that the load cell checks for first.
# Timezones are stripped through the project helper before writing.
ffunc.df_strip_timezone(rdp_data).to_csv(f'{data_dir}/rdp_data_{sortie_id}.csv', index=False)

In [None]:
#filter to flight test time only
# rdp_data = rdp_data.loc[(rdp_data.timestamp>'2024-05-06 20:12:00') & (rdp_data.timestamp<'2024-05-06 21:15:00') ]

In [None]:
# Plots with field_note == 1 are what the notebook calls "farmicide".
# The column is optional (the clean-up cell below checks for it), so guard the
# access and fall back to an empty frame with the same columns.
if 'field_note' in rdp_data.columns:
    farmicide_df = rdp_data.loc[rdp_data['field_note'] == 1]
else:
    farmicide_df = rdp_data.iloc[0:0]

# Track-filtered plots: rows that carry a value in track_no_artas.
tfp_data = rdp_data.loc[rdp_data['track_no_artas'].notna()]

# Flag that gates the TRACK FILTERED section further down.
tfp = len(tfp_data) > 0

len(farmicide_df)

In [None]:
# Scatter the farmicide plots, if any were flagged. The image is always saved
# (save_plot is hard-coded here rather than taken from save_plots).
if len(farmicide_df) > 0:
    sup_title = f'{location} Farmicide\n{long_date}'
    save_name = f'{data_dir}/{name_base}_farmicide.png'
    v.scatter_targets(
        farmicide_df,
        sup_title=sup_title,
        save_plot=True,
        save_name=save_name,
    )

In [None]:
# Remove non-real plots: keep only rows whose field_note is 0, treating a
# missing field_note as 0.
if 'field_note' in rdp_data.columns:
    # Assign the filled column back explicitly. Calling fillna(inplace=True) on
    # the attribute-accessed Series is chained assignment: it warns on recent
    # pandas and does not modify the frame under copy-on-write.
    rdp_data['field_note'] = rdp_data['field_note'].fillna(0)
    rdp_data = rdp_data.loc[rdp_data['field_note'] == 0]

len(rdp_data)

In [None]:
# Position data for the same sortie/date filter as the RDP query, with
# timezones stripped by the project helper.
pos_data = ffunc.df_strip_timezone(
    dfunc.get_db_table_data(where_clause, 'radar_data.positions')
)
pos_data.head()

In [None]:
# rdp_data.to_csv(f'{data_dir}/rio_vista_rdp_20240619.csv', index=False)

## RDP Plots
### 1.1 Plot Frequency

In [None]:
# Tag each RDP row with a pass number derived from the position data,
# then show how many plots fall in each pass.
rdp_data = dfunc.add_pass_no(rdp_data, pos_data)
rdp_data['pass_no'].value_counts()

In [None]:
# reload(v)
# 1.1 Plot frequency, grouped per pass (the fixed-window variant is disabled).
save_name = f'{data_dir}/{name_base}_plot_freq.png'
this_plot = v.plot_frequency(
    rdp_data,
    # plot_title=f'Plot Frequency - Test {window_sec} Second Windows\n{name_base}',
    # window_sec = window_sec,
    plot_title=f'Plot Frequency - Per Pass\n{name_base}',
    per_pass=True,
    save_plot=save_plots,
    save_name=save_name,
)

In [None]:
# Write to MySQL for local use
# from sqlalchemy import create_engine
# engine = create_engine(f'mysql+pymysql://root:CSpeedMySQL@localhost/flight_tests')
# cnx = engine.connect()
# # flight_tests_mysql=MySQLdb.connect(host=sql_host, user=sql_user, passwd=sql_pwd, db='flight_tests')

# rdp_data.iloc[:,0:21].to_sql(name='rdp', con=cnx, if_exists='append', chunksize=1000)
# cnx.commit()

### 1.2 RDP Scatter Plot

In [None]:
# reload(v)
# 1.2 Scatter of the full (cleaned) RDP dataset.
save_name = f'{data_dir}/{name_base}_rdp_polar.png'
v.scatter_targets(
    rdp_data,
    plot_alpha=0.5,
    marker="+",
    sup_title=f"RDP Scatter Plot\n{name_base}",
    save_plot=save_plots,
    save_name=save_name,
)

In [None]:
reload(v)  # re-import the visualizations module so local edits take effect
plot_title = f'Radial Velocity vs. Time\n{flight_path} {query_date}'
save_name = f'{data_dir}/doppler_time.png'
# Pass the full path built above. The original passed the bare file name
# "doppler_time.png", so the image was written relative to the working
# directory instead of data_dir.
v.rv_time_scatter(rdp_data,
                  plot_title=plot_title,
                  plot_alpha=0.2,
                  figsize=(14, 6),
                  save_plot=save_plots,
                  plot_color='navy',
                  save_name=save_name)

In [None]:
# Display the output directory so saved plots are easy to locate.
data_dir

In [None]:
# reload(v)
# Close-range ("WRA") scatter: RDP plots with rho below 12. Skipped for YPG.
if location != 'YPG':
    plot_data = rdp_data[rdp_data['rho'] < 12]
    save_name = f'{data_dir}/{name_base}_rdp_polar_wra.png'
    v.scatter_targets(
        plot_data,
        plot_alpha=0.5,
        marker="+",
        sup_title=f"RDP Scatter Plot WRA\n{name_base}",
        save_plot=save_plots,
        save_name=save_name,
    )

In [None]:
# reload(v)
# Track-filtered plots within the close-range subset (`plot_data`, built in the
# previous cell). Skipped for YPG.
if location != 'YPG':
    # Use a dedicated name. The original rebound the global `tfp_data` here,
    # silently restricting the later TRACK FILTERED analysis to rho < 12.
    # NOTE(review): confirm that analysis is meant to use the full TFP set.
    wra_tfp_data = plot_data.loc[plot_data['track_no_artas'].notna()]
    save_name = f'{data_dir}/{name_base}_tfp_polar.png'
    v.scatter_targets(wra_tfp_data,
                      plot_alpha=0.5,
                      marker="+",
                      sup_title=f"TFP Scatter Plot WRA\n{name_base}",
                      save_plot=save_plots,
                      save_name=save_name)

### 1.3 Range-Azimuth Scatter

In [None]:
# reload(v)
# 1.3 Range vs. azimuth scatter of the RDP plots.
save_name = f'{data_dir}/{name_base}_range_azimuth_scatter.png'
v.range_azimuth_scatter(
    rdp_data,
    plot_title=f"Range-Azimuth Scatter\n{name_base}",
    plot_alpha=0.3,
    rv_max=100,
    figsize=(12, 6),
    save_plot=save_plots,
    save_name=save_name,
)

In [None]:
# Mean absolute value of the `cal` column (used as radial velocity later in this notebook).
rdp_data['cal'].abs().mean()

### 1.4 SNR-CLM

In [None]:
# reload(v)
# 1.4 SNR / CLM / PAM histograms; only drawn when at least one SNR value is present.
if rdp_data['snr'].notna().any():
    save_name = f'{data_dir}/{name_base}_snr_clm_pam.png'
    v.snr_clm_hist(
        rdp_data,
        max_snr=100,
        plot_title=f"SNR-CLM-PAM\n{name_base}",
        save_plot=save_plots,
        save_name=save_name,
    )

## CLM, SNR vs. Range

Compare these to E-City

In [None]:
# sql_txt = f"""SELECT time_of_day, `timestamp`, sortie_id, pam, rho
# FROM flight_tests.rdp
# WHERE sortie_id = {sortie_id}
# AND test_date = '{query_date}'"""

# pam_data = dfunc.query_to_df(sql_txt)
# pam_data.head()

In [None]:
# fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# plt.suptitle('PAM vs Range')

# 'E-City'
# ax1 = plt.subplot(121)
# ax1.scatter(data=pam_data.loc[pam_data.pam < 150],
#         x='rho',
#         y='pam', 
#         marker='.', 
#         alpha = 0.1, 
#         color='orange')

# ax1.legend()
# ax1.grid(True)
# ax1.set_title('E-City 04/24/2024')

# ax1.set_xlabel("Range (NM)")
# ax1.set_ylabel("PAM")

# 'YPG'
# ax2 = plt.subplot(122)
# ax2.scatter(data=rdp_data.loc[rdp_data.pam < 150],
#         x='rho',
#         y='pam', 
#         marker='.', 
#         alpha = 0.1, 
#         color='orange')

# ax2.legend()
# ax2.grid(True)
# ax2.set_title(f'YPG {long_date} ')

# ax2.set_xlabel("Range (NM)")
# ax2.set_ylabel("PAM")

# if save_plots == True:
#         save_name = f'{data_dir}/{name_base}_pam_range.png'
#         plt.savefig(save_name)


In [None]:
# fig, ax = plt.subplots(figsize = [10,6])
# ax.scatter(data=rdp_data,
#         x='rho',
#         y='snr', 
#         marker='.', 
#         alpha = 0.2)

# ax.legend()
# ax.grid(True)
# ax.set_title(f'YPG: SNR vs. Range/n{long_date}')

# plt.xlabel("Range (NM)")
# plt.ylabel("SNR")

# if save_plots == True:
#         save_name = f'{data_dir}/{name_base}_snr_range.png'
#         plt.savefig(save_name)


## ADS-B General 
1. ADS-B: radial scatter plot of ADS-B data
1. ADS-B: altitude histogram

In [None]:
# reload(dfunc)
# ADS-B Data
# Sorties 64/65 (the "_reprocessed" runs) query ADS-B under a different sortie id.
adsb_sortie_for = {65: 29, 64: 32}
top_sortie = adsb_sortie_for.get(sortie_id, sortie_id)

max_rho = 100 # only instrumented to 100 NM
# Original note: the cone of silence below the aerostat gives minimal or no
# plots within 10 NM. The lower bound is nevertheless left at 0.
min_rho = 0

# Per-site query limits: (max flight level, min azimuth, max azimuth).
# Travis / Rio_Vista: reviewing targets below 30,000 feet (FL300).
# NOTE(review): the original YPG comment said "below 10,000 feet", but the
# limit is 70, which is 7,000 ft by the convention used for the other sites.
site_limits = {
    'Travis': (300, 68, 360),
    'Rio_Vista': (300, 80, 280),
    'YPG': (70, 0, 361),
}
if location not in site_limits:
    # Previously an unknown location fell through and failed later with NameError.
    raise ValueError(f'No ADS-B query limits configured for location {location!r}')
max_fl, min_theta, max_theta = site_limits[location]

# Restrict the ADS-B query to the time span covered by the RDP data.
rdp_start = rdp_data.timestamp.min().strftime("%Y-%m-%d %H:%M:%S")
rdp_end = rdp_data.timestamp.max().strftime("%Y-%m-%d %H:%M:%S")

# Get Data from BigQuery
where_clause = f" WHERE sortie_id={top_sortie} AND `timestamp` BETWEEN '{rdp_start}' AND '{rdp_end}'"
where_clause = f'{where_clause} AND flight_level <= {max_fl} AND rho BETWEEN {min_rho} AND {max_rho}'
where_clause = f'{where_clause} AND (theta BETWEEN {min_theta} AND {max_theta})'

# From BigQuery
adsb_data = dfunc.get_db_table_data(where_clause, 'radar_data.adsb')
adsb_data = ffunc.df_strip_timezone(adsb_data)

# From CSV
# adsb_data = pd.read_csv(f'{file_dir}/adsb/202405071605_adsb.csv')
#fix dates
# for date_field in ['test_date','timestamp']:
#     adsb_data[date_field] = pd.to_datetime(adsb_data[date_field])

# From MySQL
# db=MySQLdb.connect(host=sql_host, user=sql_user, passwd=sql_pwd, db=sql_db)
# query = f"SELECT * FROM adsb {where_clause}"
# adsb_data = pd.read_sql(query, con=db)

# Drop rows without a longitude and exact duplicates. Build a new frame
# instead of calling drop_duplicates(inplace=True) on a .loc slice.
adsb_data = adsb_data.loc[adsb_data['longitude'].notna()].drop_duplicates()

if adsb_data.empty:
    # The zip(*...) unpacking below cannot handle an empty frame; fail clearly.
    raise ValueError(f'No ADS-B rows returned for sortie {top_sortie} between {rdp_start} and {rdp_end}')

# Slant adjustment (project helper); the difference against rho_orig is kept
# as slant_adj. rho_orig is presumably added by the helper - confirm.
adsb_data = dfunc.adsb_slant_adjust(adsb_data, radar_fl=0)
adsb_data['slant_adj'] = adsb_data['rho'] - adsb_data['rho_orig']
adsb_data['x'], adsb_data['y'] = zip(*adsb_data[['rho','theta']].apply(lambda p: dfunc.polar_to_cartesian(p.rho, p.theta), axis=1))

# add radial velocity - unless it's already saved to the DB, post code-change
adsb_data = dfunc.add_radial_velocity(adsb_data, window_size = 5)
adsb_data = dfunc.add_pass_no(adsb_data, pos_data)
adsb_data = adsb_data.sort_values('timestamp')

print(dfunc.data_summary(adsb_data, 'adsb'))

In [None]:
# adsb_data.head()
# Write to MySQL for local use
# from sqlalchemy import create_engine
# engine = create_engine(f'mysql+pymysql://root:CSpeedMySQL@localhost/flight_tests')
# cnx = engine.connect()
# flight_tests_mysql=MySQLdb.connect(host=sql_host, user=sql_user, passwd=sql_pwd, db='flight_tests')

# adsb_data.to_sql(name='adsb', con=cnx, if_exists='append', chunksize=1000)
# cnx.commit()

### 2.1 ADS-B Scatter Plot

In [None]:
# reload(v)
# 2.1 ADS-B altitude scatter. Saving is switched off for this plot
# (save_plot=False), so save_name is passed but not expected to be used.
plot_title = f'ADS-B {name_base}\nflight_level<={max_fl} and range {min_rho}-{max_rho} NM'
save_name = f'{data_dir}/{name_base}_adsb_scatter.png'
v.adsb_altitude(
    adsb_data,
    plot_title=plot_title,
    save_plot=False,
    save_name=save_name,
)

In [None]:
# Number of ADS-B reports per target address, most frequent first.
vc = adsb_data.target_address.value_counts()

# Target address of the test aircraft at each site; for any other location
# fall back to the address with the most reports.
if location == 'YPG':
    test_plane = 1
elif location in ['Travis', 'Rio_Vista']:
    test_plane = 10865476
else:
    test_plane = vc.index[0]

# test_plane = vc.keys()[0]

# One Target: ADS-B reports of the test aircraft only.
# (The original assigned this twice and built a title that was immediately overwritten.)
test_adsb = adsb_data.loc[adsb_data.target_address == test_plane]
# test_adsb.reset_index(inplace=True, drop=True)
plot_title = f"Test Plane at {location}"
v.adsb_altitude(test_adsb, plot_title=plot_title, save_plot=True, save_name=f'{data_dir}/{location}_test_plane_adsb.png')

### Split out Flight Paths

In [None]:
# Preview the first rows of the test aircraft's ADS-B reports.
test_adsb.head()

In [None]:
# Plot one flight path of the 2024-07-09 test by slicing the test aircraft's
# ADS-B reports to that path's time window. Not applicable to YPG.
if location != 'YPG' and test_date == datetime.datetime(2024, 7, 9):
    # (start, end) per flight path. The original strings ended in ":000",
    # which is not a valid seconds field; ":00" is the same instant.
    fp_windows = {
        '2a': ('2024-07-09 16:42:00', '2024-07-09 17:20:00'),
        '3a': ('2024-07-09 17:25:00', '2024-07-09 18:04:00'),
        '3c': ('2024-07-09 18:24:00', '2024-07-09 19:08:00'),
    }
    fp = '3a'  # flight path to plot; must be a key of fp_windows
    fp_start, fp_end = fp_windows[fp]

    fp_adsb = test_adsb.loc[(test_adsb.timestamp > fp_start) &
                            (test_adsb.timestamp < fp_end)]
    plot_title = f'Flight Path {fp}\n{location}'
    # NOTE(review): the file name repeats {location}; kept unchanged so existing outputs keep their names.
    v.adsb_altitude(fp_adsb, plot_title=plot_title, save_plot=True, save_name=f'{data_dir}/{location}_{fp}_{location}.png')

In [None]:
# reload(v)
# 2.2 Histogram of ADS-B flight levels.
save_name = f'{data_dir}/{name_base}_adsb_fl_histogram.png'
plot_title = f'ADS-B Flight Level Histogram\n{name_base}'
# Use the title built above. The original built it but then passed a
# file-name-style string (f'{name_base}_flight_level_histogram') as the title.
v.adsb_flight_level_hist(adsb_data, plot_title=plot_title, figsize=(12, 6), save_plot=save_plots, save_name=save_name)

## RDP : ADS-B Matching

In [None]:
# Shift the timestamps of target address 1 back by 82.5 s.
# NOTE(review): not idempotent - every run of this cell shifts again - and
# test_adsb (extracted earlier) keeps the unshifted times. Confirm the offset's origin.
adsb_data.loc[adsb_data.target_address == 1, 'timestamp'] += datetime.timedelta(seconds=-82.5)

In [None]:
reload(paf)  # re-import the analysis module so local edits take effect
# test_adsb.reset_index(inplace=True, drop=True)
# dist_cols = ['time_of_day','x','y', 'cal']
# Columns handed to the matcher as dist_cols; rows missing any of them are dropped first.
dist_cols = ['time_of_day','x','y']

# all targets
rdp_feat, adsb_feat, test_feat = (
    frame.dropna(subset=dist_cols) for frame in (rdp_data, adsb_data, test_adsb)
)

radar_passes = paf.passes_from_rdp(rdp_data)

# RDP vs ADS-B Pd analysis; out_dir/file_prefix are handed to the helper for its outputs.
test_results_rdp = paf.rdp_adsb_pd_analysis(
    rdp_feat,
    adsb_feat,
    radar_passes,
    dist_cols=dist_cols,
    out_dir=data_dir,
    file_prefix=f"{name_base}_",
)

overall_pd = test_results_rdp['overall_pd']
print(f'Overall Pd = {overall_pd}')

In [None]:
# Gates passed to the accuracy evaluation (range in NM per the later
# 200/METERS_in_NM gate; azimuth in degrees per the histogram titles).
range_gate = 1
az_gate = 3
# reload(paf)

matched_passes_rdp = test_results_rdp['matched_passes_pd']
these_rslts = paf.eval_accuracy(matched_passes_rdp, range_gate=range_gate, az_gate=az_gate)

# One-row summary: match count, the gates used, and the headline metrics.
rslt_row = pd.DataFrame(
    {
        'match_count': len(matched_passes_rdp),
        'range_gate': range_gate,
        'az_gate': az_gate,
        **{key: these_rslts[key] for key in ('overall_pd',
                                             'range_error_mean',
                                             'range_error_sd',
                                             'az_error_mean',
                                             'az_error_sd')},
    },
    index=[0],
)
these_rslts['result_row'] = rslt_row

# matched_passes_pd = ff.df_strip_timezone(matched_passes_pd)
# matched_passes_pd.to_excel(f'matched_passes_{short_date}.xlsx', index=False)
rslt_row.to_csv(f'{data_dir}/radar_performance_results_{sortie_id}.csv')

target_pd = test_results_rdp['target_pd']

#save matched passes
matched_filename = f'{data_dir}/{short_date}_{location}_matched_rdp_{sortie_id}.xlsx'
save_matched = ffunc.df_strip_timezone(matched_passes_rdp)
save_matched.to_excel(matched_filename, index=False)

len(matched_passes_rdp)

In [None]:
# Histogram of azimuth errors whose magnitude is inside the azimuth gate.
az_error_filename = f'{data_dir}/{short_date}_{location}_azimuth_error.png'
matched_passes_rdp.loc[matched_passes_rdp['az_error'].abs() < az_gate, 'az_error'].hist(bins=100)
plt.title('RDP Azimuth Error (deg)')

plt.savefig(az_error_filename)

In [None]:
# Histogram of range errors in metres, limited to errors within a 200 m gate.
range_gate = 200/METERS_in_NM  # 200 m expressed in NM
range_error_filename = f'{data_dir}/{short_date}_{location}_range_error.png'
matched_passes_rdp['range_error_m'] = matched_passes_rdp['range_error']*METERS_in_NM
matched_passes_rdp.loc[matched_passes_rdp['range_error'].abs() < range_gate, 'range_error_m'].hist(bins=100)
plt.title('RDP Range Error (m)')

plt.savefig(range_error_filename)

In [None]:
# Display the one-row RDP performance summary.
rslt_row

In [None]:
# Convert the summary's range error statistics from NM to metres and report them.
range_error_m = rslt_row.loc[0, 'range_error_mean'] * METERS_in_NM
range_std_m = rslt_row.loc[0, 'range_error_sd'] * METERS_in_NM

print(f'Mean Range Error (m): {range_error_m:.2f}, SD: {range_std_m:.2f}')


## Target A/C

In [None]:
# Display the output directory so saved plots are easy to locate.
data_dir

In [None]:
# update gates to match DevCom
range_gate = 200/METERS_in_NM
az_gate = 3

# close_enough = 1 when both the range and azimuth error magnitudes are inside
# their gates, otherwise 0.
matched_passes_rdp['close_enough'] = 0
matched_passes_rdp.loc[
    (matched_passes_rdp['range_error'].abs() < range_gate)
    & (matched_passes_rdp['az_error'].abs() < az_gate),
    'close_enough',
] = 1

In [None]:
# Count of matches inside (1) and outside (0) the gates.
matched_passes_rdp.close_enough.value_counts()

In [None]:
reload(v)  # re-import the visualizations module so local edits take effect
# Matched RDP / ADS-B plot for the test aircraft; always saved (save_plot=True).
plot_title = f'{location} Test Plane Matched\nMode: {flight_path}'
v.plot_target_match(
    rdp_feat,
    # test_feat,
    adsb_feat,
    matched_passes_rdp,
    target_address=test_plane,
    dist_tol=1,
    figsize=(8, 5),
    include_noise=False,
    include_pd=False,
    include_rv=False,
    # plot_type='polar',
    save_plot=True,
    # rv_min=100,
    plot_title=plot_title,
    save_name=f'{data_dir}/test_plane_matched.png',
)

# TRACK FILTERED
Redoing the analysis with Track Filtered Plots only for comparison...

In [None]:
# Repeat the ADS-B matching using track-filtered plots only (when any exist).
# NOTE(review): file_prefix is the same as in the RDP run, so any files the
# helper writes to out_dir may overwrite the RDP ones - confirm.
if tfp:
    # dist_cols = ['time_of_day','x','y', 'cal']
    dist_cols = ['time_of_day','x','y']

    # all targets
    tfp_feat, adsb_feat, test_feat = (
        frame.dropna(subset=dist_cols) for frame in (tfp_data, adsb_data, test_adsb)
    )

    radar_passes = paf.passes_from_rdp(tfp_data)

    test_results_tfp = paf.rdp_adsb_pd_analysis(
        tfp_feat,
        adsb_feat,
        radar_passes,
        dist_cols=dist_cols,
        out_dir=data_dir,
        file_prefix=f"{name_base}_",
    )

    overall_pd = test_results_tfp['overall_pd']
    print(f'Overall Pd = {overall_pd}')

In [None]:
# Accuracy evaluation for the track-filtered matches, mirroring the RDP cell.
# NOTE(review): this rebinds these_rslts, rslt_row and target_pd, replacing the
# RDP values held under the same names.
if tfp:
    range_gate = 1
    az_gate = 3
    # reload(paf)

    matched_passes_tfp = test_results_tfp['matched_passes_pd']
    # Pass the gate variables (the original repeated the literals 1 and 3), so
    # the gates recorded in the summary row are the ones actually applied.
    these_rslts = paf.eval_accuracy(matched_passes_tfp, range_gate=range_gate, az_gate=az_gate)
    rslt_row = pd.DataFrame({'match_count': len(matched_passes_tfp),
                             'range_gate': range_gate,
                             'az_gate': az_gate,
                             'overall_pd': these_rslts['overall_pd'],
                             'range_error_mean': these_rslts['range_error_mean'],
                             'range_error_sd': these_rslts['range_error_sd'],
                             'az_error_mean': these_rslts['az_error_mean'],
                             'az_error_sd': these_rslts['az_error_sd']},
                            index=[0])

    these_rslts['result_row'] = rslt_row

    # matched_passes_pd = ff.df_strip_timezone(matched_passes_pd)
    # matched_passes_pd.to_excel(f'matched_passes_{short_date}.xlsx', index=False)
    # NOTE(review): unlike the RDP results file, this name carries neither the sortie id nor a TFP tag.
    these_rslts['result_row'].to_csv(f'{data_dir}/radar_performance_results.csv')

    target_pd = test_results_tfp['target_pd']

    #save matched passes
    matched_filename = f'{data_dir}/{short_date}_{location}_matched_tfp.xlsx'
    save_matched = ffunc.df_strip_timezone(matched_passes_tfp)
    save_matched.to_excel(matched_filename, index=False)

    # A bare expression inside an `if` is not displayed by the notebook; print it.
    print(len(matched_passes_tfp))

In [None]:
# Display the latest summary row (the TFP one when the TFP cells above ran, otherwise the RDP one).
rslt_row

In [None]:
# # test_plane_address = 10592485 # A1A0E5 - NOT TEST Plane
# test_plane_address = 11015472 #A81530 - TEST PLANE
# Matched plot for the test aircraft using track-filtered plots; always saved.
plot_title = f'{location} Test Plane Matched\nFlight Path: {flight_path} TFP'
if tfp:
    v.plot_target_match(
        tfp_feat,
        # test_feat,
        adsb_feat,
        matched_passes_tfp,
        target_address=test_plane,
        dist_tol=1,
        figsize=(10, 8),
        include_noise=True,
        include_pd=True,
        # plot_type='polar',
        save_plot=True,
        # rv_min=100,
        plot_title=plot_title,
        save_name=f'{data_dir}/test_plane_matched_tfp.png',
    )

In [None]:
# matched_to_excel = ffunc.df_strip_timezone(matched_passes_pd.loc[matched_passes_pd.target_address==test_plane])
# matched_to_excel.to_excel(f'{data_dir}/test_plane_passesCAL.xlsx', index=False)

## Mosaics - Pd, Accuracy by Range, Altitude and Azimuth
If we have enough datapoints, we can bucket the results by different factors and evaluate performance within each bucket to identify strong and weak spots.

### Pd

In [None]:
# Bucket the matched passes by azimuth, flight level and range for the mosaics.
# Every bin is labelled with its upper edge; include_lowest=True keeps values
# exactly on the lowest edge (e.g. azimuth 0) instead of turning them into NaN.

# Azimuth: 36 bins of 10 degrees.
theta_bins = np.linspace(0, 360, 37)
theta_labels = [int(x) for x in theta_bins[1:]]
matched_passes_rdp['theta_bin'] = pd.cut(matched_passes_rdp.theta_rdp,
                                         # bins=[0,90, 180, 270, 360],
                                         bins=theta_bins,
                                         labels=theta_labels,
                                         include_lowest=True)
                                         # labels=[1,2,3,4])
# matched_passes_pd.theta_bin = matched_passes_pd.theta_bin.astype('int')

# Flight level: 10 bins of width 10 covering 0-100. Written to its own column;
# the original overwrote the raw `flight_level` values, which lost them and
# made this cell fail when re-run.
fl_bins = np.linspace(0, 100, 11)
fl_labels = [int(x) for x in fl_bins[1:]]
matched_passes_rdp['flight_level_bin'] = pd.cut(matched_passes_rdp.flight_level,
                                                bins=fl_bins,
                                                labels=fl_labels,
                                                include_lowest=True)
# matched_passes_pd.flight_level = matched_passes_pd.flight_level.astype('int')

# Range: 20 bins of 5 covering 0-100. The original used linspace(0,100,20)
# (19 bins about 5.26 wide) with labels 10, 15, ..., 100, so the labels did
# not correspond to the bin edges.
rho_bins = np.linspace(0, 100, 21)
rho_labels = [int(x) for x in rho_bins[1:]]
matched_passes_rdp['rho_bin'] = pd.cut(matched_passes_rdp.rho_rdp,
                                       bins=rho_bins,
                                       labels=rho_labels,
                                       include_lowest=True)
# matched_passes_pd.rho_bin = matched_passes_pd.rho_bin.astype('int')
# matched_passes_pd.rho_bin = matched_passes_pd.rho_bin.astype('int')                                        

In [None]:
# Per (azimuth bin, range bin) cell: number of matched passes ('count') and
# how many of them were inside the gates ('sum' of close_enough).
pd_mosaic = (
    matched_passes_rdp[['theta_bin', 'rho_bin', 'close_enough']]
    .groupby(['theta_bin', 'rho_bin'])
    .agg(['count', 'sum'])
)
pd_mosaic.head()

In [None]:
# Flatten the grouped counts into one row per (theta_bin, rho_bin) with
# 'count' and 'sum' columns, then compute Pd per cell as sum / count.
# Cells with count == 0 yield NaN.
pd_df = pd_mosaic['close_enough'].unstack()[['count','sum']].stack().reset_index()
pd_df['pd'] = pd_df['sum'] / pd_df['count']
pd_df.head()

In [None]:
# Pd heat-map: range bins as rows, azimuth bins as columns.
pd_data = pd_df.pivot(index='rho_bin', columns='theta_bin', values='pd')
# Descending range bins, so the largest range is the first row of the grid.
pd_data = pd_data.sort_index(ascending=False)
# pd_data.head()
# cmap = sns.diverging_palette(h_neg=0, h_pos=129, s=74, l=44)

fig, ax1 = plt.subplots(figsize=(12, 5))

# (The original also called sns.color_palette('RdYlGn') and discarded the
# result; the colour map is selected by the cmap argument below.)
sns.heatmap(pd_data, cmap='RdYlGn', ax=ax1)
ax1.set_title('PD')
# ax1.grid(color='gray', linestyle='dotted', linewidth=1)

if save_plots:
    # Forward slash, as in every other path in this notebook. The original
    # '\p' is an invalid escape sequence and only forms a path on Windows.
    save_name = f'{data_dir}/pd_mosaic.png'
    fig.savefig(save_name)



In [None]:
# Per-cell accuracy statistics over matches that are inside the gates.
az_error_max = 5
range_error_max = 0.5

# The caps are applied to the error magnitude. The original compared the
# signed values, so large negative errors would always have passed these caps.
in_scope = (
    (matched_passes_rdp.close_enough == 1)
    & (matched_passes_rdp.az_error.abs() < az_error_max)
    & (matched_passes_rdp.range_error.abs() < range_error_max)
)
accuracy_mosaic = (
    matched_passes_rdp.loc[in_scope,
                           ['theta_bin',
                            'rho_bin',
                            'range_error',
                            'az_error',
                            'close_enough']]
    .groupby(['theta_bin', 'rho_bin'])
    .agg(['count', 'mean', 'std'])
)

# Mean azimuth error per (theta_bin, rho_bin) in long form for the heat-map.
az_df = accuracy_mosaic['az_error'].unstack()[['mean']].stack().reset_index()
az_df.rename(columns={'mean': 'az_error_mean_deg'}, inplace=True)
az_df.head()

In [None]:
# Azimuth-error heat-map: range bins as rows, azimuth bins as columns.
az_data = az_df.pivot(index='rho_bin', columns='theta_bin', values='az_error_mean_deg')
az_data = az_data.sort_index(ascending=False)
# Average of the per-cell means (signed), shown in the title.
avg_az_error = az_df.az_error_mean_deg.mean()

# pd_data.head()
# cmap = sns.diverging_palette(h_neg=0, h_pos=129, s=74, l=44)

fig, ax1 = plt.subplots(figsize=(12, 5))

sns.heatmap(az_data, cmap='RdYlGn_r', ax=ax1, cbar_kws={'label': 'Azimuth Error in Degrees'})
ax1.set_title(f"Azimuth Error\nAvg error: {avg_az_error:.2f} degrees")

# Honour the global switch, like the Pd heat-map does (the original always saved).
if save_plots:
    save_name = f'{data_dir}/az_error_mosaic.png'
    fig.savefig(save_name)


## Range Accuracy

In [None]:
# Mean range error per (theta_bin, rho_bin) in long form, in NM and in metres.
range_df = accuracy_mosaic['range_error'].unstack()[['mean']].stack().reset_index()
range_df.rename(columns={'mean': 'range_error_mean_nm'}, inplace=True)
# NM -> m is a multiplication by METERS_in_NM (as done for range_error_m
# elsewhere in this notebook); the original divided, shrinking values by ~1852^2.
range_df['range_error_mean_m'] = range_df['range_error_mean_nm'] * METERS_in_NM
range_df.head()

In [None]:
# Histogram of range errors (metres) for matches whose error magnitude is below 1 NM.
matched_passes_rdp['range_error_m'] = matched_passes_rdp['range_error']*METERS_in_NM
# abs() must wrap the error, not the comparison. The original took abs() of
# the boolean mask, which kept every error below +1 NM including large negatives.
matched_passes_rdp.loc[matched_passes_rdp.range_error.abs() < 1, 'range_error_m'].hist(bins=100)
plt.title(f'RDP Range Error (m)\n{flight_path}')
# NOTE(review): same file name as the earlier 200 m-gated range histogram, so
# this plot overwrites that one.
range_error_filename = f'{data_dir}/{short_date}_{location}_range_error.png'

plt.savefig(range_error_filename)

In [None]:
# Range-error heat-map: range bins as rows, azimuth bins as columns.
range_data = range_df.pivot(index='rho_bin', columns='theta_bin', values='range_error_mean_m')
range_data = range_data.sort_index(ascending=False)
# Average of the per-cell means (signed), shown in the title.
avg_range_error = range_df.range_error_mean_m.mean()

# pd_data.head()
# cmap = sns.diverging_palette(h_neg=0, h_pos=129, s=74, l=44)

fig, ax1 = plt.subplots(figsize=(12, 5))

sns.heatmap(range_data, cmap='RdYlGn_r', ax=ax1)
ax1.set_title(f'Range Error\nAvg error: {avg_range_error:.2f} meters')

# Honour the global switch, like the Pd heat-map does (the original always saved).
if save_plots:
    save_name = f'{data_dir}/range_error_mosaic.png'
    fig.savefig(save_name)


## Radial Velocity
We're seeing some apparently significant differences in average radial velocity between the 1st and 3rd quadrants. Let's take a look at average radial velocity by degree.

In [None]:
# Absolute `cal` value (treated as radial velocity) per whole degree of azimuth:
# the mean and the number of plots contributing to it.
rv_data = rdp_data[['timestamp', 'rho', 'theta', 'cal']].copy()
rv_data = rv_data.assign(degree=rv_data['theta'].round(0),
                         rv=rv_data['cal'].abs())

rv_data_deg = rv_data.groupby('degree')['rv'].agg(['mean', 'size']).reset_index()

In [None]:
# Display the per-degree mean and count table.
rv_data_deg

In [None]:
# Per-degree means as points, overlaid with a LOESS curve and a bootstrap band.
fig, ax1 = plt.subplots()
ax1.scatter(data=rv_data_deg, x='degree', y='mean', s=2, color='purple')

# LOESS fit: column 0 of the result is the x grid, column 1 the smoothed values.
lowess_result = lowess(rv_data_deg['mean'], rv_data_deg['degree'], frac=0.3)
lowess_x, lowess_y = lowess_result[:, 0], lowess_result[:, 1]
ax1.plot(lowess_x, lowess_y, color='blue', label='LOESS')

# Bootstrap band: resample the per-degree table with replacement, refit, and
# interpolate every refit onto the original x grid. The sampling is unseeded,
# so the band differs slightly between runs.
n_bootstraps = 1000
bootstrapped_y = np.zeros((n_bootstraps, len(lowess_x)))

for i in range(n_bootstraps):
    sample = rv_data_deg.sample(frac=1, replace=True)
    boot_lowess = lowess(sample['mean'], sample['degree'], frac=0.3)
    bootstrapped_y[i, :] = np.interp(lowess_x, boot_lowess[:, 0], boot_lowess[:, 1])

# 2.5th and 97.5th percentiles across the refits give the 95% band.
ci_lower, ci_upper = np.percentile(bootstrapped_y, [2.5, 97.5], axis=0)
ax1.fill_between(lowess_x, ci_lower, ci_upper, color='blue', alpha=0.2, label='95% CI')

# Labels, grid and legend
ax1.set_title(f"Avg Radial Velocity by Azimuth\n{long_date}")
ax1.set_xlabel('Azimuth (deg)')
ax1.set_ylabel('Absolute Radial Velocity (m/s)')
ax1.grid()
ax1.legend()

save_name = f'{data_dir}/radial_velocity_by_azimuth.png'
fig.savefig(save_name)

plt.show()


In [None]:
# reload(v)
# One matched-target plot per ADS-B target address that has more than 4 matched passes.
target_counts = matched_passes_rdp.target_address.value_counts()

target_folder = f'{data_dir}/targets'
ffunc.check_create_folder(target_folder)

# The ADS-B subset does not depend on the target, so filter once instead of on
# every loop iteration: only reports with |cal| below 300 are plotted.
adsb_plot_feat = adsb_feat.loc[adsb_feat['cal'].abs() < 300]

for tgt_address in target_counts[target_counts > 4].index:
    # File name uses the address in hex without the '0x' prefix.
    save_name = f'{target_folder}/target_matching_{hex(tgt_address)[2:]}_sortie{sortie_id}.png'
    # print(f'plotting {save_name}')
    v.plot_target_match(rdp_feat,
                        adsb_plot_feat,
                        matched_passes_rdp,
                        tgt_address,
                        figsize=(10, 8),
                        # plot_type='polar',
                        include_noise=True,
                        include_pd=True,
                        save_plot=save_plots,
                        save_name=save_name)

In [None]:
# PD and Accuracy over the windfarm

# Rio Vista. The location string used throughout this notebook is 'Rio_Vista';
# the original compared against 'Rio', so this branch could never run.
if location == 'Rio_Vista':
    # Range / azimuth box handed to the filter below (compared against
    # rho_adsb and theta_adsb).
    rho_start = 3.7
    rho_end = 13
    az_start = 116
    az_end = 164

    # # Travis
    # rho_start = 1.8
    # rho_end = 9.9
    # az_start = 190
    # az_end = 273

    # Bounds on alt_agl. NOTE(review): units are not visible here - confirm.
    alt_min = 10
    alt_max = 200

    # Matched passes are held in matched_passes_rdp. The original referenced
    # matched_passes_pd, which is not defined anywhere in this notebook.
    # NOTE(review): assumes matched_passes_rdp has rho_adsb, theta_adsb and
    # alt_agl columns - confirm.
    wra_matched = matched_passes_rdp.loc[(matched_passes_rdp.rho_adsb > rho_start) &
                                         (matched_passes_rdp.rho_adsb < rho_end) &
                                         (matched_passes_rdp.theta_adsb > az_start) &
                                         (matched_passes_rdp.theta_adsb < az_end) &
                                         (matched_passes_rdp.alt_agl > alt_min) &
                                         (matched_passes_rdp.alt_agl < alt_max)]

    # Tighter gates than the overall evaluation; plot Pd per target address.
    wra_results = paf.eval_accuracy(wra_matched, range_gate=0.2, az_gate=1)
    target_pd_df = wra_results['target_pd']
    target_pd_df.plot.scatter(x='target_address', y='pd')
    plt.grid()