# NSWC Drone Data Analysis

This notebook is intended for the analysis of the drone data provided by NSWC.

### Constants

In [68]:
READ_PATH = "../data/raw/crane/Marked_Radar_Data.csv"
WRITE_PATH = "../data/raw/crane/Hostile_Radar_Data.csv"

DROP_COLS = ['Name', 'Create Time', 'User Edit Time', 'Type', 'Associated UUID', 'Note', 'Source Name', 
             'Source Class', 'Source LID', 'Object ID','Range To Contact', 'Bearing To Contact', 'Creator', 
             'Editor', 'Origin Position (lat)', 'Origin Position (lon)', 'Origin Position (alt MSL)', 
             '2525', 'Radar Cross Section', 'Closest Time', 'Course', 'Source ID', 'Closest Distance', 
             'Deleted', 'Deleted Time', 'AIS MMSI', 'AIS IMO', 'AIS Call Sign', 'AIS Ship Type', 
             'AIS Destination', 'AIS ETA', 'Fused', 'Fused Tracks']

### Imports

In [34]:
import pandas as pd

### Helper Functions

In [55]:
def get_unknown_threat(group):
    return all(group["Combat ID"] == "UNKNOWN_THREAT")

def get_hostile(group):
    return any(group["Combat ID"] == "HOSTILE")

### Load the File

In [35]:
df = pd.read_csv(READ_PATH)
print(df.head(5))
print(df.tail(5))

                                   UUID    Name                  Create Time  \
0  7f5d87c3-f4e2-4025-8daf-9e9c0c5818dd  RADA-1  2023-11-15T13:41:08.577914Z   
1  7f5d87c3-f4e2-4025-8daf-9e9c0c5818dd  RADA-1  2023-11-15T13:41:08.577914Z   
2  7f5d87c3-f4e2-4025-8daf-9e9c0c5818dd  RADA-1  2023-11-15T13:41:08.577914Z   
3  7f5d87c3-f4e2-4025-8daf-9e9c0c5818dd  RADA-1  2023-11-15T13:41:08.577914Z   
4  7f5d87c3-f4e2-4025-8daf-9e9c0c5818dd  RADA-1  2023-11-15T13:41:08.577914Z   

                   Update Time User Edit Time   Type  \
0  2023-11-15T13:41:08.577914Z            NaN  TRACK   
1  2023-11-15T13:41:08.800179Z            NaN  TRACK   
2  2023-11-15T13:41:09.008504Z            NaN  TRACK   
3  2023-11-15T13:41:09.272647Z            NaN  TRACK   
4  2023-11-15T13:41:09.480139Z            NaN  TRACK   

                        Associated UUID  Note Source Name Source Class  ...  \
0  00000000-0000-0000-0000-000000000000   NaN   RIB-RADAR        RADAR  ...   
1  00000000-0000-0000-00

In [36]:
# drop unimportant columns
df.drop(columns=DROP_COLS, inplace=True)
print(f"Columns (after dropping): \n{df.columns}")

Columns (after dropping): 
Index(['UUID', 'Update Time', 'Combat ID', 'AZ', 'EL', 'Range',
       'Position (lat)', 'Position (lon)', 'Position (alt MSL)', 'Speed',
       'Radial Velocity'],
      dtype='object')


In [37]:
# drop the remaining NaN rows
df.dropna(axis="index", inplace=True)
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 42614 entries, 0 to 45366
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   UUID                42614 non-null  object 
 1   Update Time         42614 non-null  object 
 2   Combat ID           42614 non-null  object 
 3   AZ                  42614 non-null  float64
 4   EL                  42614 non-null  float64
 5   Range               42614 non-null  float64
 6   Position (lat)      42614 non-null  float64
 7   Position (lon)      42614 non-null  float64
 8   Position (alt MSL)  42614 non-null  float64
 9   Speed               42614 non-null  float64
 10  Radial Velocity     42614 non-null  float64
dtypes: float64(8), object(3)
memory usage: 3.9+ MB
None


In [59]:
# number of individual updates in the dataset
print(f"Total updates (rows): {df.shape[0]}")

# group the data on UUID to compute track statistics
grouped_df = df.groupby("UUID")

# total number of tracks
print(f"Total tracks: {len(grouped_df)}")

# number of tracks where every update is labeled "UNKNOWN_THREAT" (not drones)
unknown_threat = grouped_df.apply(get_unknown_threat)
unknown_threat_uuids = unknown_threat.index[unknown_threat]
print(f"\tUNKNOWN_THREAT tracks: {len(unknown_threat_uuids)}")

# number of tracks where at least one update is labeled "HOSTILE" (drones)
hostile = grouped_df.apply(get_hostile)
hostile_uuids = hostile.index[hostile]
print(f"\tHOSTILE tracks: {len(hostile_uuids)}")

Total updates (rows): 42614
Total tracks: 142
	UNKNOWN_THREAT tracks: 133
	HOSTILE tracks: 9


In [67]:
# create a new dataframe from the hostile UUIDs
hostile_df = df[df["UUID"].isin(hostile_uuids)]

# drop the combat id field to avoid future confusion
hostile_df.drop(columns=["Combat ID"], inplace=True)

# group the hostile dataframe on UUID
hostile_grouped_df = hostile_df.groupby("UUID")

# find out the number of updates for each of the hostile tracks
print(f"Number of updates for each HOSTILE track:\n{hostile_grouped_df.size()}")

# total number of hostile updates
print(f"\nTotal HOSTILE updates: {len(hostile_df)}")

Number of updates for each HOSTILE track:
UUID
00e7e451-7265-4006-9b07-9db2bfe7da95     530
22e1ccd9-fb65-4175-9cc4-a9a698bcc52c    2753
7b8d985f-f7b7-45c8-8cef-15f6f2cda72e    2081
8f853525-d2e7-4e5f-9975-4441b99de4f8     555
9d5dbb74-c7a3-42c3-b333-a0a57981c8b1     578
a72c56b8-1410-45a2-a58d-ec9c9572066b     668
b116b7fa-19de-4a43-acb0-b850eb73d9ce    1117
bac2d555-0a56-4fca-a2a9-641d077e3dfe    1139
d28262b2-c446-4ddb-8bdb-24b365cc4ced    1982
dtype: int64

Total HOSTILE updates: 11403


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hostile_df.drop(columns=["Combat ID"], inplace=True)


In [70]:
# write the hostile dataframe back to a file
hostile_df.to_csv(WRITE_PATH, index=False)