<a href="https://colab.research.google.com/github/olanrewajufarooq/MIROceanographyAnalysis/blob/main/Oceanography_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Analysis of the Oceanographic Data**

This notebook analyses the data obtained from the drifters and the CTD casts on Day 2 of the 2022/2023 sea trip. The results are also compared with the data from Day 1 and Day 3, as well as with the data from intake 1 (i.e. 2021/2022).

### Importing Necessary Modules for the Notebook

In [None]:
# Python-based Libraries
import os
from datetime import timedelta, time
from math import sin, cos, sqrt, atan2, radians
import datetime as dt

# Data Analysis Libraries
import numpy as np
import pandas as pd

# Graph plotting libraries
import matplotlib.pyplot as plt
from matplotlib import cm
try:
    import folium
except ModuleNotFoundError: # If folium library is not installed
    print("...Folium Library not found")
    !pip3 install folium
    print("...Folium Library installed")
    import folium
finally:
    print("Folium Library Imported")
    
# Importing the Module for CTD Analysis
try:
    import ctd
except ModuleNotFoundError: # If ctd library is not installed
    print("...CTD Library not found")
    #!pip install -U lazy_loader==0.1
    !conda install -c conda-forge ctd
    print("...CTD Library installed")
    import ctd
finally:
    print("CTD library Imported")

Folium Library Imported
...CTD Library not found


### Connecting to Path

In [None]:
# Root folder that holds the inputs read later in the notebook ("DriftersData",
# "logsheet.xlsx") and the output folders.
# NOTE(review): "./" is the current working directory, not a Google Drive mount --
# adjust this value when the data lives elsewhere (e.g. on Colab).
path = "./"

# Listing the folder is a quick check that the files are accessible
os.listdir(path)

## Import Drifter Data



In [None]:
# Raw drifter tables, keyed by drifter name
data = {}

# File suffix -> (number of trailing file-stem characters that form the drifter name, file encoding)
#   ".csv": White drifters, stored with "UTF-16 LE" encoding
#   ".txt": Yellow drifters, stored with "UTF-8" encoding
# The encoding matters: reading a file with the wrong one fails or garbles the table.
name_length_and_encoding = {
    ".csv": (4, "UTF-16 LE"),
    ".txt": (3, "UTF-8"),
}

# Visit every file of the drifter folder; files with any other suffix are ignored
for file in os.listdir(f"{path}/DriftersData"):
    for suffix, (name_length, encoding) in name_length_and_encoding.items():
        if not file.endswith(suffix):
            continue

        data_key = file.split(".")[0][-name_length:]  # drifter name = end of the file stem
        data_value = pd.read_csv(f'{path}/DriftersData/{file}', encoding=encoding)
        data[data_key] = data_value
        break

# Show the names of all drifters for which the data has been read
print(list(data.keys()))

### Import the Logsheet

In [None]:
log_df = pd.read_excel(f"{path}/logsheet.xlsx")

# Parse both logsheet time columns ("HH:MM:SS") and keep only the time of day,
# so that they hold `datetime.time` values
for time_column in ('Deployment Time', 'Time of Recovery'):
    parsed = pd.to_datetime(log_df[time_column], format='%H:%M:%S')
    log_df[time_column] = parsed.dt.time

log_df.head(3)

## **Data Cleaning**



### Converting the time from UTC to Paris Time

In [None]:
# Preview the first three raw rows of drifter "273"
# (3-character name, i.e. one of the yellow drifters read from a ".txt" file)
data['273'].head(3)

In [None]:
# Converting the UTC time to Paris Time [For Yellow Drifters]

def DataClean_YDrifters(data_df, utc_offset_hours=2):
    """Clean the raw table of a yellow drifter.

    The UTC position time is shifted to local time, the unused columns are
    removed, the unit suffixes are stripped from the column names and only the
    time of day of each position is kept.

    The input frame is left untouched; a new DataFrame is returned.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Raw drifter table. It must contain the columns 'Position time (UTC)',
        'Reception time (UTC)', 'Course (°)', 'Status' and 'Battery (V)'
        (a missing one raises ``KeyError``).
    utc_offset_hours : int or float, optional
        Hours added to the UTC timestamps. The default of 2 is the fixed offset
        the notebook uses for "Paris Time".
        NOTE(review): a fixed offset ignores daylight-saving changes -- confirm
        it matches the date of the sea trip.

    Returns
    -------
    pandas.DataFrame
        The remaining columns (with "Latitude", "Longitude", "Speed" and
        "Temperature" renamed) followed by "Position time", which holds
        `datetime.time` values.
    """
    # Parse and shift into a separate Series so the caller's frame is not modified
    position_local = pd.to_datetime(data_df['Position time (UTC)']) + timedelta(hours=utc_offset_hours)

    # The reception time is not used by the analysis, so it is dropped without being converted
    cleaned = data_df.drop(labels=['Position time (UTC)', 'Reception time (UTC)', 'Course (°)',
                                   'Status', 'Battery (V)'], axis=1)

    cleaned = cleaned.rename(columns={"Latitude (°)": "Latitude", "Longitude (°)": "Longitude",
                                      "Speed (m/s)": "Speed", "Temperature (°C)": "Temperature"})

    # Only the time of day is kept; the date part is discarded
    cleaned["Position time"] = position_local.dt.time

    return cleaned

In [None]:
# Preview the first three raw rows of drifter "0119"
# (4-character name, i.e. one of the drifters read from a ".csv" file)
data['0119'].head(3)

In [None]:
# Inspect the column dtypes of the "0119" table before cleaning
data['0119'].dtypes

In [None]:
# Converting the UTC time to Paris Time [For Other Drifters]

def DataClean_ODrifters(data_df, utc_offset_hours=2):
    """Clean the raw table of one of the other (".csv") drifters.

    The UTC device time is shifted to local time and reduced to the time of
    day, the unused columns are removed and the row order is reversed.

    The input frame is left untouched; a new DataFrame is returned.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Raw drifter table. It must contain the columns 'DeviceName',
        'BatteryStatus', 'CommId' and 'DeviceDateTime' (a missing one raises
        ``KeyError``).
    utc_offset_hours : int or float, optional
        Hours added to the UTC timestamps. The default of 2 is the fixed offset
        the notebook uses for "Paris Time".
        NOTE(review): a fixed offset ignores daylight-saving changes -- confirm
        it matches the date of the sea trip.

    Returns
    -------
    pandas.DataFrame
        The remaining columns followed by "Position time" (`datetime.time`
        values), with the rows in reversed order and a fresh 0..n-1 index.
    """
    # Parse and shift into a separate Series so the caller's frame is not modified
    device_local = pd.to_datetime(data_df['DeviceDateTime']) + timedelta(hours=utc_offset_hours)

    cleaned = data_df.drop(labels=['DeviceName', 'BatteryStatus', 'CommId', "DeviceDateTime"], axis=1)

    # Only the time of day is kept; the date part is discarded
    cleaned["Position time"] = device_local.dt.time

    # Reversing the row order to match the yellow drifters
    # (presumably these files are stored newest-first -- TODO confirm)
    return cleaned[::-1].reset_index(drop=True)

In [None]:
# Clean all data

# The length of the drifter name tells which file format (and therefore which
# cleaning function) applies: 3 characters -> yellow drifters, 4 characters -> the others.
# Names of any other length are left as they are.
cleaner_by_name_length = {
    3: DataClean_YDrifters,
    4: DataClean_ODrifters,
}

for key in data.keys():
    clean = cleaner_by_name_length.get(len(key))
    if clean is not None:
        data[key] = clean(data[key])

In [None]:
# Spot-check a yellow drifter (3-character name) after cleaning
data['277'].head(3)

In [None]:
# Spot-check a ".csv" drifter (4-character name) after cleaning
data["0119"].head(3)

### Extracting Data from Deployment Time to Recovery Time

In [None]:
# Extract Data within the Deployment and Recovery time

def extractData(key, data, log_df):
    """Keep only the rows recorded between deployment and recovery of one drifter.

    Parameters
    ----------
    key : str
        Drifter name. It is converted with ``float(key)`` to find the drifter
        in the "Name" column of ``log_df``.
    data : dict
        Maps drifter names to DataFrames that have a "Position time" column.
        ``data[key]`` is replaced by the trimmed table.
    log_df : pandas.DataFrame
        Logsheet with the columns "Name", "Deployment Time" and
        "Time of Recovery".

    Returns
    -------
    dict
        The same ``data`` object that was passed in.

    Raises
    ------
    ValueError
        If the logsheet does not contain exactly one row for the drifter.
    """
    log_rows = log_df[log_df["Name"] == float(key)]
    if len(log_rows) != 1:
        # Zero or several matches would otherwise surface as an obscure
        # length-mismatch error during the filtering below
        raise ValueError(
            f"Expected exactly one logsheet row for drifter {key!r}, found {len(log_rows)}"
        )

    deploy_time = log_rows["Deployment Time"].iloc[0]
    recov_time = log_rows["Time of Recovery"].iloc[0]

    # Strictly inside the window: rows stamped exactly at the deployment or
    # recovery time are discarded
    position_time = data[key]["Position time"]
    in_water = (position_time > deploy_time) & (position_time < recov_time)

    # reset_index() without drop=True keeps the previous row labels in an
    # "index" column; a later cell of the notebook drops that column
    data[key] = data[key][in_water].reset_index()

    return data

In [None]:
# Trim every drifter table to its own deployment/recovery window
drifter_names = list(data.keys())
for key in drifter_names:
    data = extractData(key, data, log_df)

# All data has been cleaned and extracted at this point

In [None]:
# Last three rows of drifter "0119" after trimming to the deployment/recovery window
data["0119"].tail(3)

In [None]:
# First three rows of drifter "277" after trimming to the deployment/recovery window
data["277"].head(3)

## **Analysis**

### Computing the Trajectory Velocity

By: Chin

In [None]:
# Approximate radius of the Earth, in km
R = 6373.0


def calVelocity(data):
    """Speed of a drifter between consecutive positions.

    The distance between two successive fixes is the great-circle (haversine)
    distance on a sphere of radius ``R``; it is divided by the time elapsed
    between the two fixes.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with the columns "Latitude" and "Longitude" (degrees) and
        "Position time" (values whose string form is "HH:MM:SS").

    Returns
    -------
    pandas.Series
        One value per row, in m/s. The first value is 0.0 because the first
        position has no predecessor.
    """
    # Time of day of every fix, parsed from its "HH:MM:SS" text form
    fix_times = pd.to_datetime(data["Position time"].astype(str), format='%H:%M:%S')

    # Minutes elapsed between each fix and the previous one
    elapsed = np.array(fix_times[1:]) - fix_times[:-1]
    elapsed_minutes = elapsed.apply(lambda step: step.seconds/60.0)

    # Coordinates in radians, split into the "from" and "to" end of every step
    lat = np.radians(np.array(data["Latitude"]))
    lon = np.radians(np.array(data["Longitude"]))
    lat_from, lat_to = lat[:-1], lat[1:]
    lat_step = lat_to - np.array(lat_from)
    lon_step = lon[1:] - np.array(lon[:-1])

    # Haversine formula
    haversine = np.sin(lat_step / 2.0)**2 + np.cos(lat_from) * np.cos(lat_to) * np.sin(lon_step / 2)**2
    central_angle = 2.0 * np.arctan2(np.sqrt(haversine), np.sqrt(1 - haversine))

    # km -> m, minutes -> seconds
    distance_km = R * central_angle
    speed = distance_km*1000.0/60.0/np.array(elapsed_minutes, dtype=float)

    return pd.Series(np.append(0.0, speed))

In [None]:
for key, drifter_df in list(data.items()):
    data[key] = (
        drifter_df
        .assign(velocity=calVelocity(drifter_df))  # speed reaching each fix
        .drop(columns="index")                     # Dropping unnecessary index
        .iloc[1:]                                  # Removing the first row (where v = 0)
    )

In [None]:
# First three rows of drifter "8436", now including the "velocity" column
data ['8436'].head(3)

### Exporting the Cleaned Data (with Velocities)

In [None]:
# Write one CSV file per drifter; the folder is created first so the export
# also works on a fresh checkout where it does not exist yet
export_dir = f'{path}/CleanedDriftersData'
os.makedirs(export_dir, exist_ok=True)

for key in data.keys():
    data[key].to_csv(f'{export_dir}/{key}.csv')

# All the cleaned data has been exported to a folder for any necessary external use

### Visualizing the Trajectories and Velocities on Graph
By: Farooq and Maria

In [None]:
# rgb tuple to hexadecimal conversion
def rgb2hex(rgb):
    """Convert an (r, g, b) triple of floats in [0, 1] to a "#rrggbb" string.

    Every channel is scaled to 0..255, clamped to that range and written as
    exactly two hexadecimal digits, so the result is always a valid
    7-character colour: 1.0 maps to "ff" and small values keep their leading
    zero (e.g. 0.02 -> "05").

    Parameters
    ----------
    rgb : iterable of three numbers
        Red, green and blue intensities, nominally between 0 and 1.

    Returns
    -------
    str
        Colour in "#rrggbb" notation.
    """
    r, g, b = [min(255, max(0, int(round(255 * float(x))))) for x in rgb]
    return f"#{r:02x}{g:02x}{b:02x}"

In [None]:
# Lowest and highest velocity found over all drifters.
# The search starts from 50 (for the minimum) and 0 (for the maximum).
min_vel, max_vel = 50, 0

for key, drifter_df in data.items():
    speeds = drifter_df["velocity"]
    min_vel = min(min_vel, np.min(speeds))
    max_vel = max(max_vel, np.max(speeds))


min_vel, max_vel

Plotting the Trajectories and Velocities for Each Drifter

In [None]:
# Bounding box of all drifter positions.
# The search starts from +/- infinity so that the result is correct for any
# coordinates, including negative latitudes/longitudes (southern or western
# hemisphere) and longitudes above 100 degrees.
min_lat = min_lon = float("inf")
max_lat = max_lon = float("-inf")

for key in data.keys():
    latitudes = data[key]["Latitude"]
    longitudes = data[key]["Longitude"]

    min_lat = min(min_lat, np.min(latitudes))
    max_lat = max(max_lat, np.max(latitudes))
    min_lon = min(min_lon, np.min(longitudes))
    max_lon = max(max_lon, np.max(longitudes))

# Centre of the bounding box, used as the centre of the map
lat_centre = (min_lat + max_lat)/2
lon_centre = (min_lon + max_lon)/2

lat_centre, lon_centre

In [None]:
# Initializing the Map, centred on the midpoint of the drifter positions (lat_centre, lon_centre)
# and restricted to zoom levels 14 to 16.
# NOTE(review): the name `map` shadows Python's built-in `map()` for the rest of the notebook.
map = folium.Map(location = [lat_centre, lon_centre], zoom_start = 14.50,
                min_zoom=14, max_zoom=16)

In [None]:
def plot_vel_traj(map, df, label=None):
    """Draw the trajectory of one drifter on a folium map, coloured by velocity.

    A text marker is placed at the first and at the last position, consecutive
    positions are joined by a white line and every position gets a circle
    whose fill colour encodes the velocity (OrRd colour map).

    Parameters
    ----------
    map : folium.Map
        Map the layers are added to.
    df : pandas.DataFrame
        Drifter table with the columns "Latitude", "Longitude" and "velocity".
        Rows are used in their stored order.
    label : str, optional
        Text of the start marker; the end marker shows ``f"{label} end"``.
        When omitted, the notebook-level loop variable ``key`` is used, which
        keeps existing calls ``plot_vel_traj(map, data[key])`` working.

    Returns
    -------
    folium.Map
        The same ``map`` object.
    """
    if label is None:
        # Backward compatibility with callers that rely on the global loop variable
        label = key

    # Nothing to draw (and no first/last position) for an empty table
    if len(df) == 0:
        return map

    latitudes = df["Latitude"]
    longitudes = df["Longitude"]

    # Positional access: the first/last row is wanted whatever the index labels are
    folium.Marker([ latitudes.iloc[0], longitudes.iloc[0] ],
              icon=folium.DivIcon(html=f"""<div style="font-family: courier new; color: blue">{label}</div>""")
              ).add_to(map)

    folium.Marker([ latitudes.iloc[-1], longitudes.iloc[-1] ],
              icon=folium.DivIcon(html=f"""<div style="font-family: courier new; color: blue">{label} end</div>""")
              ).add_to(map)

    # NOTE(review): a new ScalarMappable without an explicit norm is created on every
    # call, so the colours are presumably scaled to this drifter's own velocity range
    # and are not comparable between drifters -- confirm this is intended.
    color_mapper = cm.ScalarMappable(cmap=cm.OrRd)
    rgb_values = [c[:3] for c in color_mapper.to_rgba(df["velocity"])] # keep rgb and drop the "a" column
    colors = [rgb2hex(rgb) for rgb in rgb_values]

    previous_point = None

    for lat, lon, col in zip(latitudes, longitudes, colors):
        # Join this position to the previous one (no segment before the first position)
        if previous_point is not None:
            folium.PolyLine([previous_point, [lat, lon]], color='white', weight=2
                            ).add_to(map)

        previous_point = [lat, lon]

        folium.CircleMarker(location=(lat,lon),radius=4, fill=True, fill_color=col,
                            fill_opacity=0.7, stroke=False).add_to(map)

    return map

In [None]:
# Add the trajectory of every drifter to the map
for key in data.keys():
    map = plot_vel_traj(map, data[key])

# folium saves the map as an HTML page, so the file gets an ".html" extension
# (a ".png" name would hold HTML text that no image viewer can open).
# The output folder is created first in case it does not exist yet.
graphs_dir = f"{path}/Graphs"
os.makedirs(graphs_dir, exist_ok=True)
map.save(f"{graphs_dir}/TrajAndVelocity.html")
map

### Analysis of Drifter Types

### Analysis of Circulation

### Analysis of the Daily Variability

Collection and Cleaning of Day 1 Data

Collection and Cleaning of Day 3 Data

## **CTD Data Analysis**

### Import and Clean the CTD Data
By Haleem and Aduragbemi

### Plot the T-S Profile

### Compare the Outputs with 2021 Data