# **Analysis of the Oceanographic Data (Day 3)**

This notebook is used to analyse the data obtained from the Drifters and the CDT Casting on the Day 3 of the 2022/2023 Sea Trip. The information obtained is also compared with information from Day 1 and Day 3, likewise, the information from intake 1 (i.e. 2021/2022).

### Importing Necessary Modules for the Notebook

In [1]:
# Python-based Libraries
import os
from datetime import timedelta, time
from math import sin, cos, sqrt, atan2, radians
import datetime as dt
import io
from PIL import Image

# Data Analysis Libraries
import numpy as np
import pandas as pd

# Graph plotting libraries
import matplotlib.pyplot as plt
from matplotlib import cm
import folium

# Installations
# !apt update
# !apt install chromium-chromedriver
# !pip install selenium

### Connecting to Google Drive

In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
# Defining the path of the Group folder on Google Drive
path = "./drive/MyDrive/Colab Notebooks/OceanographyAnalysis"
# path = "./drive/SharedMyDrive/OceanographyAnalysis/"

# Check if the files are accessible
os.listdir(path)

['Other Days Data',
 'logsheet.xlsx',
 'Cleaned Drifters Data',
 'CTD Data',
 'Drifters Data',
 '.ipynb_checkpoints',
 'Last Year Data',
 'Graphs']

## Import Drifter Data



In [8]:
# Initializing a Dictionary datatype that stores data for each Drifter
data = {}

# Iterate through all file
for file in os.listdir(f"{path}/Other Days Data/Day 3 Data/drifters/"):

    # Data from the White Drifters are stored in ".csv" formats using "UTF-16 LE" Encoding
    if file.endswith(".csv"):
        data_key = file.split(".")[0][-4:] # Obtaining the name of the Drifter
        data_value = pd.read_csv(f'{path}/Other Days Data/Day 3 Data/drifters/{file}', encoding='UTF-16LE') #The encoding is very important.
        # if len(data_key) == 3:
        #     data_value = pd.read_csv(f'{path}/Other Days Data/Day 1 Data/drifters/{file}', encoding="UTF-16BE") #The encoding is very important.
        # else:
        #     data_value = pd.read_csv(f'{path}/Other Days Data/Day 1 Data/drifters/{file}') #The encoding is very important.
        data[data_key] = data_value # Storing the data in the "data" dictionary
    
    # Data from the Yellow Drifters are stored in ".txt" formats using "UTF-8" Encoding
    elif file.endswith(".txt"):
        
        data_key = file.split(".")[0][-3:] # Obtaining the name of the Drifter
        data_value = pd.read_csv(f'{path}/Other Days Data/Day 3 Data/drifters/{file}', encoding="UTF-8") #The encoding is very important.
        data[data_key] = data_value # Storing the data in the "data" dictionary

# Show the names of all drifters for which the data has been read
print(list(data.keys()))

['279', '6439', '7230', '3368', '2052', '273', '0119', '274', '8436', '9666']


### Import the Logsheet

In [21]:
log_df.shape

(8, 8)

In [22]:
log_df = pd.read_excel(f"{path}/Other Days Data/Day 3 Log Sheet (MIR Sea trip 13-10-2022).xlsx")
log_df = log_df[2:-1]
# Convert the Deployment Time and the Time of Recovery to datetime objects in Pandas
log_df['Deployment Time'] = pd.to_datetime(log_df['Deployment Time'],format= '%H:%M:%S' ).dt.time
log_df['Time of Recovery'] = pd.to_datetime(log_df['Time of Recovery'],format= '%H:%M:%S' ).dt.time

log_df.head()

Unnamed: 0,Station,Type,Deployment Time,Lon,Lat,Time of Recovery,Lon.1,Lat.1
2,2052.0,White w/o,09:54:00,005º59.972,"43º4,813",14:45:00,006º0.041,43º5.421
3,3368.0,White w/o,09:54:00,005º59.972,"43º4,813",14:39:00,005º59.593,43º5.133
4,274.0,Yellow,09:54:00,005º59.972,"43º4,813",02:41:00,005º59.716,43º5.224
5,8436.0,White w/o,10:03:00,005º58.987,43º4.686,13:52:00,005º58.677,43º4.495
6,6439.0,White,10:03:00,005º58.987,43º4.686,13:55:00,005º58.499,43º4.402


## **Data Cleaning**



### Converting the time from UTC to Paris Time

In [23]:
# Checking the data from the yellow drifters
data['273'].head(3)

Unnamed: 0,Position time (UTC),Reception time (UTC),Latitude (°),Longitude (°),Speed (m/s),Course (°),Status,Battery (V),Temperature (°C)
0,2022-10-13 07:10:00,2022-10-13 07:10:43,43.10469,5.88795,2.23889,90.1,1,4.086,15.2
1,2022-10-13 07:20:00,2022-10-13 07:20:28,43.10425,5.91188,4.17222,126.9,1,4.086,15.5
2,2022-10-13 07:30:00,2022-10-13 07:30:31,43.08842,5.93102,4.8,80.4,1,4.086,15.8


In [24]:
# Converting the UTC time to Paris Time [For Yellow Drifters]

def DataClean_YDrifters(data_df):
    data_df['Position time (UTC)'] = pd.to_datetime(data_df['Position time (UTC)'])
    data_df['Reception time (UTC)'] = pd.to_datetime(data_df['Reception time (UTC)'])

    data_df['Position time'] = data_df['Position time (UTC)'] + timedelta(hours = 2)
    data_df['Reception time'] = data_df['Reception time (UTC)'] + timedelta(hours = 2)

    data_df = data_df.drop(labels=['Position time (UTC)', 'Reception time (UTC)', 'Course (°)', 'Reception time',
                                        'Status', 'Battery (V)'], axis=1)

    data_df = data_df.rename(columns={"Latitude (°)":"Latitude", "Longitude (°)":"Longitude", 
                      "Speed (m/s)":"Speed", "Temperature (°C)":"Temperature"}, inplace=False)
    
    data_df["Position time"] = data_df["Position time"].dt.time
    
    return data_df

DataClean_YDrifters(data["273"]).head()

Unnamed: 0,Latitude,Longitude,Speed,Temperature,Position time
0,43.10469,5.88795,2.23889,15.2,09:10:00
1,43.10425,5.91188,4.17222,15.5,09:20:00
2,43.08842,5.93102,4.8,15.8,09:30:00
3,43.0828,5.96728,5.09167,16.1,09:40:00
4,43.0801,6.00169,1.99722,16.4,09:50:00


In [25]:
# Checking the data from others
data['0119'].head(3)

Unnamed: 0,DeviceName,DeviceDateTime,BatteryStatus,CommId,Latitude,Longitude
0,0-4410119,2022-10-13 12:30:43,GOOD,0-4410119,43.08086,5.97023
1,0-4410119,2022-10-13 12:25:41,GOOD,0-4410119,43.08093,5.96963
2,0-4410119,2022-10-13 12:20:41,GOOD,0-4410119,43.08086,5.9691


In [26]:
data['0119'].dtypes

DeviceName         object
DeviceDateTime     object
BatteryStatus      object
CommId             object
Latitude          float64
Longitude         float64
dtype: object

In [27]:
# Converting the UTC time to Paris Time [For Other Drifters]

def DataClean_ODrifters(data_df):

    data_df['DeviceDateTime'] = pd.to_datetime(data_df['DeviceDateTime'])
    data_df['DeviceDateTime'] = data_df['DeviceDateTime'] + timedelta(hours = 2)

    data_df["Position time"] = pd.to_datetime(data_df["DeviceDateTime"]).dt.time
    
    data_df = data_df.drop(labels=['DeviceName', 'BatteryStatus', 'CommId', "DeviceDateTime"], axis=1)
    
    # Reversing the time to match Y drifter
    data_df_re = data_df[::-1].reset_index(drop = True) 
    
    return data_df_re

In [28]:
# Clean all data

for key in data.keys():
    if len(key) == 3:
        data[key] = DataClean_YDrifters(data[key])
    elif len(key) == 4:
        data[key] = DataClean_ODrifters(data[key])

In [30]:
data['279'].head()

Unnamed: 0,Latitude,Longitude,Speed,Temperature,Position time
0,43.08836,5.93093,4.87222,16.2,09:30:00
1,43.08274,5.96728,5.01667,16.5,09:40:01
2,43.08017,6.00168,2.51667,16.9,09:49:59
3,43.07747,5.98267,0.0,20.6,10:10:01
4,43.07712,5.98199,0.0,20.7,10:20:00


In [31]:
data["0119"].head()

Unnamed: 0,Latitude,Longitude,Position time
0,43.10681,5.90103,09:16:29
1,43.10238,5.91457,09:21:29
2,43.09088,5.91985,09:26:30
3,43.08821,5.93506,09:31:30
4,43.08446,5.95272,09:36:27


### Extracting Data from Deployment Time to Recovery Time

In [37]:
# Extract Data within the Deployment and Recovery time

def extractData(key, data, log_df):

    deploy_time = log_df["Deployment Time"][log_df["Station"] == float(key)]
    recov_time = log_df["Time of Recovery"][log_df["Station"] == float(key)]

    deploy_bool = np.array([(data[key]["Position time"][i] > deploy_time).to_numpy() for i in range(len(data[key]))]).flatten()
    recov_bool = np.array([(data[key]["Position time"][i] < recov_time).to_numpy() for i in range(len(data[key]))]).flatten()

    cleaning_bool = deploy_bool & recov_bool
    data[key] = data[key][cleaning_bool]
    data[key].reset_index(inplace = True)
    
    return data

In [34]:
log_df["Deployment Time"][log_df["Station"] == float("0119")]

8    10:21:00
Name: Deployment Time, dtype: object

In [35]:
log_df["Time of Recovery"][log_df["Station"] == float("0119")]

8    14:30:00
Name: Time of Recovery, dtype: object

In [43]:
data.keys()

dict_keys(['279', '6439', '7230', '3368', '2052', '273', '0119', '274', '8436', '9666'])

In [44]:
for key in data.keys():
    data = extractData(key, data, log_df)
    
# All data has been cleaned and extracted at this point

ValueError: ignored

In [45]:
data["0119"].tail()

Unnamed: 0,index,Latitude,Longitude,Position time
44,56,43.0804,5.96759,14:05:41
45,57,43.0806,5.96809,14:10:40
46,58,43.08075,5.9686,14:16:06
47,59,43.08086,5.9691,14:20:41
48,60,43.08093,5.96963,14:25:41


In [47]:
data["279"].head()

Unnamed: 0,level_0,index,Latitude,Longitude,Speed,Temperature,Position time
0,0,3,43.07747,5.98267,0.0,20.6,10:10:01
1,1,4,43.07712,5.98199,0.0,20.7,10:20:00
2,2,5,43.07675,5.98137,0.0,20.8,10:30:00
3,3,6,43.07641,5.98081,0.0,20.8,10:40:00
4,4,7,43.07597,5.98009,0.0,20.8,10:50:00


## **Analysis**

### Computing the Trajectory Velocity

By: Chin

In [52]:
# approximate radius of earth in km
R = 6373.0


# #input time values in HH:MM:SS, time2 must be greater than time1 obviously
# def calVelocity(lat1, lon1, lat2, lon2, time1, time2):
#     start=time1
#     end=time2
#     start_dt = dt.datetime.strptime(start, '%H:%M:%S')
#     end_dt = dt.datetime.strptime(end, '%H:%M:%S')
#     diff = (end_dt - start_dt) 
#     timediff = diff.seconds/60 
#     print(type(start_dt))

#     lat1 = radians(lat1)
#     lon1 = radians(lon1)
#     lat2 = radians(lat2)
#     lon2 = radians(lon2)

#     dlon = lon2 - lon1
#     dlat = lat2 - lat1

#     a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
#     c = 2 * atan2(sqrt(a), sqrt(1 - a))

#     distance = R * c
#     velocity = distance*1000/60/timediff

#     # print("Result:", distance)
#     # print(timediff)
#     return velocity
    
# calVelocity(43.08366,5.95759,43.08342,5.95843, "11:11:00", "11:14:00")

# create vectorize function to compute velocity
def calVelocity(data):
    # select columns for latitude, longtitude and time
    df_lat = np.array(data["Latitude"])
    df_long = np.array(data["Longitude"])
    df_time = data["Position time"]
    df_time = pd.to_datetime(df_time.astype(str), format='%H:%M:%S')
    
    # compute delta time
    d_time = np.array(df_time[1:]) - df_time[:-1]
    d_time = d_time.apply(lambda dt_i : dt_i.seconds/60.0)
    
    # compute distance from lat,long
    rad_lat = np.radians(df_lat)
    rad_long = np.radians(df_long)
    d_lat = rad_lat[1:] - np.array(rad_lat[:-1])
    d_long = rad_long[1:] - np.array(rad_long[:-1]) 
    
    a = np.sin(d_lat / 2.0)**2 + np.cos(rad_lat[:-1]) * np.cos(rad_lat[1:]) * np.sin(d_long / 2)**2
    c = 2.0 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    
    distance = R * c
    velocity = distance*1000.0/60.0/np.array(d_time, dtype=float)
    return pd.Series(np.append(0.0, velocity))

In [53]:
for key in data.keys():
    data[key]["velocity"] = calVelocity(data[key])
    data[key].drop("index", inplace = True, axis = 1) # Dropping unncessary index
    data [key] = data[key].iloc[1:] # Removing the first row (where v = 0)

KeyError: ignored

In [None]:
data ['8436'].head()

Unnamed: 0,Latitude,Longitude,Position time,velocity
1,43.07685,5.97244,09:56:42,0.144393
2,43.07668,5.97191,10:01:41,0.157289
3,43.0768,5.97094,10:07:08,0.244439
4,43.07672,5.97034,10:11:43,0.180194
5,43.07665,5.96978,10:16:42,0.15438


### Exporting the Cleaned Data (with Velocities)

In [None]:
for key in data.keys():
    data[key].to_csv(f'{path}/Cleaned Drifters Data/{key}.csv')

# All the cleaned data has been exported to a folder for any necessary external use

### Visualizing the Trajectories and Velocities on Graph
By: Farooq and Maria

In [None]:
# Simple function to convert RGB to HEX color format
def rgb_to_hex(r, g, b):
  return ('#{:x}{:x}{:x}'). format(r, g, b)

rgb_to_hex(220, 220, 220)

'#dcdcdc'

In [None]:
# rgb tuple to hexadecimal conversion
def rgb2hex(rgb):
    rgb = [hex(int(256*x)) for x in rgb]
    r, g, b = [str(x)[2:] for x in rgb]
    return f"#{r}{g}{b}"

In [None]:
# Determining the minimum and maximum velocities in the entire data

min_vel = 50
max_vel = 0

for key in data.keys():
    min_value = np.min(data[key]["velocity"])
    max_value = np.max(data[key]["velocity"])

    min_vel = min_value if min_value < min_vel else min_vel
    max_vel = max_value if max_value > max_vel else max_vel


min_vel, max_vel

(0.025626215489519835, 0.3405533501074622)

Plotting the Trajectories and Velocities for Each Drifter

In [None]:
min_lat = 100
max_lat = 0
min_lon = 100
max_lon = 0

for key in data.keys():
    min_lat_val = np.min(data[key]["Latitude"])
    min_lon_val = np.min(data[key]["Longitude"])
    max_lat_val = np.max(data[key]["Latitude"])
    max_lon_val = np.max(data[key]["Longitude"])

    min_lat = min_lat_val if min_lat_val < min_lat else min_lat
    max_lat = max_lat_val if max_lat_val > max_lat else max_lat
    min_lon = min_lon_val if min_lon_val < min_lon else min_lon
    max_lon = max_lon_val if max_lon_val > max_lon else max_lon

lat_centre = (min_lat + max_lat)/2
lon_centre= (min_lon + max_lon)/2

lat_centre, lon_centre

(43.082495, 5.97621)

In [None]:
# Initializing the Map
map = folium.Map(location = [lat_centre, lon_centre], zoom_start = 15.45)

In [None]:
def plot_vel_traj(map, df):
    folium.Marker([ data[key]["Latitude"][1], data[key]["Longitude"][1] ],
              #popup = key,
              icon=folium.DivIcon(html=f"""<div style="font-family: courier new; color: blue">{f"{key}"}</div>""")
              ).add_to(map)
 
    folium.Marker([ data[key]["Latitude"].iloc[-1], data[key]["Longitude"].iloc[-1] ],
              #popup = folium.Popup(f'{key}', parse_html=True),
              icon=folium.DivIcon(html=f"""<div style="font-family: courier new; color: blue">{f"{key} end"}</div>""")
              ).add_to(map)

    color_mapper = cm.ScalarMappable(cmap=cm.OrRd)
    rgb_values = [c[:3] for c in color_mapper.to_rgba(data[key]["velocity"])] # keep rgb and drop the "a" column
    colors = [rgb2hex(rgb) for rgb in rgb_values]

    start = False

    for lat, lon, vel, col in zip(data[key]["Latitude"], data[key]["Longitude"], data[key]["velocity"], colors):
        if start:
            folium.PolyLine([[prev_lat, prev_long], [lat, lon]], color='white', weight=2
                            ).add_to(map)
        else:
            start = True

        prev_lat = lat
        prev_long = lon

        folium.CircleMarker(location=(lat,lon),radius=4, fill=True, fill_color=col, 
                            fill_opacity=0.7, stroke=False).add_to(map)

    return map

In [None]:
for key in data.keys():
    map = plot_vel_traj(map, data[key])

img_data = map._to_png(5)
img = Image.open(io.BytesIO(img_data))
img.save(f"{path}/Graphs/TrajAndVelocity.png")

map

WebDriverException: ignored

In [None]:
key = '0119'

map = folium.Map(location = [ np.average(data[key]["Latitude"]), np.average(data[key]["Longitude"]) ],
                                        zoom_start = 16)

folium.Marker([ data[key]["Latitude"][1], data[key]["Longitude"][1] ],
              popup = key).add_to(map)
 
folium.Marker([ data[key]["Latitude"].iloc[-1], data[key]["Longitude"].iloc[-1] ],
              popup = folium.Popup(f'{key}', parse_html=True)).add_to(map)

color_mapper = cm.ScalarMappable(cmap=cm.OrRd)
rgb_values = [c[:3] for c in color_mapper.to_rgba(data[key]["velocity"])] # keep rgb and drop the "a" column
colors = [rgb2hex(rgb) for rgb in rgb_values]

for lat, lon, vel, col in zip(data[key]["Latitude"], data[key]["Longitude"], data[key]["velocity"], colors):
    # folium.CircleMarker(location=(lat,lon),radius=4, fill_color=rgb_to_hex(int(col), int(col), int(col)), 
    #                     stroke=False, opcaity = 1).add_to(map)

    folium.CircleMarker(location=(lat,lon),radius=4, fill=True, fill_color=col, 
                        fill_opacity=0.7, stroke=False).add_to(map)

#my_map4.save("my_map4.html")

map

In [None]:
colors

1      7.572347
2      4.820343
3     25.537921
4     33.984269
5     34.972327
6     22.925881
7     15.410071
8     31.023367
9     30.327326
10    12.873009
11     0.000000
12     9.258262
13    50.000000
14    40.206609
Name: velocity, dtype: float64

### Analysis of Drifter Types

### Analysis of Circulation

### Analysis of the Daily Variability

## **CTD Data Analysis**

### Import and Clean the CTD Data
By Haleem and Aduragbemi

In [None]:
# Initializing a Dictionary datatype that stores data for each Drifter
ctd_data = {}

# Iterate through all file
for id, fil in enumerate(os.listdir(f"{path}/CTD Data")):
    data_key = str(id + 1) # Obtaining the name of the Drifter
    data_value = pd.read_csv(f'{path}/CTD Data/{fil}', encoding="UTF-8") #The encoding is very important.
    data[data_key] = data_value # Storing the data in the "data" dictionary

# Show the names of all drifters for which the data has been read
print(list(data.keys()))

ParserError: ignored

In [None]:
%matplotlib inline
from seabird.cnv import fCNV


#one_file = f'{path}/CTD Data/ST1_D2.cnv'

profile = fCNV(f'{path}/CTD Data/ST1_D2.cnv')
print(type(profile))


print("Header: %s" % profile.attributes.keys())

print('*' * 20)
print(profile.attributes)
print('*' * 20)
print(profile.keys())




df = profile.as_DataFrame()
df.head()










#with open(one_file, 'r') as f:
#     print(f.read())

<class 'seabird.cnv.fCNV'>
Header: dict_keys(['sbe_model', 'seasave', 'instrument_type', 'nquan', 'nvalues', 'start_time', 'bad_flag', 'file_type', 'md5', 'datetime', 'filename'])
********************
{'sbe_model': '9', 'seasave': 'V 7.26.7.121', 'instrument_type': 'CTD', 'nquan': '7', 'nvalues': '27893', 'start_time': 'Oct 12 2022 09:01:29 [System UTC, header]', 'bad_flag': '-9.990e-29', 'file_type': 'ascii', 'md5': '5a39b689b218610f028d560b98e67b04', 'datetime': datetime.datetime(2022, 10, 12, 9, 1, 29), 'filename': 'ST1_D2.cnv'}
********************
['PRES', 'TEMP', 'PSAL', 'oxygenvoltage', 'oxygen_ml_L', 'sbox0Mm/Kg', 'flag']


KeyError: ignored

ModuleNotFoundError: ignored

### Plot the T-S Profile

### Compare the Outputs with 2021 Data