In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the cleaned OpenSky CSV and preview its first rows.
# NOTE(review): the preview contains an "Unnamed: 0" column, which looks like a
# row index that was written out when the CSV was saved. Consider `index_col=0`
# once it is confirmed that nothing downstream relies on that column.
df = pd.read_csv(filepath_or_buffer="../data/clean/opensky_asia_clean.csv")
df.head(n=5)

Unnamed: 0,timestamp,icao24,callsign,latitude,longitude,baro_altitude,velocity,vertical_rate,true_track,on_ground
0,2026-02-13 16:28:11,80162c,AXB321,22.4359,49.5145,10363.2,217.83,0.0,280.75,False
1,2026-02-13 16:28:11,801640,AIC2303,16.4021,96.0041,10965.18,233.14,0.33,303.94,False
2,2026-02-13 16:28:11,801645,AIC1863,28.5596,77.089,,0.0,,278.44,True
3,2026-02-13 16:28:11,80162f,AIC212,28.6025,81.5784,10363.2,175.82,0.0,286.31,False
4,2026-02-13 16:28:11,80163c,IGO243H,25.545,76.8816,11894.82,246.1,0.0,177.72,False


### Feature introduction:
> 1. **timestamp**: timestamp at which data was recorded.
> 2. **icao24**: Unique ICAO 24-bit address of the transponder in hex string representation.
> 3. **callsign**: Callsign of the vehicle (8 chars). Can be null if no callsign has been received.
> 4. **latitude**: WGS-84 latitude in decimal degrees. Can be null.
> 5. **longitude**: WGS-84 longitude in decimal degrees. Can be null.
> 6. **baro_altitude**: Barometric altitude in meters. Can be null.
> 7. **velocity**: Velocity over ground in m/s. Can be null.
> 8. **vertical_rate**: Vertical rate in m/s. A positive value indicates that the airplane is climbing, a negative value indicates that it descends. Can be null.
> 9. **true_track**: True track in decimal degrees clockwise from north (north=0°). Can be null.
> 10. **on_ground**: Boolean value which indicates if the position was retrieved from a surface position report.

In [3]:
# Count the distinct timestamp values in the data (missing values are not counted).
df["timestamp"].nunique(dropna=True)

1088

In [5]:
# Order rows by aircraft first and by timestamp second, so that consecutive
# rows of one `icao24` are consecutive observations of that aircraft.
df = df.sort_values(["icao24", "timestamp"], ascending=True)

In [9]:
# Preview the first five rows of the sorted frame.
df.head(n=5)

Unnamed: 0,timestamp,icao24,callsign,latitude,longitude,baro_altitude,velocity,vertical_rate,true_track,on_ground
153377,2026-02-13 16:54:43,0100a2,MSR5060,30.8237,30.5251,9052.56,213.33,7.15,322.05,False
154423,2026-02-13 16:55:00,0100a2,MSR5060,30.8654,30.4872,9197.34,212.92,7.15,321.97,False
155466,2026-02-13 16:55:05,0100a2,MSR5060,30.8687,30.4843,9212.58,212.92,7.15,321.97,False
156509,2026-02-13 16:55:05,0100a2,MSR5060,30.8687,30.4843,9212.58,212.92,7.15,321.97,False
157548,2026-02-13 16:55:34,0100a2,MSR5060,30.9144,30.4426,9273.54,219.83,0.0,321.84,False


## Sin and Cos

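`true_track` is a circular quantity: 0° and 360° are the same heading, but as raw numbers they look as far apart as possible, and a model has no way of knowing they are equal. Encoding the angle as its sine and cosine removes that artificial jump, so nearby headings always get nearby feature values.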

In [10]:
"""
convert degrees into radian first.
"""
theta = np.deg2rad(df["true_track"])
df["track_sin"] = np.sin(theta)
df["track_cos"] = np.cos(theta)

## Change in Time (Delta)

Next comes the change in time (delta) for each aircraft: the number of seconds that have passed between two consecutive observations of the same aircraft.

In [15]:
# Parse the timestamp strings into datetime64 values so time arithmetic works.
df["timestamp"] = df["timestamp"].pipe(pd.to_datetime)

In [16]:
"""
change in time (delta)
"""
df["delta_time"] = (
    df.groupby("icao24")["timestamp"]
        .diff()
        .dt.total_seconds()
)

## Distance Travelled

The haversine formula calculates the shortest ("as-the-crow-flies") distance between two points on a sphere, such as the Earth, from their latitudes and longitudes. Because the Earth is not a perfect sphere the result is an approximation, with less than 1% error.

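$$D = 2\,\mu_E \,\arcsin\left(\sqrt{\sin^2\left(\frac{\mathrm{lat}_2-\mathrm{lat}_1}{2}\right) + \cos(\mathrm{lat}_1)\,\cos(\mathrm{lat}_2)\,\sin^2\left(\frac{\mathrm{lon}_2-\mathrm{lon}_1}{2}\right)}\right)$$
where:

    D    = Great Circle Arc Length (distance)
    lat1 = latitude of first point
    lon1 = longitude of first point
    lat2 = latitude of second point
    lon2 = longitude of second point
    μ_E  = Mean Radius of the Earth (6371.009 km; the code below uses the rounded value 6,371,000 m)

All angles must be in radians when the formula is evaluated.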


In [17]:
def haversine(lat1, lon1, lat2, lon2, radius=6371000):
    """Great-circle distance between two points using the haversine formula.

    Only NumPy ufuncs are used, so the coordinates may be scalars or
    array-likes such as NumPy arrays or pandas Series (evaluated element-wise).

    Args:
        lat1 (float): latitude of first point, in decimal degrees
        lon1 (float): longitude of first point, in decimal degrees
        lat2 (float): latitude of second point, in decimal degrees
        lon2 (float): longitude of second point, in decimal degrees
        radius (float): radius of the sphere. The result is expressed in the
            same unit. Defaults to 6371000, the Earth's mean radius in metres.

    Returns:
        float: distance along the surface of the sphere, in the unit of
        ``radius`` (metres by default). NaN coordinates yield NaN.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])      # degree to radian
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    # Mathematically 0 <= a <= 1, but floating-point rounding can push it a hair
    # above 1 for (near-)antipodal points, which would make sqrt(1 - a) NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius * c

In [None]:
# Position at the previous observation of the same aircraft
# (NaN for the first row of each `icao24` group).
df["prev_lat"] = df.groupby(by="icao24")["latitude"].shift(periods=1)
df["prev_lon"] = df.groupby(by="icao24")["longitude"].shift(periods=1)

# Great-circle distance covered since that previous observation.
df["distance_delta"] = haversine(
    lat1=df["prev_lat"],
    lon1=df["prev_lon"],
    lat2=df["latitude"],
    lon2=df["longitude"],
)

## Acceleration (m/s²)

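Acceleration is approximated as the change in ground speed (`velocity`) between two consecutive observations of the same aircraft, divided by the time elapsed between them (`delta_time`).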

In [23]:
# Ground speed at the previous observation of the same aircraft
# (NaN for the first row of each `icao24` group).
df["prev_velocity"] = df.groupby("icao24")["velocity"].shift(1)

# The data contains repeated observations with the same aircraft and timestamp,
# which give delta_time == 0. Dividing by that would produce +/-inf (or NaN for
# 0/0), so zero intervals are masked and the acceleration is left as NaN.
df["acceleration"] = (
    (df["velocity"] - df["prev_velocity"]) / df["delta_time"].replace(0, np.nan)
)

## Turn rate (degrees per second)