# Import libraries

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

import joblib
import plotly.graph_objects as go

import time
import os

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [326]:
#df_95 = pd.read_csv('/content/df_95_labeled_on_lift.csv')
df_310 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Sample_data/v5_20230408_082538_310m.csv')


In [327]:
 pd.options.mode.copy_on_write = True

# Import Data

In [328]:
def any_name_that_you_want(data):
    # print(mean(data))
    print(data)

In [329]:
def import_data(file_path):
    # Read data from the specified file path
    df_raw=pd.read_csv(file_path)

    # Return the imported DataFrame
    return df_raw

# Cleaning Data

# Feature Engineering

In [330]:
def calculate_alt_change(df):

    # Add new column with calculated Altitude Change
    df['Alt(m)_change'] = df['Alt(m)'].diff().fillna(0)

    # Remove outliers
    # Set the threshold value for filtering
    alt_threshold = 20

    # Filter the DataFrame based on the absolute value of 'Alt(m)_change' column
    df = df[df['Alt(m)_change'].abs() <= alt_threshold]

    return df

# option to add log file later on
# print shape before and after outlier detection and removal

In [331]:
def plot_alt_changes(df):

    # Create a histogram plot
    plt.hist(df['Alt(m)_change'],
             bins=20,
             color='skyblue',
             edgecolor='black')

    #Add labels and title
    plt.xlabel('Altitude (m)')
    plt.ylabel('Frequency')
    plt.title('Altitude Changes')

    #Show the plot
    plt.show()

In [332]:
def calculate_speed_change(df):

    # Add new column with calculated Speed Change
    df['Speed(m/s)_change'] = df['Speed(m/s)'].diff().fillna(0)

    # Remove outliers
    # Set the threshold value for filtering
    threshold_speed = 3

    # Filter the DataFrame based on the absolute value of 'Speed(m/s)_change' column
    df = df[df['Speed(m/s)_change'].abs() <= threshold_speed]

    return df

    # option to add log file later on
    # print shape before and after outlier detection and removal

In [333]:
def plot_speed_changes(df):

    # Create a histogram plot
    plt.hist(df['Speed(m/s)_change'],
             bins=20,
             color='skyblue',
             edgecolor='black')

    #Add labels and title
    plt.xlabel('Speed Change (m/s)')
    plt.ylabel('Frequency')
    plt.title('Speed Changes')

    #Show the plot
    plt.show()

In [334]:
def calculate_course_change(df):

    # Add new column with calculated Course Change
    df['Course_change'] = df['Course'].diff().fillna(0)

    return df

    # option to add log file later on
    # print shape before and after outlier detection and removal

In [368]:
def convert_datetime(df):

    # Convert 'Timestamp' column to datetime format
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='ns')

    return df.fillna(0)

# Prediction

In [336]:
def select_features(df):

    features_to_use = ['accelX(g)', 'accelY(g)', 'accelZ(g)', 'accelUserX(g)', 'accelUserY(g)',
       'accelUserZ(g)', 'gyroX(rad/s)', 'gyroY(rad/s)', 'gyroZ(rad/s)',
       'Roll(rads)', 'Pitch(rads)', 'Yaw(rads)', 'Lat', 'Long', 'Speed(m/s)',
       'HorizontalAccuracy(m)', 'VerticalAccuracy(m)', 'Course', 'calMagX(µT)',
       'calMagY(µT)', 'calMagZ(µT)', 'Alt(m)_change',
       'Speed(m/s)_change', 'Course_change']

    X=df[features_to_use]

    return X

In [337]:
def load_model(file_path_to_model):

    # Load the saved model
    return joblib.load(file_path_to_model)


In [338]:
def show_hyperparameters(model):
    # show hyperparameters
    return model.get_params()


In [339]:
def predict_on_features(model, df, features):

    predictions=model.predict(features)

    df['predicted']=predictions

    return df.reset_index()


# Mapping and Plotting

In [340]:
def plot_prediction(df, plot_title='Predictions'):
    # Check if 'Timestamp' column exists and is in datetime format
    if 'Timestamp' in df.columns and pd.api.types.is_datetime64_any_dtype(df['Timestamp']):
        # Create a scatter plot for Altitude over Time, colored by 'predicted' class
        fig = px.scatter(df, x='Timestamp', y='Alt(m)', color='predicted',
                         labels={'Alt(m)': 'Altitude (m)'},
                         title=plot_title,
                         color_discrete_map={0: 'blue', 1: 'red'})

        fig.update_traces(marker=dict(size=8),
                          selector=dict(mode='markers'))

        # Customize the legend
        fig.update_layout(
            legend_title_text='Status',
            width=1000,
            height=600
        )

        fig.for_each_trace(lambda trace: trace.update(name='Not on the lift' if trace.name == 0 else 'On the lift'))

        # Show the plot
        fig.show()
    else:
        print("Warning: DataFrame's 'Timestamp' column is not in datetime format and must be converted first.")

In [341]:
def plot_total_alt_over_time(df, plot_title='Total Tracked Altitude Over Time'):

    # Create a line plot using Plotly
    fig = go.Figure()

    # Add a trace for altitude over time
    fig.add_trace(go.Scatter(x=df['Timestamp'],
                             y=df['Alt(m)'],
                             mode='lines',
                             name='Altitude'))

    # Update layout
    fig.update_layout(title=plot_title,
                      xaxis_title='Timestamp',
                      yaxis_title='Altitude (m)')

    # Show plot
    fig.show()

# Prediction steps in one function

In [342]:
# use this to predict on a csv that already has all data points
def predict_on_data(path_to_csv_file, file_path_to_model):

    # Load the data
    df = pd.read_csv(path_to_csv_file)

    # Preprocess and feature engineering
    df = calculate_alt_change(df)
    df = calculate_speed_change(df)
    df = calculate_course_change(df)
    df = convert_datetime(df)

    # Feature selection
    features = select_features(df)

    # Load the model
    rfc = joblib.load(file_path_to_model)

    # Make predictions
    df = predict_on_features(rfc, df, features)

    # Plot predictions
    plot_prediction(df)

    return df

# Real time prediction function

##### This is the hot stuff to work on :)

In [343]:
def fetch_and_process_data(path_to_csv_file, chunk_size=60, wait_time=2):

    # Load the dataset
    df_with_all_rows = pd.read_csv(path_to_csv_file)

    # Calculate the total number of chunks
    total_chunks = len(df_with_all_rows) // chunk_size

    # Initialize an empty list to store all calculations
    all_calculations = pd.DataFrame()

    for i in range(total_chunks):
        # Fetch a chunk of 60 observations
        start_index = i * chunk_size
        end_index = start_index + chunk_size
        data_chunk = df_with_all_rows.iloc[start_index:end_index]

        if len(data_chunk) > 0:

            # Append the processed chunk to the list of all calculations
            all_calculations = pd.concat([all_calculations, data_chunk], ignore_index=True)

            # Print the first few rows of the processed chunk for demonstration
        #     print(f"Processed chunk {i+1}.")
        #     print(data_chunk.head())

            # funct(all_calculations)

            # Preprocess and feature engineering
            df = calculate_alt_change(all_calculations)
            df = calculate_speed_change(df)
            df = calculate_course_change(df)
            df = convert_datetime(df)

            print(df.shape)

             # Feature selection
            # features = select_features(df)

            # # Make predictions
            # df = predict_on_features(rfc, df, features)


            # print(len(data_chunk))
            # print(len(all_calculations))

        else:
            print(f"No more data to process in chunk {i+1}.")



        # Wait for declared wait_time before processing the next chunk
        time.sleep(wait_time)



# To do before going for one of the above mentioned options

Make sure that specify the paths and other variables needed

In [344]:
# These are the default values used in the function fetch_and_process_data, change if needed
chunk_size=60
wait_time=2

In [345]:
# Specify the file path to pre-trained model
path_to_model = '/content/rf_v_0.4.pkl'
# path_to_model = '/content/rf_v_0.1.pkl'

In [348]:
# Specify the file path to directory with data to predict on
#path_to_csv_file='/content/Andermatt_Gondelbahn_Gutsch-2024-02-04_11-58-32.csv'
#path_to_csv_file='/content/df_310_labeled_on_lift_v4.csv'
#path_to_csv_file='/content/df_290_labeled_on_lift.csv'
#path_to_csv_file='/content/df_95_labeled_on_lift.csv'
#path_to_csv_file='/content/Natschen-2024-02-04_12-01-46.csv'
#path_to_csv_file='/content/df_135_labeled_on_lift.csv'
path_to_csv_file='/content/v5_20230408_082538_310m.csv'

# Option 1: 'Real time' prediction

In [349]:
# Call this function to simulate prediction in 'real time'
fetch_and_process_data(path_to_csv_file,
                       chunk_size=60,
                       wait_time=0.1)

(59, 47)
(119, 47)
(179, 47)
(237, 47)
(297, 47)
(357, 47)
(417, 47)
(477, 47)
(537, 47)
(597, 47)
(657, 47)
(717, 47)
(775, 47)
(835, 47)
(895, 47)
(954, 47)
(1013, 47)
(1070, 47)
(1125, 47)
(1180, 47)
(1240, 47)
(1298, 47)
(1358, 47)
(1418, 47)
(1478, 47)
(1538, 47)
(1598, 47)
(1658, 47)
(1716, 47)
(1776, 47)
(1836, 47)
(1895, 47)
(1955, 47)
(2012, 47)
(2072, 47)
(2132, 47)
(2192, 47)
(2252, 47)
(2312, 47)
(2372, 47)
(2432, 47)
(2492, 47)
(2552, 47)
(2608, 47)
(2663, 47)
(2718, 47)
(2778, 47)
(2836, 47)
(2896, 47)
(2956, 47)
(3016, 47)
(3076, 47)
(3136, 47)
(3195, 47)
(3254, 47)
(3314, 47)


KeyboardInterrupt: 

# Option 2: One step to prediction

In [None]:
# Input an call this one function with your paths
predict_on_data(path_to_csv_file, path_to_model)

# Option 3: Go through everything step by step

In [369]:
df_option3=import_data(path_to_csv_file)

In [370]:
df_option3=calculate_alt_change(df_option3)

In [371]:
df_option3=calculate_speed_change(df_option3)

In [372]:
df_option3=calculate_course_change(df_option3)

In [373]:
df_option3=convert_datetime(df_option3)

In [374]:
features=select_features(df_option3)

In [375]:
rfc=load_model(path_to_model)

In [376]:
df_option3=predict_on_features(rfc, df_option3, features)

In [378]:
plot_prediction(df_option3)

# Misclassification Logic Work

In [None]:
column_to_mask = 'predicted'
chunk_size = 60
threshold = 0.3

# Load input dataset
df_with_all_rows = df_option3

# Calculate the total number of chunks
total_chunks = len(df_with_all_rows) // chunk_size
print('total_chunks = ', total_chunks)

remainder = len(df_with_all_rows) % chunk_size
print('remainder = ', remainder)

# Initialize an empty list to store all calculations
all_calculations = []

# Process each chunk
for i in range(total_chunks):
    print('index = ', i)
    # Fetch a chunk of data
    start_index = i * chunk_size
    end_index = start_index + chunk_size
    data_chunk = df_with_all_rows[column_to_mask].iloc[start_index:end_index]

    # Process the chunk based on the sum of values
    if sum(data_chunk)/len(data_chunk) >= threshold:
        processed_chunk = [1] * len(data_chunk)
        print("start position", start_index)
        print('end position', end_index)

    else:
        processed_chunk = [0] * len(data_chunk)

    # Append the processed chunk to the list of all calculations
    all_calculations = all_calculations + processed_chunk

print(f"Running remainder {i}.")
remainder_data_chunk = df_with_all_rows[column_to_mask].iloc[-remainder:]

if sum(remainder_data_chunk)/len(remainder_data_chunk) >= threshold:
    processed_chunk = [1] * len(remainder_data_chunk)
else:
    processed_chunk = [0] * len(remainder_data_chunk)
all_calculations = all_calculations + processed_chunk


df_option3['mask'] = all_calculations

In [380]:
# function for masking misclassification (logic)
def misclassification_mask(df, column_to_mask, chunk_size, threshold):
    """
    Apply a binary mask to each row in a DataFrame based on the average value of a specified column in chunks.

    Parameters:
    - df: pandas.DataFrame, the DataFrame to process.
    - column_to_mask: str, the name of the column to evaluate.
    - chunk_size: int, the number of rows in each chunk.
    - threshold: float, the threshold for determining the mask value.

    Returns:
    - df: pandas.DataFrame, the original DataFrame with an added 'mask' column.
    """
    # Calculate the total number of chunks
    total_chunks = len(df) // chunk_size

    # Initialize an empty list to store all calculations
    all_calculations = []

    # Process each chunk
    for i in range(total_chunks):
        # Fetch a chunk of data
        start_index = i * chunk_size
        end_index = start_index + chunk_size
        data_chunk = df[column_to_mask].iloc[start_index:end_index]

        # Process the chunk based on the sum of values
        processed_chunk = [1 if sum(data_chunk)/len(data_chunk) >= threshold else 0] * len(data_chunk)
        # Append the processed chunk to the list of all calculations
        all_calculations += processed_chunk

    # Process remainder if any
    remainder = len(df) % chunk_size
    if remainder > 0:
        remainder_data_chunk = df[column_to_mask].iloc[-remainder:]
        processed_chunk = [1 if sum(remainder_data_chunk)/len(remainder_data_chunk) >= threshold else 0] * len(remainder_data_chunk)
        all_calculations += processed_chunk

    # Assign the calculated mask to the DataFrame
    df['mask'] = all_calculations
    return df.reset_index()

# Example usage
# Assuming df_option3 is your DataFrame and already defined.
# df_option3 = pd.DataFrame(...)
# df_with_mask = apply_mask_based_on_threshold(df_option3, 'predicted', 60, 0.3)
# print(df_with_mask)


In [None]:
df_with_mask = misclassification_mask(df_option3, 'predicted', 60, 0.3)
df_with_mask


In [None]:
fig = px.scatter(df_option3, x='Timestamp', y='Alt(m)', color='mask',
                  labels={'Alt(m)': 'Altitude (m)'},
                  title=f'Masked Predictions {path_to_csv_file}',
                  color_discrete_map={0: 'blue', 1: 'red'})

fig.update_traces(marker=dict(size=8),
                  selector=dict(mode='markers'))

# Customize the legend
fig.update_layout(
    legend_title_text='Status',
    width=1000,
    height=600
)

fig.for_each_trace(lambda trace: trace.update(name='Not on the lift' if trace.name == 0 else 'On the lift'))

# Show the plot
fig.show()
print(path_to_csv_file)

# Other methods

In [None]:
column_to_mask = 'predicted'
span = 10

# Load the dataset
df_with_all_rows = df_option3

placeholder = np.zeros(len(df_with_all_rows))

for i in range(len(df_with_all_rows)):
  if sum(df_with_all_rows[column_to_mask][i-span:i]) == 0:
    placeholder[i] = 0
  elif sum(df_with_all_rows[column_to_mask][i-span:i]) > 0:
    if sum(df_with_all_rows[column_to_mask][i:i+span])>0:
      placeholder[i] = 1
    else:
      placeholder[i] = 0

placeholder_filled = placeholder
len(placeholder_filled)

In [None]:
import pandas as pd

df_with_all_rows = df_option3

span = 10

placeholder = pd.Series(index=df_with_all_rows.index)

rolling_sum = df_with_all_rows[column_to_mask].rolling(window=span).sum()

placeholder[(rolling_sum == 0)] = 0
placeholder[(rolling_sum >= 0) & (rolling_sum.shift(-span) > 0)] = 1

placeholder_filled = placeholder.fillna(0)

In [None]:
df_option3['2ndmask'] = placeholder_filled
df_option3['2ndmask'].value_counts()

In [None]:
import numpy as np

column_to_mask = 'predicted'
span = 10

# Load the dataset
df_with_all_rows = df_option3

# Initialize placeholder filled with zeroes
placeholder = np.zeros(len(df_with_all_rows))

for i in range(len(df_with_all_rows)):
    sum_before = sum(df_with_all_rows[column_to_mask].iloc[max(0, i-span):i+1])
    if sum_before == 0:
        placeholder[i] = 0
    elif sum_before > 0:
        sum_after = sum(df_with_all_rows[column_to_mask].iloc[i:i+span+1])
        placeholder[i] = 1 if sum_after > 0 else 0

placeholder_filled = placeholder


In [None]:
df_option3['2ndmask'] = placeholder_filled
df_option3['2ndmask'].value_counts()

In [None]:
fig = px.scatter(df_option3, x='Timestamp', y='Alt(m)', color='2ndmask',
                  labels={'Alt(m)': 'Altitude (m)'},
                  title='2ndmask prediction',
                  color_discrete_map={0: 'blue', 1: 'red'})

fig.update_traces(marker=dict(size=8),
                  selector=dict(mode='markers'))

# Customize the legend
fig.update_layout(
    legend_title_text='Status',
    width=1000,
    height=600
)

fig.for_each_trace(lambda trace: trace.update(name='Not on the lift' if trace.name == 0 else 'On the lift'))

# Show the plot
fig.show()
print(path_to_csv_file)

# Defining on Lift event

In [None]:
column_to_mask = 'predicted'
chunk_size = 60
threshold = 0.3

# Load the dataset
df_with_all_rows = df_option3

# Calculate the total number of chunks
total_chunks = len(df_with_all_rows) // chunk_size
print('total_chunks = ', total_chunks)

remainder = len(df_with_all_rows) % chunk_size
print('remainder = ', remainder)

# Initialize an empty list to store all calculations
all_calculations = []
event_log = {}


# Process each chunk
for i in range(total_chunks):
    #print('index = ', i)
    # Fetch a chunk of data
    start_index = i * chunk_size
    end_index = start_index + chunk_size
    data_chunk = df_with_all_rows[column_to_mask].iloc[start_index:end_index]

    # Process the chunk based on the sum of values
    if sum(data_chunk)/len(data_chunk) >= threshold:
        processed_chunk = [1] * len(data_chunk)
        #print("start position", start_index)
        #print('end position', end_index)
        event_log[i] = start_index, end_index

    else:
        processed_chunk = [0] * len(data_chunk)

    # Append the processed chunk to the list of all calculations
    all_calculations = all_calculations + processed_chunk

#print(f"Running remainder {i}.")
remainder_data_chunk = df_with_all_rows[column_to_mask].iloc[-remainder:]

if sum(remainder_data_chunk)/len(remainder_data_chunk) >= threshold:
    processed_chunk = [1] * len(remainder_data_chunk)
else:
    processed_chunk = [0] * len(remainder_data_chunk)
all_calculations = all_calculations + processed_chunk


df_option3['mask'] = all_calculations

In [None]:
event_log

In [None]:
# maybe store events as a dict to have a key for them
continuous_events = []

start = None
end = None

for key in sorted(event_log.keys()):
    if start is None:
        start, end = event_log[key]
    elif end == event_log[key][0]:
        end = event_log[key][1]
    else:
        continuous_events.append((start, end))
        start, end = event_log[key]

# Append the last continuous event
if start is not None and end is not None:
    continuous_events.append((start, end))

print("Continuous events:")
for event in continuous_events:
    print(event)

print('number of continous events', len(continuous_events))

In [387]:

def on_lift_event_identification(df, column_to_mask, chunk_size, threshold):
    """
    Applies a binary mask to a DataFrame based on the average value of a specified column in chunks,
    and logs the start and end index of chunks meeting the threshold.

    Parameters:
    - df: pandas.DataFrame, the DataFrame to process.
    - column_to_mask: str, the column based on whose values the mask will be applied.
    - chunk_size: int, the number of rows in each chunk.
    - threshold: float, the threshold value for applying the mask.

    Returns:
    - df: pandas.DataFrame, the DataFrame with an added 'mask' column.
    - event_log: dict, log of chunk indices and their start and end positions that meet the threshold.
    """

    # Calculate the total number of chunks and remainder
    total_chunks = len(df) // chunk_size
    remainder = len(df) % chunk_size

    # Initialize an empty list for calculations and a dictionary for event logging
    all_calculations = []
    event_log = {}

    # Process each chunk
    for i in range(total_chunks):
        start_index = i * chunk_size
        end_index = start_index + chunk_size
        data_chunk = df[column_to_mask].iloc[start_index:end_index]

        # Process the chunk
        if sum(data_chunk) / len(data_chunk) >= threshold:
            processed_chunk = [1] * len(data_chunk)
            event_log[i] = (start_index, end_index)
        else:
            processed_chunk = [0] * len(data_chunk)

        all_calculations += processed_chunk

    # Process remainder
    if remainder:
        remainder_data_chunk = df[column_to_mask].iloc[-remainder:]
        if sum(remainder_data_chunk) / len(remainder_data_chunk) >= threshold:
            processed_chunk = [1] * len(remainder_data_chunk)
        else:
            processed_chunk = [0] * len(remainder_data_chunk)
        all_calculations += processed_chunk
        # Optionally log remainder chunk if it meets the threshold
        if sum(remainder_data_chunk) / len(remainder_data_chunk) >= threshold:
            event_log[total_chunks] = (len(df) - remainder, len(df))

    # Assign calculated masks to the DataFrame
    df['mask'] = all_calculations

    ## Storing in a dict instead

    continuous_events_dict = {}
    event_index = 1

    start = None
    end = None

    for key in sorted(event_log.keys()):
        if start is None:
            start, end = event_log[key]
        elif end == event_log[key][0]:
            end = event_log[key][1]
        else:
            continuous_events_dict[event_index] = (start, end)
            event_index += 1
            start, end = event_log[key]
    # Append the last continuous event
    if start is not None and end is not None:
        continuous_events_dict[event_index] = (start, end)

    print("Continuous events as dictionary:")
    for key, value in continuous_events_dict.items():
        print(f"{key}: {value}")

    print('number of continuous events:', len(continuous_events_dict))

    df['event']=0
    for label in continuous_events_dict:
      range_val = [x for x in range(continuous_events_dict[label][0], continuous_events_dict[label][1] + 1)]
      df.loc[range_val, 'event'] = label


    return df

# Example usage:
# df_option3 = pd.DataFrame(...) # Assuming df_option3 is your DataFrame
# df_on_lift = on_lift_event_identification(df_option3, 'predicted', 60, 0.3)
# print(df_on_lift)


In [388]:
df_on_lift = on_lift_event_identification(df_option3, 'predicted', 60, 0.3)
df_on_lift


Continuous events as dictionary:
1: (0, 120)
2: (420, 660)
3: (1680, 1920)
4: (2160, 2460)
5: (2880, 3120)
6: (3420, 3720)
7: (5100, 5340)
8: (6660, 6720)
9: (9300, 9540)
10: (10140, 10500)
11: (10920, 11280)
12: (11520, 11760)
13: (12600, 12900)
number of continuous events: 13


Unnamed: 0,index,Timestamp,accelX(g),accelY(g),accelZ(g),accelUserX(g),accelUserY(g),accelUserZ(g),gyroX(rad/s),gyroY(rad/s),...,magZ(µT),calMagX(µT),calMagY(µT),calMagZ(µT),Alt(m)_change,Speed(m/s)_change,Course_change,predicted,mask,event
0,0,2023-04-08 08:25:39.548,0.1469,-0.5016,-0.6164,-0.0205,0.0419,0.2105,-0.2433,-0.1863,...,-81.2584,2.0595,-32.6295,-32.3720,0.000000,0.00,0.0,0,1,1
1,1,2023-04-08 08:25:40.552,0.1552,-0.5138,-0.7686,0.0539,-0.0730,0.1244,-0.2232,0.2461,...,-76.2836,11.9967,-32.6897,-27.5039,15.920899,0.83,-2.8,1,1,1
2,2,2023-04-08 08:25:41.555,0.1574,-0.6052,-0.8676,0.0057,0.0007,-0.0876,0.0985,-0.2090,...,-71.0870,11.1734,-44.5715,-21.9304,6.374756,-0.66,0.0,1,1,1
3,4,2023-04-08 08:25:43.563,0.3829,-1.1274,0.5387,0.0678,-0.2406,0.3286,-0.4226,-0.4921,...,-28.8152,15.7277,-39.5037,20.0695,3.253784,1.18,2.5,1,1,1
4,5,2023-04-08 08:25:44.566,0.5455,-0.9876,-0.0720,0.1791,-0.0073,-0.2420,0.1047,0.4837,...,-38.4458,12.2346,-44.4381,10.6295,2.300049,-1.18,-3.2,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16581,16729,2023-04-08 13:05:32.768,0.0138,-0.0408,-0.9926,-0.0026,0.0018,0.0064,0.0108,0.0008,...,-93.9798,10.3347,-18.6698,-41.0179,0.000000,0.00,0.0,0,0,0
16582,16730,2023-04-08 13:05:33.772,0.0180,-0.0398,-0.9924,0.0016,0.0030,0.0065,0.0073,0.0041,...,-93.5711,10.5753,-18.7652,-40.5260,0.000000,0.00,0.0,0,0,0
16583,16731,2023-04-08 13:05:34.776,0.0113,-0.0406,-0.9931,-0.0048,0.0024,0.0058,0.0068,0.0043,...,-93.4262,10.6274,-18.6357,-40.7302,0.000000,0.00,0.0,0,0,0
16584,16732,2023-04-08 13:05:35.780,0.0343,-0.0407,-0.9879,0.0183,0.0024,0.0111,0.0080,0.0030,...,-93.7540,10.0764,-18.8708,-40.8857,0.000000,0.00,0.0,0,0,0


In [391]:
df_on_lift['event'].value_counts()

0     13333
10      361
11      361
4       301
6       301
13      301
2       241
3       241
5       241
7       241
9       241
12      241
1       121
8        61
Name: event, dtype: int64

In [386]:
event_log


{7: (420, 480),
 8: (480, 540),
 9: (540, 600),
 10: (600, 660),
 17: (1020, 1080),
 18: (1080, 1140),
 19: (1140, 1200),
 20: (1200, 1260),
 34: (2040, 2100),
 35: (2100, 2160),
 36: (2160, 2220),
 37: (2220, 2280),
 38: (2280, 2340),
 46: (2760, 2820),
 47: (2820, 2880),
 48: (2880, 2940),
 49: (2940, 3000),
 67: (4020, 4080),
 68: (4080, 4140),
 69: (4140, 4200),
 70: (4200, 4260),
 75: (4500, 4560),
 76: (4560, 4620),
 77: (4620, 4680),
 78: (4680, 4740)}

In [392]:
import plotly.express as px

fig = px.scatter()
# Your existing code
fig = px.scatter(df_option3, x='Timestamp', y='Alt(m)', color='mask',
                  labels={'Alt(m)': 'Altitude (m)'},
                  title=f'Masked Predictions {path_to_csv_file}',
                  color_discrete_map={0: 'blue', 1: 'red'})

fig.update_traces(marker=dict(size=8),
                  selector=dict(mode='markers'))

fig.update_layout(
    legend_title_text='Status',
    width=1000,
    height=600
)

fig.for_each_trace(lambda trace: trace.update(name='Not on the lift' if trace.name == 0 else 'On the lift'))

# # Add labels for specific points
indexes_to_label = continuous_events  # List of indexes you want to label
for index in indexes_to_label:
     fig.add_trace(go.Scatter(x=[df_option3.loc[index[0], 'Timestamp']],
                               y=[df_option3.loc[index[0], 'Alt(m)']],
                              mode='markers+text',
                              marker=dict(size=20, color='green'),
                              text=[f'start'],
                              textposition='top right'
                              )
                  )

for index in indexes_to_label:
     fig.add_trace(go.Scatter(x=[df_option3.loc[index[1], 'Timestamp']],
                               y=[df_option3.loc[index[1], 'Alt(m)']],
                              mode='markers+text',
                              marker=dict(size=20, color='red'),
                              text=[f'End'],
                              textposition='bottom left'
                              )
                  )


# Show the plot
fig.show()


In [393]:
## Storing in a dict instead

continuous_events_dict = {}
event_index = 1

start = None
end = None

for key in sorted(event_log.keys()):
    if start is None:
        start, end = event_log[key]
    elif end == event_log[key][0]:
        end = event_log[key][1]
    else:
        continuous_events_dict[event_index] = (start, end)
        event_index += 1
        start, end = event_log[key]
# Append the last continuous event
if start is not None and end is not None:
    continuous_events_dict[event_index] = (start, end)

print("Continuous events as dictionary:")
for key, value in continuous_events_dict.items():
    print(f"{key}: {value}")

print('number of continuous events:', len(continuous_events_dict))


Continuous events as dictionary:
1: (420, 660)
2: (1020, 1260)
3: (2040, 2340)
4: (2760, 3000)
5: (4020, 4260)
6: (4500, 4740)
number of continuous events: 6


In [394]:
import plotly.express as px

fig = px.scatter()
# Your existing code
fig = px.scatter(df_option3, x='Timestamp', y='Alt(m)', color='mask',
                  labels={'Alt(m)': 'Altitude (m)'},
                  title=f'Masked Predictions {path_to_csv_file}',
                  #color_discrete_map={0: 'blue', 1: 'red'}
                 )

fig.update_traces(marker=dict(size=8),
                  selector=dict(mode='markers'))

fig.update_layout(
    legend_title_text='Status',
    width=1000,
    height=600
)

fig.for_each_trace(lambda trace: trace.update(name='Not on the lift' if trace.name == 0 else 'On the lift'))

# # Add labels for specific points
indexes_to_label = continuous_events_dict  # List of indexes you want to label

for index in indexes_to_label:
     fig.add_trace(go.Scatter(x=[df_option3.loc[indexes_to_label[index][0], 'Timestamp']],
                              y=[df_option3.loc[indexes_to_label[index][0], 'Alt(m)']],
                              mode='markers+text',
                              marker=dict(size=15, color='green'),
                              text=[f'Start {index}'],
                              textposition='bottom left'
                              )
                  )

for index in indexes_to_label:
     fig.add_trace(go.Scatter(x=[df_option3.loc[indexes_to_label[index][1], 'Timestamp']],
                               y=[df_option3.loc[indexes_to_label[index][1], 'Alt(m)']],
                              mode='markers+text',
                              marker=dict(size=15, color='red'),
                              text=[f'End'],
                              textposition='top right'
                              )
                  )


# Show the plot
fig.show()


In [None]:
df_option3=df_option3.reset_index()

In [None]:
import plotly.express as px

fig = px.scatter()
# Your existing code
# fig = px.scatter(df_option3, x='Timestamp', y='Alt(m)', color='mask',
#                   labels={'Alt(m)': 'Altitude (m)'},
#                   title=f'Masked Predictions {path_to_csv_file}',
#                   #color_discrete_map={0: 'blue', 1: 'red'}
#                  )

# fig.update_traces(marker=dict(size=8),
#                   selector=dict(mode='markers'))

# fig.update_layout(
#     legend_title_text='Status',
#     width=1000,
#     height=600
# )

fig.for_each_trace(lambda trace: trace.update(name='Not on the lift' if trace.name == 0 else 'On the lift'))

# # Add labels for specific points
indexes_to_label = continuous_events_dict  # List of indexes you want to label

for index in indexes_to_label:
    range_val = [x for x in range(indexes_to_label[index][0], indexes_to_label[index][1] + 1)]
    available_points = df_option3.loc[range_val].dropna(subset=['Timestamp', 'Alt(m)'])
    if not available_points.empty:
        fig.add_trace(go.Scatter(x=available_points['Timestamp'],
                                  y=available_points['Alt(m)'],
                                  mode='markers+text',
                                  marker=dict(size=15, color=index),
                                  text=[f'{index}'],
                                  textposition='middle left',
                                  )

                      )

fig.show()
