# Import libraries

In [39]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

import joblib
import plotly.graph_objects as go

import time
import os

In [3]:
 # Turn on pandas' copy-on-write mode for the rest of the session.
 pd.options.mode.copy_on_write = True

# Import Data

In [4]:
def any_name_that_you_want(data):
    """Print ``data`` to standard output; returns nothing."""
    print(data)

In [5]:
def import_data(file_path):
    """Load the CSV file at ``file_path`` and return it as a DataFrame."""
    return pd.read_csv(file_path)

# Cleaning Data

# Feature Engineering

In [6]:
def calculate_alt_change(df, alt_threshold=20):
    """Add an altitude-change column and drop rows with outlier jumps.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'Alt(m)' column.
    alt_threshold : float, default 20
        Rows whose absolute 'Alt(m)_change' exceeds this value are removed.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra 'Alt(m)_change' column (row-to-row
        difference of 'Alt(m)', 0 for the first row), filtered to rows
        within the threshold. The input frame is left untouched.
    """
    alt_change = df['Alt(m)'].diff().fillna(0)

    # assign() builds a new frame, so the caller's DataFrame is not modified
    with_change = df.assign(**{'Alt(m)_change': alt_change})

    # Keep only rows whose change is within the threshold (outlier removal)
    return with_change[with_change['Alt(m)_change'].abs() <= alt_threshold]

# option to add log file later on
# print shape before and after outlier detection and removal

In [7]:
def plot_alt_changes(df):
    """Show a 20-bin histogram of the 'Alt(m)_change' column of ``df``."""
    plt.hist(df['Alt(m)_change'],
             bins=20,
             color='skyblue',
             edgecolor='black')

    # The x-axis shows the change in altitude, not the altitude itself
    plt.xlabel('Altitude Change (m)')
    plt.ylabel('Frequency')
    plt.title('Altitude Changes')

    plt.show()

In [8]:
def calculate_speed_change(df, threshold_speed=3):
    """Add a speed-change column and drop rows with outlier jumps.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Speed(m/s)' column.
    threshold_speed : float, default 3
        Rows whose absolute 'Speed(m/s)_change' exceeds this value are removed.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra 'Speed(m/s)_change' column (row-to-row
        difference of 'Speed(m/s)', 0 for the first row), filtered to rows
        within the threshold. The input frame is left untouched.
    """
    # TODO: optionally log the frame shape before and after outlier removal
    speed_change = df['Speed(m/s)'].diff().fillna(0)

    # assign() builds a new frame, so the caller's DataFrame is not modified
    with_change = df.assign(**{'Speed(m/s)_change': speed_change})

    # Keep only rows whose change is within the threshold (outlier removal)
    return with_change[with_change['Speed(m/s)_change'].abs() <= threshold_speed]

In [9]:
def plot_speed_changes(df):
    """Show a 20-bin histogram of the 'Speed(m/s)_change' column of ``df``."""
    hist_style = {'bins': 20, 'color': 'skyblue', 'edgecolor': 'black'}
    plt.hist(df['Speed(m/s)_change'], **hist_style)

    # Title and axis labels
    plt.title('Speed Changes')
    plt.xlabel('Speed Change (m/s)')
    plt.ylabel('Frequency')

    # Render the figure
    plt.show()

In [10]:
def calculate_course_change(df):
    """Return a copy of ``df`` with a 'Course_change' column added.

    'Course_change' is the row-to-row difference of the 'Course' column,
    with 0 for the first row. No rows are filtered here, and the input
    frame is left untouched.
    """
    # TODO: optionally log the frame shape once outlier removal is added here
    course_change = df['Course'].diff().fillna(0)

    # assign() builds a new frame, so the caller's DataFrame is not modified
    return df.assign(Course_change=course_change)

In [11]:
def convert_datetime(df):
    """Return a copy of ``df`` whose 'Timestamp' column is datetime-typed.

    The existing 'Timestamp' values are passed to ``pd.to_datetime`` with
    ``unit='ns'``. The input frame is left untouched.
    """
    timestamps = pd.to_datetime(df['Timestamp'], unit='ns')

    # assign() builds a new frame, so the caller's DataFrame is not modified
    return df.assign(Timestamp=timestamps)

# Prediction

In [12]:
def select_features(df):
    """Return the 24 model input columns of ``df`` in a fixed order."""
    sensor_cols = ['accelX(g)', 'accelY(g)', 'accelZ(g)',
                   'accelUserX(g)', 'accelUserY(g)', 'accelUserZ(g)',
                   'gyroX(rad/s)', 'gyroY(rad/s)', 'gyroZ(rad/s)',
                   'Roll(rads)', 'Pitch(rads)', 'Yaw(rads)']
    location_cols = ['Lat', 'Long', 'Speed(m/s)',
                     'HorizontalAccuracy(m)', 'VerticalAccuracy(m)', 'Course']
    mag_cols = ['calMagX(µT)', 'calMagY(µT)', 'calMagZ(µT)']
    change_cols = ['Alt(m)_change', 'Speed(m/s)_change', 'Course_change']

    # Concatenation order defines the column order handed to the model
    return df[sensor_cols + location_cols + mag_cols + change_cols]

In [13]:
def load_model(file_path_to_model):
    """Load and return the object stored at ``file_path_to_model`` via joblib.

    NOTE(review): joblib files are pickle-based as far as I know, so only
    load model files from a trusted source -- confirm against joblib docs.
    """
    model = joblib.load(file_path_to_model)
    return model


In [14]:
def show_hyperparameters(model):
    """Return whatever ``model.get_params()`` reports for the given model."""
    hyperparameters = model.get_params()
    return hyperparameters


In [15]:
def predict_on_features(model, df, features):
    """Return a copy of ``df`` with the model's predictions attached.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(features)``.
    df : pandas.DataFrame
        Frame the predictions belong to; must have one row per prediction.
    features : pandas.DataFrame
        Model input, passed unchanged to ``model.predict``.

    Returns
    -------
    pandas.DataFrame
        A new frame equal to ``df`` plus a 'predicted' column. The input
        frame is left untouched.
    """
    predictions = model.predict(features)

    # assign() builds a new frame, so the caller's DataFrame is not modified
    return df.assign(predicted=predictions)


# Mapping and Plotting

In [16]:
def plot_prediction(df, plot_title='Predictions'):
    """Scatter-plot altitude over time, coloured by predicted lift status.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a datetime 'Timestamp' column plus 'Alt(m)' and 'predicted'.
    plot_title : str
        Title shown above the figure.

    Prints a warning and plots nothing when 'Timestamp' is missing or is
    not datetime-typed.
    """
    has_datetime = ('Timestamp' in df.columns
                    and pd.api.types.is_datetime64_any_dtype(df['Timestamp']))
    if not has_datetime:
        print("Warning: DataFrame's 'Timestamp' column is not in datetime format and must be converted first.")
        return

    # Plotly Express treats a numeric colour column as continuous, which
    # ignores color_discrete_map and yields no per-class traces. Map the
    # 0/1 predictions to text labels so the colouring is discrete.
    status_labels = {0: 'Not on the lift', 1: 'On the lift'}
    status = df['predicted'].map(status_labels)
    # Any value other than 0/1 keeps its own text so the row is still drawn
    status = status.fillna(df['predicted'].astype(str))
    plot_df = df.assign(Status=status)

    fig = px.scatter(plot_df, x='Timestamp', y='Alt(m)', color='Status',
                     labels={'Alt(m)': 'Altitude (m)'},
                     title=plot_title,
                     color_discrete_map={'Not on the lift': 'blue',
                                         'On the lift': 'red'})

    fig.update_traces(marker=dict(size=8),
                      selector=dict(mode='markers'))

    # Customize the legend and figure size
    fig.update_layout(
        legend_title_text='Status',
        width=1000,
        height=600
    )

    fig.show()

In [17]:
def plot_total_alt_over_time(df, plot_title='Total Tracked Altitude Over Time'):
    """Draw 'Alt(m)' against 'Timestamp' as a single Plotly line and show it."""
    # One line trace: altitude over time
    altitude_line = go.Scatter(x=df['Timestamp'],
                               y=df['Alt(m)'],
                               mode='lines',
                               name='Altitude')

    fig = go.Figure()
    fig.add_trace(altitude_line)

    # Title and axis captions
    layout_options = {'title': plot_title,
                      'xaxis_title': 'Timestamp',
                      'yaxis_title': 'Altitude (m)'}
    fig.update_layout(**layout_options)

    fig.show()

# Prediction steps in one function

In [18]:
# use this to predict on a csv that already has all data points
def predict_on_data(path_to_csv_file, file_path_to_model):
    """Run the full pipeline on one CSV: load, preprocess, predict, plot.

    Returns the processed DataFrame including the 'predicted' column.
    """
    df = pd.read_csv(path_to_csv_file)

    # Preprocessing / feature engineering, applied in this fixed order
    preprocessing_steps = (calculate_alt_change,
                           calculate_speed_change,
                           calculate_course_change,
                           convert_datetime)
    for step in preprocessing_steps:
        df = step(df)

    # Pick the model inputs before the model itself is loaded
    features = select_features(df)
    rfc = joblib.load(file_path_to_model)

    df = predict_on_features(rfc, df, features)
    plot_prediction(df)

    return df

# Real time prediction function

##### This is the hot stuff to work on :)

In [19]:
def fetch_and_process_data(path_to_csv_file, chunk_size=60, wait_time=2):
    """Replay a CSV in growing windows to simulate real-time preprocessing.

    After every ``chunk_size`` new rows, all rows seen so far are
    preprocessed again and the resulting shape is printed. A shorter
    trailing chunk is processed as well, so no rows are dropped.

    Parameters
    ----------
    path_to_csv_file : str
        CSV file holding the complete recording.
    chunk_size : int, default 60
        Number of new rows made available per step.
    wait_time : float, default 2
        Seconds to sleep after each step.

    Returns
    -------
    pandas.DataFrame
        The preprocessed frame from the last step (empty if the file has
        no rows).
    """
    df_with_all_rows = pd.read_csv(path_to_csv_file)
    total_rows = len(df_with_all_rows)

    # Result placeholder for the case of a file without data rows
    df = df_with_all_rows.iloc[0:0]

    # Stepping by chunk_size also yields the final, possibly shorter, chunk
    for start_index in range(0, total_rows, chunk_size):
        end_index = min(start_index + chunk_size, total_rows)

        # Everything received so far; a fresh frame replaces repeated concat
        all_calculations = df_with_all_rows.iloc[:end_index].reset_index(drop=True)

        # Preprocess and feature engineering
        df = calculate_alt_change(all_calculations)
        df = calculate_speed_change(df)
        df = calculate_course_change(df)
        df = convert_datetime(df)

        print(df.shape)

        # TODO: feature selection and prediction are not wired in here yet

        # Wait for declared wait_time before processing the next chunk
        time.sleep(wait_time)

    return df



# To do before going for one of the above mentioned options

Make sure that you specify the paths and other variables needed.

In [20]:
# These are the default values used in the function fetch_and_process_data, change if needed
chunk_size=60
wait_time=2

In [200]:
# Specify the file path to pre-trained model
path_to_model = '/content/rf_v_0.4.pkl'
# path_to_model = '/content/rf_v_0.1.pkl'

In [222]:
# Specify the file path to directory with data to predict on
#path_to_csv_file='/content/Andermatt_Gondelbahn_Gutsch-2024-02-04_11-58-32.csv'
#path_to_csv_file='/content/df_310_labeled_on_lift_v4.csv'
#path_to_csv_file='/content/df_290_labeled_on_lift.csv'
#path_to_csv_file='/content/df_95_labeled_on_lift.csv'
path_to_csv_file='/content/Natschen-2024-02-04_12-01-46.csv'
#path_to_csv_file='/content/df_135_labeled_on_lift.csv'

# Option 1: 'Real time' prediction

In [None]:
# Call this function to simulate prediction in 'real time'
fetch_and_process_data(path_to_csv_file,
                       chunk_size=60,
                       wait_time=0.1)

# Option 2: One step to prediction

In [None]:
# Input your paths and call this one function
predict_on_data(path_to_csv_file, path_to_model)

# Option 3: Go through everything step by step

In [223]:
df_option3=import_data(path_to_csv_file)

In [224]:
df_option3=calculate_alt_change(df_option3)

In [225]:
df_option3=calculate_speed_change(df_option3)

In [226]:
df_option3=calculate_course_change(df_option3)

In [227]:
df_option3=convert_datetime(df_option3)

In [228]:
features=select_features(df_option3)

In [229]:
rfc=load_model(path_to_model)

In [230]:
df_option3=predict_on_features(rfc, df_option3, features)

In [231]:
plot_prediction(df_option3)

# Option 4: Simulate real time data input

In [67]:
# Load the model
rfc = joblib.load(path_to_model)

In [147]:
def fetch_and_process_data(path_to_csv_file, chunk_size=60, wait_time=2):
    """Replay a CSV in growing windows and predict on each window.

    After every ``chunk_size`` new rows, all rows seen so far are
    preprocessed and passed to the module-level model ``rfc``. A shorter
    trailing chunk is handled by the same loop, so the final prediction
    covers the whole file.

    Parameters
    ----------
    path_to_csv_file : str
        CSV file holding the complete recording.
    chunk_size : int, default 60
        Number of new rows made available per step.
    wait_time : float, default 2
        Accepted for interface compatibility; this version does not pause
        between steps.

    Returns
    -------
    pandas.DataFrame
        The predicted frame from the last step (empty if the file has no
        rows).
    """
    df_with_all_rows = pd.read_csv(path_to_csv_file)
    total_rows = len(df_with_all_rows)

    # Result placeholder for the case of a file without data rows
    df = df_with_all_rows.iloc[0:0]

    # Stepping by chunk_size also yields the final, possibly shorter, chunk
    step_starts = range(0, total_rows, chunk_size)
    for chunk_number, start_index in enumerate(step_starts, start=1):
        end_index = min(start_index + chunk_size, total_rows)
        print('index = ', chunk_number)

        # Everything received so far, including the newest chunk
        all_calculations = df_with_all_rows.iloc[:end_index].reset_index(drop=True)

        # Preprocess and feature engineering
        df = calculate_alt_change(all_calculations)
        df = calculate_speed_change(df)
        df = calculate_course_change(df)
        df = convert_datetime(df)

        # Feature selection and prediction (rfc is loaded at module level)
        features = select_features(df)
        df = predict_on_features(rfc, df, features)

        print(df.shape)
        print(end_index - start_index)
        print('len all calc = ', len(all_calculations))

        # TODO: redraw the prediction scatter plot after each step

    return df



`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [None]:
# Call this function to simulate prediction in 'real time'
fetch_and_process_data(path_to_csv_file,
                       chunk_size=60,
                       wait_time=.01)

# Misclassification Logic Work

In [236]:
# Smooth the raw predictions chunk by chunk: every row of a chunk is set to 1
# when at least `threshold` of its predictions are 1, otherwise to 0.
column_to_mask = 'predicted'
chunk_size = 60
threshold = 0.3

# Load input dataset
df_with_all_rows = df_option3
total_rows = len(df_with_all_rows)

# Calculate the total number of full chunks and the leftover rows
total_chunks = total_rows // chunk_size
print('total_chunks = ', total_chunks)

remainder = total_rows % chunk_size
print('remainder = ', remainder)

# One mask value per row, collected in order
all_calculations = []

# Stepping by chunk_size also yields the final, possibly shorter, chunk.
# This avoids a separate `iloc[-remainder:]` slice, which selects the whole
# column when remainder is 0.
for i, start_index in enumerate(range(0, total_rows, chunk_size)):
    print('index = ', i)
    end_index = min(start_index + chunk_size, total_rows)
    data_chunk = df_with_all_rows[column_to_mask].iloc[start_index:end_index]

    # Share of positive predictions in this chunk decides the whole chunk
    if sum(data_chunk) / len(data_chunk) >= threshold:
        processed_chunk = [1] * len(data_chunk)
        print("start position", start_index)
        print('end position', end_index)
    else:
        processed_chunk = [0] * len(data_chunk)

    all_calculations.extend(processed_chunk)

df_option3['mask'] = all_calculations

total_chunks =  187
remainder =  9
index =  0
index =  1
start position 60
end position 120
index =  2
start position 120
end position 180
index =  3
start position 180
end position 240
index =  4
start position 240
end position 300
index =  5
start position 300
end position 360
index =  6
start position 360
end position 420
index =  7
index =  8
index =  9
index =  10
index =  11
index =  12
index =  13
index =  14
index =  15
index =  16
index =  17
index =  18
index =  19
index =  20
index =  21
index =  22
index =  23
index =  24
index =  25
index =  26
index =  27
index =  28
index =  29
index =  30
index =  31
index =  32
index =  33
index =  34
index =  35
index =  36
index =  37
index =  38
index =  39
index =  40
index =  41
index =  42
index =  43
index =  44
index =  45
index =  46
index =  47
index =  48
index =  49
index =  50
index =  51
index =  52
start position 3120
end position 3180
index =  53
start position 3180
end position 3240
index =  54
start position 3240
end 

In [233]:
# Plotly Express treats a numeric colour column as continuous, which ignores
# color_discrete_map and yields no per-class traces. Map the 0/1 mask to text
# labels so the colouring and legend are discrete.
status_labels = {0: 'Not on the lift', 1: 'On the lift'}
status = df_option3['mask'].map(status_labels)
plot_df = df_option3.assign(Status=status.fillna(df_option3['mask'].astype(str)))

fig = px.scatter(plot_df, x='Timestamp', y='Alt(m)', color='Status',
                 labels={'Alt(m)': 'Altitude (m)'},
                 title=f'Masked Predictions {path_to_csv_file}',
                 color_discrete_map={'Not on the lift': 'blue',
                                     'On the lift': 'red'})

fig.update_traces(marker=dict(size=8),
                  selector=dict(mode='markers'))

# Customize the legend
fig.update_layout(
    legend_title_text='Status',
    width=1000,
    height=600
)

# Show the plot
fig.show()
print(path_to_csv_file)

/content/Natschen-2024-02-04_12-01-46.csv


# Other methods

In [416]:
# Keep a positive only where predictions exist both shortly before and
# shortly after the row: the `span` rows before it and the `span` rows
# starting at it must each contain at least one positive prediction.
column_to_mask = 'predicted'
span = 10

# Load the dataset
df_with_all_rows = df_option3
predictions = df_with_all_rows[column_to_mask].to_numpy()

# Every row starts at 0 and is only raised when both windows are positive
placeholder = np.zeros(len(df_with_all_rows))

for i in range(len(predictions)):
    # Clamp the start at 0: a negative start would count from the end
    window_before = predictions[max(0, i - span):i]
    window_after = predictions[i:i + span]
    if window_before.sum() > 0 and window_after.sum() > 0:
        placeholder[i] = 1

placeholder_filled = placeholder
len(placeholder_filled)

18428

In [398]:
# Vectorized attempt at the look-around mask using a rolling sum.
# Relies on `column_to_mask` having been defined by an earlier cell.
import pandas as pd

df_with_all_rows = df_option3

span = 10

# Series sharing the frame's index; entries not assigned below stay missing
placeholder = pd.Series(index=df_with_all_rows.index)

# Sum of the predictions over a window of `span` rows
rolling_sum = df_with_all_rows[column_to_mask].rolling(window=span).sum()

# Rows whose window holds no positive prediction are set to 0
placeholder[(rolling_sum == 0)] = 0
# Rows are set to 1 when the rolling sum `span` rows further down is positive.
# NOTE(review): the first condition is `>= 0`, so it also matches rows just
# set to 0 above; the loop-based cells require a strictly positive look-back
# sum -- confirm whether `> 0` was intended here.
placeholder[(rolling_sum >= 0) & (rolling_sum.shift(-span) > 0)] = 1

# Anything still missing counts as 0
placeholder_filled = placeholder.fillna(0)





In [417]:
df_option3['2ndmask'] = placeholder_filled
df_option3['2ndmask'].value_counts()

0.0    15319
1.0     3109
Name: 2ndmask, dtype: int64

In [419]:
import numpy as np

# Look-around mask: a row is 1 only when the rows up to and including it
# (at most `span` back) and the rows from it onward (at most `span` ahead)
# each contain a positive prediction.
column_to_mask = 'predicted'
span = 10

# Load the dataset
df_with_all_rows = df_option3
row_count = len(df_with_all_rows)

# All rows default to 0; only qualifying rows are raised to 1
placeholder = np.zeros(row_count)

for i in range(row_count):
    lookback_start = max(0, i - span)
    sum_before = sum(df_with_all_rows[column_to_mask].iloc[lookback_start:i + 1])
    # The look-ahead window is only evaluated when the look-back is positive
    if sum_before > 0 and sum(df_with_all_rows[column_to_mask].iloc[i:i + span + 1]) > 0:
        placeholder[i] = 1

placeholder_filled = placeholder


In [420]:
df_option3['2ndmask'] = placeholder_filled
df_option3['2ndmask'].value_counts()

0.0    15264
1.0     3164
Name: 2ndmask, dtype: int64

In [421]:
# Plotly Express treats a numeric colour column as continuous, which ignores
# color_discrete_map and yields no per-class traces. Map the 0/1 mask to text
# labels so the colouring and legend are discrete.
status_labels = {0: 'Not on the lift', 1: 'On the lift'}
status = df_option3['2ndmask'].map(status_labels)
plot_df = df_option3.assign(Status=status.fillna(df_option3['2ndmask'].astype(str)))

fig = px.scatter(plot_df, x='Timestamp', y='Alt(m)', color='Status',
                 labels={'Alt(m)': 'Altitude (m)'},
                 title='2ndmask prediction',
                 color_discrete_map={'Not on the lift': 'blue',
                                     'On the lift': 'red'})

fig.update_traces(marker=dict(size=8),
                  selector=dict(mode='markers'))

# Customize the legend
fig.update_layout(
    legend_title_text='Status',
    width=1000,
    height=600
)

# Show the plot
fig.show()
print(path_to_csv_file)

/content/df_310_labeled_on_lift_v4.csv


# Defining on Lift event

In [237]:
# Smooth the raw predictions chunk by chunk and record which full chunks
# were classified as "on the lift" (positional start/end, end exclusive).
column_to_mask = 'predicted'
chunk_size = 60
threshold = 0.3

# Load the dataset
df_with_all_rows = df_option3
total_rows = len(df_with_all_rows)

# Calculate the total number of full chunks and the leftover rows
total_chunks = total_rows // chunk_size
print('total_chunks = ', total_chunks)

remainder = total_rows % chunk_size
print('remainder = ', remainder)

# One mask value per row, plus chunk number -> (start, end) for positive chunks
all_calculations = []
event_log = {}

# Stepping by chunk_size also yields the final, possibly shorter, chunk.
# This avoids a separate `iloc[-remainder:]` slice, which selects the whole
# column when remainder is 0.
for i, start_index in enumerate(range(0, total_rows, chunk_size)):
    end_index = min(start_index + chunk_size, total_rows)
    data_chunk = df_with_all_rows[column_to_mask].iloc[start_index:end_index]

    # Share of positive predictions in this chunk decides the whole chunk
    if sum(data_chunk) / len(data_chunk) >= threshold:
        processed_chunk = [1] * len(data_chunk)
        # As before, only full-size chunks are logged as events; a shorter
        # trailing chunk is masked but not recorded.
        if len(data_chunk) == chunk_size:
            event_log[i] = start_index, end_index
    else:
        processed_chunk = [0] * len(data_chunk)

    all_calculations.extend(processed_chunk)

df_option3['mask'] = all_calculations

total_chunks =  187
remainder =  9


In [238]:
event_log

{1: (60, 120),
 2: (120, 180),
 3: (180, 240),
 4: (240, 300),
 5: (300, 360),
 6: (360, 420),
 52: (3120, 3180),
 53: (3180, 3240),
 54: (3240, 3300),
 56: (3360, 3420),
 60: (3600, 3660),
 61: (3660, 3720),
 71: (4260, 4320),
 72: (4320, 4380),
 82: (4920, 4980),
 83: (4980, 5040),
 84: (5040, 5100),
 85: (5100, 5160),
 92: (5520, 5580),
 93: (5580, 5640),
 94: (5640, 5700),
 95: (5700, 5760),
 96: (5760, 5820),
 97: (5820, 5880),
 113: (6780, 6840),
 114: (6840, 6900),
 121: (7260, 7320),
 122: (7320, 7380),
 128: (7680, 7740),
 129: (7740, 7800),
 133: (7980, 8040),
 134: (8040, 8100),
 138: (8280, 8340),
 139: (8340, 8400)}

In [240]:
# maybe store events as a dict to have a key for them
# Merge chunks that touch (one ends where the next starts) into single events.
continuous_events = []

for key in sorted(event_log):
    chunk_start, chunk_end = event_log[key]
    if continuous_events and continuous_events[-1][1] == chunk_start:
        # This chunk begins where the current event ends: extend that event
        event_start = continuous_events[-1][0]
        continuous_events[-1] = (event_start, chunk_end)
    else:
        # Gap (or very first chunk): open a new event
        continuous_events.append((chunk_start, chunk_end))

print("Continuous events:")
for event in continuous_events:
    print(event)

print('number of continous events', len(continuous_events))

Continuous events:
(60, 420)
(3120, 3300)
(3360, 3420)
(3600, 3720)
(4260, 4380)
(4920, 5160)
(5520, 5880)
(6780, 6900)
(7260, 7380)
(7680, 7800)
(7980, 8100)
(8280, 8400)
number of continous events 12


In [241]:
import plotly.express as px

# Plotly Express treats a numeric colour column as continuous, which ignores
# color_discrete_map and yields no per-class traces. Map the 0/1 mask to text
# labels so the colouring and legend are discrete.
status_labels = {0: 'Not on the lift', 1: 'On the lift'}
status = df_option3['mask'].map(status_labels)
plot_df = df_option3.assign(Status=status.fillna(df_option3['mask'].astype(str)))

fig = px.scatter(plot_df, x='Timestamp', y='Alt(m)', color='Status',
                 labels={'Alt(m)': 'Altitude (m)'},
                 title=f'Masked Predictions {path_to_csv_file}',
                 color_discrete_map={'Not on the lift': 'blue',
                                     'On the lift': 'red'})

fig.update_traces(marker=dict(size=8),
                  selector=dict(mode='markers'))

fig.update_layout(
    legend_title_text='Status',
    width=1000,
    height=600
)

# Event bounds are row positions (they come from .iloc slicing), so they are
# looked up with .iloc; .loc would treat them as index labels, which no longer
# match positions once outlier rows have been dropped.
timestamps = df_option3['Timestamp']
altitudes = df_option3['Alt(m)']
indexes_to_label = continuous_events  # list of (start, end) row positions

# Mark the first row of every event
for start_pos, _ in indexes_to_label:
    fig.add_trace(go.Scatter(x=[timestamps.iloc[start_pos]],
                             y=[altitudes.iloc[start_pos]],
                             mode='markers+text',
                             marker=dict(size=20, color='green'),
                             text=['start'],
                             textposition='top right'))

# Mark the last row of every event; `end` is exclusive, hence end - 1
for _, end_pos in indexes_to_label:
    fig.add_trace(go.Scatter(x=[timestamps.iloc[end_pos - 1]],
                             y=[altitudes.iloc[end_pos - 1]],
                             mode='markers+text',
                             marker=dict(size=20, color='red'),
                             text=['End'],
                             textposition='bottom left'))

# Show the plot
fig.show()


In [158]:
## Storing in a dict instead
# Merge touching chunks into events, then number the events from 1.
merged_spans = []

for key in sorted(event_log):
    chunk_start, chunk_end = event_log[key]
    if merged_spans and merged_spans[-1][1] == chunk_start:
        # Chunk continues the current event: push its end further out
        merged_spans[-1][1] = chunk_end
    else:
        # Gap (or very first chunk): open a new event
        merged_spans.append([chunk_start, chunk_end])

continuous_events_dict = {
    event_number: tuple(bounds)
    for event_number, bounds in enumerate(merged_spans, start=1)
}

print("Continuous events as dictionary:")
for key, value in continuous_events_dict.items():
    print(f"{key}: {value}")

print('number of continuous events:', len(continuous_events_dict))


Continuous events as dictionary:
1: (0, 120)
2: (420, 660)
3: (1680, 1920)
4: (2160, 2460)
5: (2880, 3120)
6: (3420, 3720)
7: (5100, 5340)
8: (6660, 6720)
9: (9300, 9540)
10: (10140, 10500)
11: (10920, 11280)
12: (11520, 11760)
13: (12600, 12900)
number of continuous events: 13


In [97]:
import plotly.express as px

# Plotly Express treats a numeric colour column as continuous, which yields
# no per-class traces to name. Map the 0/1 mask to text labels so the legend
# shows the two classes (default colours are kept).
status_labels = {0: 'Not on the lift', 1: 'On the lift'}
status = df_option3['mask'].map(status_labels)
plot_df = df_option3.assign(Status=status.fillna(df_option3['mask'].astype(str)))

fig = px.scatter(plot_df, x='Timestamp', y='Alt(m)', color='Status',
                 labels={'Alt(m)': 'Altitude (m)'},
                 title=f'Masked Predictions {path_to_csv_file}')

fig.update_traces(marker=dict(size=8),
                  selector=dict(mode='markers'))

fig.update_layout(
    legend_title_text='Status',
    width=1000,
    height=600
)

# Event bounds are row positions (they come from .iloc slicing), so they are
# looked up with .iloc; .loc would treat them as index labels, which no longer
# match positions once outlier rows have been dropped.
timestamps = df_option3['Timestamp']
altitudes = df_option3['Alt(m)']
indexes_to_label = continuous_events_dict  # event number -> (start, end)

# Mark the first row of every event, labelled with the event number
for index, (start_pos, _) in indexes_to_label.items():
    fig.add_trace(go.Scatter(x=[timestamps.iloc[start_pos]],
                             y=[altitudes.iloc[start_pos]],
                             mode='markers+text',
                             marker=dict(size=15, color='green'),
                             text=[f'Start {index}'],
                             textposition='bottom left'))

# Mark the last row of every event; `end` is exclusive, hence end - 1
for index, (_, end_pos) in indexes_to_label.items():
    fig.add_trace(go.Scatter(x=[timestamps.iloc[end_pos - 1]],
                             y=[altitudes.iloc[end_pos - 1]],
                             mode='markers+text',
                             marker=dict(size=15, color='red'),
                             text=['End'],
                             textposition='top right'))

# Show the plot
fig.show()


In [159]:
import plotly.express as px

# Start from an empty figure and draw only the rows that belong to an event
fig = px.scatter()

indexes_to_label = continuous_events_dict  # event number -> (start, end)

for index, (start_pos, end_pos) in indexes_to_label.items():
    # Event bounds are row positions with an exclusive end, so slice with
    # .iloc; .loc with a list of positions is label-based and raises when a
    # label is missing (e.g. after outlier rows were dropped).
    event_rows = df_option3.iloc[start_pos:end_pos]
    available_points = event_rows.dropna(subset=['Timestamp', 'Alt(m)'])
    if not available_points.empty:
        fig.add_trace(go.Scatter(x=available_points['Timestamp'],
                                 y=available_points['Alt(m)'],
                                 mode='markers+text',
                                 marker=dict(size=15, color=index),
                                 # single label: only the first point is tagged
                                 text=[f'{index}'],
                                 textposition='middle left'))

fig.show()


In [175]:
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from itertools import count
import random