# Gloss Annotation Timeline and Frequency Visualiser

In [None]:

This tutorial helps you visualise annotated glosses in your SASL dataset.
We will explore gloss frequency distributions and generate a per-video timeline showing when each gloss occurs.

### Objectives:
- Visualise most frequent glosses
- Explore gloss usage per video/timestamp
- Plot timelines and gloss distributions


In [None]:
# Step 1: Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


In [None]:
# Step 2: Load the gloss annotation dataset

# The path is relative to the notebook's working directory; edit it if your
# annotation CSV lives somewhere else.
df = pd.read_csv(filepath_or_buffer='../data/sasl_gloss_annotations.csv')

# Preview the first five rows to confirm the file parsed as expected.
df.head(n=5)

In [None]:
# Step 3: Frequency of gloss labels

# Number of most frequent glosses shown in the bar chart.
TOP_N_GLOSSES = 20

# value_counts() sorts by descending count, so head() keeps the most frequent labels.
gloss_freq = df['gloss'].value_counts().head(TOP_N_GLOSSES)

fig, ax = plt.subplots(figsize=(14, 6))
# seaborn 0.13 deprecates passing `palette` without `hue`. Colouring by the
# same variable that is on the y-axis keeps one colour per bar; dodge=False
# keeps full-width bars on older seaborn releases, where a hue variable would
# otherwise shrink and offset them.
sns.barplot(
    x=gloss_freq.values,
    y=gloss_freq.index,
    hue=gloss_freq.index,
    palette='magma',
    dodge=False,
    ax=ax,
)
# A legend here would only repeat the y-axis labels; drop it if seaborn drew one.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
ax.set_title(f'Top {TOP_N_GLOSSES} Gloss Labels')
ax.set_xlabel('Frequency')
ax.set_ylabel('Gloss')
fig.tight_layout()
plt.show()

In [None]:
# Step 4: Plot gloss timelines for a single video (if applicable)

# Example: plot gloss occurrences for one video ID
if 'video_id' in df.columns and 'timestamp' in df.columns:
    # Drop missing IDs first: NaN never compares equal to itself, so selecting
    # a NaN ID would match zero rows and produce an empty plot.
    video_ids = df['video_id'].dropna().unique()
    if len(video_ids) == 0:
        # Indexing [0] into an empty array would raise IndexError.
        print("No video IDs available to plot.")
    else:
        # Use the first video ID encountered in the data as the example.
        sample_video = video_ids[0]
        video_df = df[df['video_id'] == sample_video]

        fig = px.scatter(video_df, x='timestamp', y='gloss', title=f'Gloss Timeline for Video {sample_video}',
                         labels={'timestamp': 'Time (s)', 'gloss': 'Gloss Label'})
        fig.show()
else:
    print("Required columns (video_id, timestamp) not found.")


In [None]:
# Step 5: (Optional) Heatmap of glosses by signer (if signer_id is available)

if 'signer_id' in df.columns:
    # Rows are signers, columns are glosses, cells are occurrence counts.
    gloss_signer_matrix = pd.crosstab(df['signer_id'], df['gloss'])

    # The crosstab's columns come back in sorted label order, so slicing the
    # first 20 would show an arbitrary set of glosses. Rank by total count
    # instead so the heatmap covers the 20 most frequent glosses. Ranking from
    # the matrix itself guarantees every selected label is an existing column.
    top_glosses = gloss_signer_matrix.sum(axis=0).nlargest(20).index

    plt.figure(figsize=(14, 6))
    sns.heatmap(gloss_signer_matrix.loc[:, top_glosses], cmap='YlGnBu', linewidths=0.5)
    plt.title('Heatmap of Gloss Frequency by Signer')
    plt.xlabel('Gloss')
    plt.ylabel('Signer ID')
    plt.tight_layout()
    plt.show()
