In [1]:
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

import math
from collections import defaultdict

In [2]:
df = pd.read_csv("dataset.csv")  

In [3]:
df.head()

Unnamed: 0,user,activity,timestamp,x-axis,y-axis,z-axis
0,1,Walking,4991922345000,0.69,10.8,-2.03
1,1,Walking,4991972333000,6.85,7.44,-0.5
2,1,Walking,4992022351000,0.93,5.63,-0.5
3,1,Walking,4992072339000,-2.11,5.01,-0.69
4,1,Walking,4992122358000,-4.59,4.29,-1.95


In [4]:
df_original = df.copy()

In [5]:
users_with_all = (
    df.groupby('user')['activity']
      .nunique()
      .reset_index()
      .query('activity == 6')['user']
      .tolist()
)
print("Users with all 6 activities:", users_with_all)


Users with all 6 activities: [3, 5, 6, 7, 8, 12, 13, 18, 19, 20, 21, 24, 27, 29, 31, 32, 33, 34, 36]


In [None]:
import numpy as np
import json
from IPython.display import display, clear_output
from ipywidgets import Button, Text, VBox, HBox, Label, Textarea, IntText, Dropdown
import ipywidgets as widgets

np.random.seed(1)

# Create windows from the dataset
def create_windows(df, window_size_s=3, overlap=0.5):
    """
    Create sliding windows from the dataframe
    window_size_s: window size in seconds
    overlap: fraction of overlap (0.5 = 50% overlap)
    """
    windows = []
    
    for user in sorted(df['user'].unique()):
        for activity in df['activity'].unique():
            df_subset = df[(df['user'] == user) & (df['activity'] == activity)].copy()
            
            if df_subset.empty:
                continue
            
            # Sort by timestamp
            df_subset = df_subset.sort_values('timestamp').reset_index(drop=True)
            
            # Convert timestamp to seconds from start
            t0 = df_subset['timestamp'].iloc[0]
            df_subset['time_s'] = (df_subset['timestamp'] - t0) / 1e9
            
            # Create windows
            t_max = df_subset['time_s'].max()
            step = window_size_s * (1 - overlap)
            
            window_start = 0
            while window_start + window_size_s <= t_max:
                window_end = window_start + window_size_s
                
                window_data = df_subset[
                    (df_subset['time_s'] >= window_start) & 
                    (df_subset['time_s'] < window_end)
                ].copy()
                
                if len(window_data) > 10:  # Only keep windows with enough data points
                    windows.append({
                        'user': user,
                        'activity': activity,
                        'start_time': window_start,
                        'end_time': window_end,
                        'data': window_data
                    })
                
                window_start += step
    
    return windows

# ========================================
# CONFIGURATION: Adjust these to control how many windows to label
# ========================================

# Option 1: Select specific users (None = all users)
selected_users = [21]  # Label only users 1-5 to start <------------------------------------------------------------------------------- CHANGE THIS I AM DOING 3rd USER
# selected_users = None  # Uncomment to use all users

# Option 2: Select specific activities (None = all activities)
selected_activities = None  # Use all activities
# selected_activities = ['Walking', 'Jogging']  # Uncomment to limit to specific activities

# Option 3: Reduce overlap to create fewer windows
overlap_ratio = 0.75  # 0.75 = 75% overlap (more windows), 0.0 = no overlap (fewer windows)

# Option 4: Limit total windows (useful for testing)
max_windows = 25  # Set to a number like 100 to limit for testing
# max_windows = 100  # Uncomment to test with only 100 windows

# ========================================

# Filter dataframe if needed
df_filtered = df.copy()
if selected_users is not None:
    df_filtered = df_filtered[df_filtered['user'].isin(selected_users)]
if selected_activities is not None:
    df_filtered = df_filtered[df_filtered['activity'].isin(selected_activities)]

print(f"Dataset: {len(df_filtered)} samples")
print(f"Users: {sorted(df_filtered['user'].unique())}")
print(f"Activities: {sorted(df_filtered['activity'].unique())}")
print()
print("Creating windows... (this may take a moment)")

windows = create_windows(df_filtered, window_size_s=3, overlap=overlap_ratio)

if max_windows is not None and len(windows) > max_windows:
    print(f"Balancing {max_windows} windows evenly across activities...")

    # Group windows by activity
    activity_groups = defaultdict(list)
    for w in windows:
        activity_groups[w['activity']].append(w)

    # Calculate how many windows per activity
    num_activities = len(activity_groups)
    windows_per_label = math.floor(max_windows / num_activities)

    balanced_windows = []

    for activity, group in activity_groups.items():
        np.random.shuffle(group)  # Randomize order
        balanced_windows.extend(group[:windows_per_label])

    # If there‚Äôs leftover (e.g. 100 % num_activities != 0), add extra from random remaining
    remaining = max_windows - len(balanced_windows)
    if remaining > 0:
        leftovers = [w for group in activity_groups.values() for w in group if w not in balanced_windows]
        np.random.shuffle(leftovers)
        balanced_windows.extend(leftovers[:remaining])

    windows = balanced_windows

print(f"‚úì Created {len(windows)} windows to label")


Dataset: 35158 samples
Users: [np.int64(3)]
Activities: ['Downstairs', 'Jogging', 'Sitting', 'Standing', 'Upstairs', 'Walking']

Creating windows... (this may take a moment)
Balancing 25 windows evenly across activities...
‚úì Created 25 windows to label


In [38]:
# inspect the windows a bit so we make sure they were fine 

from collections import Counter
import matplotlib.pyplot as plt

print("\nWindow counts by activity:")
print(Counter([w['activity'] for w in windows]))

w = np.random.choice(windows)
print(f"\nExample window ‚Üí user {w['user']}, activity {w['activity']}, {len(w['data'])} samples")
display(w['data'].head())



Window counts by activity:
Counter({'Walking': 5, 'Jogging': 4, 'Sitting': 4, 'Standing': 4, 'Upstairs': 4, 'Downstairs': 4})

Example window ‚Üí user 3, activity Jogging, 60 samples


Unnamed: 0,user,activity,timestamp,x-axis,y-axis,z-axis,time_s
9689,3,Jogging,102223981666000,0.19,-9.08,-0.53,987.760163
9690,3,Jogging,102224021491000,-0.42,12.98,10.5,987.799988
9691,3,Jogging,102224061774000,-5.28,11.65,-8.31,987.840271
9692,3,Jogging,102224101661000,-7.86,12.91,-6.82,987.880158
9693,3,Jogging,102224183112000,-2.98,19.76,-0.65,987.961609


In [39]:
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import ipywidgets as widgets
from ipywidgets import VBox, HBox, Label, Button

# ========================================
# CONCEPT CONFIGURATION
# ========================================
CONCEPTS = [
    'periodicity',
    'temporal_stability',
    'coordination',
]

CONCEPT_DESCRIPTIONS = {
    'periodicity': "How regular and rhythmic the motion is. Random ‚Üí 0.0, Some pattern ‚Üí 0.5, Clear oscillation ‚Üí 1.0",
    'temporal_stability': "How consistent the motion pattern remains across the window. Highly variable ‚Üí 0.0, Moderately stable ‚Üí 0.5, Very stable ‚Üí 1.0",
    'coordination': "How synchronized the x, y, z axes are. Independent/noisy ‚Üí 0.0, Partial alignment ‚Üí 0.5, Strongly aligned ‚Üí 1.0"
}

# ========================================


class WindowLabeler:
    def __init__(self, windows, concepts, labels_file='window_labels.csv'):
        self.windows = windows
        self.concepts = concepts
        self.labels_file = labels_file
        self.current_idx = 0
        self.labels = {}

        self.load_labels()

        # UI components
        self.output = widgets.Output()
        self.info_label = Label(value="")
        self.progress_label = Label(value="")
        self.stats_label = Label(value="")

        # Create sliders
        self.concept_sliders = {}
        slider_widgets = []
        for concept in self.concepts:
            desc = widgets.HTML(
                value=f"<b>{concept.replace('_', ' ').title()}</b>: <i>{CONCEPT_DESCRIPTIONS[concept]}</i>",
                layout=widgets.Layout(width='400px')
            )

            input_field = widgets.BoundedFloatText(
                value=0.0,
                min=0.0,
                max=1.0,
                step=0.01,
                description="Score:",
                style={'description_width': '60px'},
                layout=widgets.Layout(width='250px')
            )

            slider_widgets.append(VBox([desc, input_field]))
            self.concept_sliders[concept] = input_field


        # Layout sliders in two columns
        mid = len(slider_widgets) // 2
        sliders_box = HBox([VBox(slider_widgets[:mid]), VBox(slider_widgets[mid:])])

        # Buttons
        self.reset_btn = Button(description='‚Ü∫ Reset All', button_style='')
        self.save_label_btn = Button(description='üíæ Save & Next', button_style='success')
        self.save_all_btn = Button(description='üíæ Export All', button_style='primary')

        self.reset_btn.on_click(self.on_reset)
        self.save_label_btn.on_click(self.on_save_label)
        self.save_all_btn.on_click(self.on_save_all)

        # Layout
        label_box = VBox([
            Label(value="Rate each concept from 0 (absent) to 1 (fully present):"),
            sliders_box,
            self.reset_btn
        ])
        action_box = HBox([self.save_label_btn, self.save_all_btn])

        self.ui = VBox([
            self.info_label,
            self.progress_label,
            self.output,
            Label(value="‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ"),
            label_box,
            action_box,
            self.stats_label
        ])

    # =============================
    # Labeling logic
    # =============================

    def load_labels(self):
        try:
            df = pd.read_csv(self.labels_file)
            for _, row in df.iterrows():
                idx = int(row['window_idx'])
                self.labels[idx] = {c: float(row[c]) for c in self.concepts}
            print(f"Loaded {len(self.labels)} existing labels.")
        except FileNotFoundError:
            print("No existing labels found ‚Äî starting fresh.")
        except Exception as e:
            print(f"‚ö†Ô∏è Error loading labels: {e}")
            self.labels = {}

    def save_labels_to_file(self):
        rows = []
        for idx, label_data in self.labels.items():
            w = self.windows[idx]
            row = {
                'window_idx': idx,
                'user': w['user'],
                'activity': w['activity'],
                'start_time': w['start_time'],
                'end_time': w['end_time'],
            }
            for c in self.concepts:
                row[c] = label_data.get(c, 0.0)
            rows.append(row)

        df = pd.DataFrame(rows)
        df.to_csv(self.labels_file, index=False)
        return len(df)

    def plot_window(self, window_idx):
        window = self.windows[window_idx]
        data = window['data']

        with self.output:
            clear_output(wait=True)

            # --- 3 separate axis plots ---
            fig, axes = plt.subplots(3, 1, figsize=(14, 8), sharex=True)
            fig.suptitle(f"Window {window_idx} | {window['activity']} (User {window['user']})", fontsize=16)

            axes[0].plot(data['time_s'], data['x-axis'], 'b-', linewidth=1.5)
            axes[0].set_ylabel('X-axis', fontsize=12)
            axes[0].grid(True, alpha=0.3)
            axes[0].set_title('X-axis Acceleration', fontsize=11)

            axes[1].plot(data['time_s'], data['y-axis'], 'g-', linewidth=1.5)
            axes[1].set_ylabel('Y-axis', fontsize=12)
            axes[1].grid(True, alpha=0.3)
            axes[1].set_title('Y-axis Acceleration', fontsize=11)

            axes[2].plot(data['time_s'], data['z-axis'], 'r-', linewidth=1.5)
            axes[2].set_ylabel('Z-axis', fontsize=12)
            axes[2].set_xlabel('Time (s)', fontsize=12)
            axes[2].grid(True, alpha=0.3)
            axes[2].set_title('Z-axis Acceleration', fontsize=11)

            plt.tight_layout()
            plt.show()

            # --- Combined plot ---
            fig2, ax = plt.subplots(1, 1, figsize=(14, 5))
            ax.plot(data['time_s'], data['x-axis'], 'b-', label='X-axis', alpha=0.7)
            ax.plot(data['time_s'], data['y-axis'], 'g-', label='Y-axis', alpha=0.7)
            ax.plot(data['time_s'], data['z-axis'], 'r-', label='Z-axis', alpha=0.7)
            ax.set_xlabel('Time (s)')
            ax.set_ylabel('Acceleration')
            ax.set_title('Combined View - All Axes', fontsize=14)
            ax.legend()
            ax.grid(True, alpha=0.3)
            plt.tight_layout()
            plt.show()

    def update_display(self):
        window = self.windows[self.current_idx]
        self.info_label.value = (
            f"User: {window['user']} | Activity: {window['activity']} | "
            f"{window['start_time']:.2f}s ‚Üí {window['end_time']:.2f}s"
        )
        labeled_count = len(self.labels)
        self.progress_label.value = f"Window {self.current_idx+1}/{len(self.windows)} | Labeled: {labeled_count}"
        self.stats_label.value = "‚óã Needs label" if self.current_idx not in self.labels else "‚úì Labeled"

        if self.current_idx in self.labels:
            for c in self.concepts:
                self.concept_sliders[c].value = self.labels[self.current_idx].get(c, 0.0)
        else:
            for c in self.concepts:
                self.concept_sliders[c].value = 0.0

        self.plot_window(self.current_idx)

    def on_save_label(self, btn):
        concept_values = {c: float(self.concept_sliders[c].value) for c in self.concepts}
        self.labels[self.current_idx] = concept_values
        count = self.save_labels_to_file()

        if self.current_idx < len(self.windows) - 1:
            self.current_idx += 1
            self.update_display()
        else:
            self.stats_label.value = f"‚úÖ Finished! {count} windows labeled and saved."
            print("üéâ All windows labeled. Export complete.")

    def on_reset(self, btn):
        for c in self.concepts:
            self.concept_sliders[c].value = 0.0

    def on_save_all(self, btn):
        count = self.save_labels_to_file()
        self.stats_label.value = f"üíæ Saved {count} labels to {self.labels_file}"
        print(f"‚úÖ Exported all labels to {self.labels_file}")

    def display(self):
        display(self.ui)
        self.update_display()


# ========================================
# AFTER LABELING ‚Äî MERGE WITH ORIGINAL DATASET
# ========================================

def merge_labels_with_original(df_original, windows, labels_file, concepts):
    """Expand window-level concept labels to sample-level dataset."""
    df_labels = pd.read_csv(labels_file)
    labeled_rows = []

    for _, row in df_labels.iterrows():
        idx = int(row['window_idx'])
        w = windows[idx]
        w_data = w['data'].copy()
        for c in concepts:
            w_data[c] = row[c]
        labeled_rows.append(w_data)

    df_final = pd.concat(labeled_rows, ignore_index=True)
    print(f"‚úÖ Created dataset with {len(df_final)} samples and {len(concepts)} new columns.")
    return df_final

In [40]:
# run harveer code

print("Initializing labeler...")
print(f"Concepts to label: {', '.join(CONCEPTS)}")
print(f"Windows to label: {len(windows)}")
labeler = WindowLabeler(windows, concepts=CONCEPTS, labels_file='window_labels.csv')
print("Ready to label! Use the interface below:")

labeler.display()

Initializing labeler...
Concepts to label: periodicity, temporal_stability, coordination
Windows to label: 25
No existing labels found ‚Äî starting fresh.
Ready to label! Use the interface below:


VBox(children=(Label(value=''), Label(value=''), Output(), Label(value='‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚Ä¶

In [41]:
df_final = merge_labels_with_original(df_original, windows, 'window_labels.csv', CONCEPTS)
df_final.to_csv('dataset_with_concepts.csv', index=False)
print("‚úÖ Saved dataset_with_concepts.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'window_labels.csv'