

1. Definitions Based on Observations
Search in Place:
Small movements concentrated in a limited spatial area.
Displacement vectors over time have minimal overall magnitude.
No significant directional change.
Backtracking:
A substantial reversal of direction.
Movement spans a considerable distance.
A significant angular change in the displacement vectors (e.g., more than 160 degrees).

2. Proposed Rules for Classification
Rule for Search in Place: Total displacement in a sliding window should remain below a defined threshold.
The direction of movement should stay roughly constant within the window, i.e., angular changes between successive displacement vectors remain small (indicating limited directional shifts).
Rule for Backtracking: Significant angular reversal (e.g., > 160 degrees).
Displacement magnitude over the sliding window exceeds a defined threshold.


3. Algorithm Enhancement
The algorithm can be modified as follows: Compute total displacement and angular change within each sliding window.
Apply thresholds for displacement and angular change:
Classify windows with low displacement and angular change as "Search in Place."
Classify windows with high angular change and significant displacement as "Backtracking."



In [3]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Define input/output paths
# NOTE: these are absolute, machine-specific paths; adjust them before running elsewhere.
data_folder = "/Users/liziang/Desktop/Cornell/DAIL/content/Original_Tra"  # scanned for "*_cleaned.txt" files
output_pdf_file = "/Users/liziang/Desktop/Cornell/DAIL/All_Trajectory_Plots.pdf"  # one plot page per processed file
output_csv_file = "/Users/liziang/Desktop/Cornell/DAIL/Table_Backtrack.csv"  # table of validated backtracking moments

# Parameters
T = 7  # Sliding window size in seconds; detect_backtracking looks T/2 before and T/2 after each sample
theta_threshold = 175  # Angle threshold in degrees between the mean velocity before and after a sample
# NOTE(review): distance_threshold is compared against the norm of the mean
# velocity AFTER a sample in detect_backtracking (i.e. it acts as a speed
# threshold there) - confirm that this is intended.
distance_threshold = 50.0  # Minimum movement distance to consider significant
min_speed = 50.0  # Minimum norm of the mean velocity BEFORE a sample
angle_threshold = 15  # Heading change (degrees) below which samples are marked as straight-line movement
window_size = 3  # Number of samples spanned when comparing headings in preprocess_trajectory
acceleration_threshold = 120  # Minimum angle (degrees) between velocity change and prior velocity to validate

# Initialize results storage
backtracking_records = []  # one dict per validated backtracking moment, across all files
pdf_pages = PdfPages(output_pdf_file)  # multi-page PDF; closed after the processing loop

def preprocess_trajectory(t, x, y):
    """Trim a raw trajectory and split its samples into straight-line vs. other.

    The first 52 samples are discarded (2, then a further 50) and the y axis is
    negated. A velocity vector is derived for every remaining sample. For each
    position ``i`` the heading at ``i - 1`` is compared with the heading at
    ``i + window_size - 1``; when the angle between them is smaller than
    ``angle_threshold`` degrees, samples ``i .. i + window_size - 1`` are marked
    as straight-line movement.

    Args:
        t: Array of sample timestamps.
        x: Array of x coordinates, aligned with ``t``.
        y: Array of y coordinates, aligned with ``t``.

    Returns:
        A tuple ``(t, x, y, significant_points, straight_line_points,
        velocity_vectors)`` where the first three entries are the trimmed
        (and, for ``y``, negated) arrays, ``significant_points`` is the
        ascending list of indices NOT marked as straight-line,
        ``straight_line_points`` is the list of marked indices, and
        ``velocity_vectors`` has one ``(vx, vy)`` row per trimmed sample.
        All indices refer to the trimmed arrays.
    """
    # Drop the first 2 samples, then 50 more; y is mirrored in between.
    leading_samples = 2 + 50
    t = t[leading_samples:]
    x = x[leading_samples:]
    y = (-y)[leading_samples:]

    # Per-sample steps; prepending the first value makes step 0 equal to zero.
    step_x = np.diff(x, prepend=x[0])
    step_y = np.diff(y, prepend=y[0])
    step_t = np.diff(t, prepend=t[0])
    tiny = np.finfo(float).eps
    step_t[step_t == 0] = tiny  # Prevent division by zero

    velocity_vectors = np.vstack((step_x / step_t, step_y / step_t)).T
    speeds = np.linalg.norm(velocity_vectors, axis=1)
    speeds[speeds == 0] = tiny
    headings = velocity_vectors / speeds[:, None]

    n_samples = len(t)
    angles = np.zeros(n_samples)  # filled below; not part of the return value
    straight_line_points = set()

    for start in range(1, n_samples - window_size + 1):
        stop = start + window_size
        cos_turn = np.clip(np.dot(headings[start - 1], headings[stop - 1]), -1.0, 1.0)
        turn_deg = np.arccos(cos_turn) * (180 / np.pi)
        angles[start] = turn_deg

        if turn_deg < angle_threshold:
            straight_line_points.update(range(start, stop))

    # Everything not flagged as straight-line, in ascending index order.
    significant_points = [
        idx for idx in np.arange(n_samples) if idx not in straight_line_points
    ]

    return t, x, y, significant_points, list(straight_line_points), velocity_vectors

def detect_backtracking(t, x, y, velocity_vectors):
    """Find samples where the mean velocity reverses direction.

    For every sample, the mean velocity over the ``T / 2`` time units before it
    is compared with the mean velocity over the ``T / 2`` time units after it.
    The sample becomes a candidate when both windows contain at least two
    samples, both mean velocities are large enough, and the angle between them
    exceeds ``theta_threshold`` degrees. Candidates are then grouped into runs
    of consecutive indices; runs of length one are discarded and each
    remaining run is represented by its middle index.

    Args:
        t: Array of sample timestamps.
        x: Array of x coordinates, aligned with ``t``.
        y: Array of y coordinates, aligned with ``t``.
        velocity_vectors: Accepted for interface compatibility; velocities are
            recomputed from ``t``, ``x`` and ``y`` and this argument is not read.

    Returns:
        List of representative sample indices, one per run of candidates.
    """
    step_x = np.diff(x, prepend=x[0])
    step_y = np.diff(y, prepend=y[0])
    step_t = np.diff(t, prepend=t[0])
    step_t[step_t == 0] = np.finfo(float).eps
    displacements = np.vstack((step_x / step_t, step_y / step_t)).T

    candidates = []
    for i in range(len(t)):
        window_start = t[i] - T / 2
        window_end = t[i] + T / 2
        before = np.where((t >= window_start) & (t < t[i]))[0]
        after = np.where((t > t[i]) & (t <= window_end))[0]

        # Need at least two samples on each side to form a meaningful mean.
        if len(before) < 2 or len(after) < 2:
            continue

        mean_before = np.mean(displacements[before], axis=0)
        mean_after = np.mean(displacements[after], axis=0)
        speed_before = np.linalg.norm(mean_before)
        speed_after = np.linalg.norm(mean_after)

        # NOTE(review): the "after" speed is checked against distance_threshold
        # while the "before" speed uses min_speed - confirm this is intended.
        if speed_before < min_speed or speed_after < distance_threshold:
            continue

        scale = speed_before * speed_after
        if scale == 0:
            continue

        cos_theta = np.clip(np.dot(mean_before, mean_after) / scale, -1.0, 1.0)
        theta = np.arccos(cos_theta) * (180 / np.pi)
        if theta > theta_threshold:
            candidates.append(i)

    # Post-processing: collapse runs of consecutive candidate indices.
    runs = []
    for idx in candidates:
        if runs and idx == runs[-1][-1] + 1:
            runs[-1].append(idx)
        else:
            runs.append([idx])

    # Isolated single detections are dropped; each longer run yields its midpoint.
    return [run[len(run) // 2] for run in runs if len(run) >= 2]

def validate_backtracking(t, x, y, backtracking_indices, velocity_vectors):
    """Keep only candidates whose velocity change opposes the prior velocity.

    The "acceleration" at a sample is the difference between consecutive rows
    of ``velocity_vectors`` (not divided by the time step). A candidate index
    ``i`` is kept when the angle between that difference at ``i`` and the
    velocity at ``i - 1`` is greater than ``acceleration_threshold`` degrees.

    Args:
        t: Unused; accepted for interface compatibility.
        x: Unused; accepted for interface compatibility.
        y: Unused; accepted for interface compatibility.
        backtracking_indices: Candidate sample indices to validate.
        velocity_vectors: Array with one ``(vx, vy)`` row per sample.

    Returns:
        List of the candidate indices that passed validation, in input order.
    """
    velocity_changes = np.diff(velocity_vectors, axis=0)
    # np.diff yields one row fewer; pad a zero row in front to realign indices.
    if velocity_changes.shape[0] != velocity_vectors.shape[0]:
        zero_row = np.zeros_like(velocity_vectors[0])
        velocity_changes = np.vstack((zero_row, velocity_changes))

    n_changes = len(velocity_changes)
    validated_backtracking = []
    for idx in backtracking_indices:
        # Index 0 has no preceding velocity; indices past the end are ignored.
        if idx == 0 or idx >= n_changes:
            continue

        change = velocity_changes[idx]
        prior_velocity = velocity_vectors[idx - 1]
        scale = np.linalg.norm(change) * np.linalg.norm(prior_velocity)
        if scale == 0:
            continue

        cos_angle = np.clip(np.dot(change, prior_velocity) / scale, -1.0, 1.0)
        angle = np.arccos(cos_angle) * (180 / np.pi)
        if angle > acceleration_threshold:
            validated_backtracking.append(idx)

    return validated_backtracking

# Process all files in the folder: for every "*_cleaned.txt" file, detect and
# validate backtracking moments, record them, and add one plot page to the PDF.
for filename in os.listdir(data_folder):
    if not filename.endswith("_cleaned.txt"):
        continue

    file_path = os.path.join(data_folder, filename)
    participant_id = filename.split("_")[0]  # Extract participant ID

    try:
        data = pd.read_csv(file_path, delimiter=',')
        data = data.iloc[3:].reset_index(drop=True)
        required_columns = {'UNIX', 'xHead', 'yHead', 'Task'}
        if not required_columns.issubset(data.columns):
            print(f"Skipping file {filename}: Missing required columns")
            continue

        data['Time'] = data['UNIX'] / 1000.0
        t, x, y, task = data['Time'].values, data['xHead'].values, data['yHead'].values, data['Task'].values

        # Apply preprocessing
        t_proc, x_proc, y_proc, sig_points, straight_points, vel_vectors = preprocess_trajectory(t, x, y)

        # Detect and validate backtracking
        backtrack_idx = detect_backtracking(t_proc, x_proc, y_proc, vel_vectors)
        validated_backtracking = validate_backtracking(t_proc, x_proc, y_proc, backtrack_idx, vel_vectors)

        # Preprocessing drops leading samples, so indices into the processed
        # arrays are shifted relative to the raw arrays / DataFrame rows.
        # Without this offset, Task and UnixTime would be taken from a
        # different sample than CoordinateX / CoordinateY.
        index_offset = len(t) - len(t_proc)

        # Save backtracking records
        for idx in validated_backtracking:
            raw_idx = idx + index_offset
            backtracking_records.append({
                'ParticipantID': participant_id,
                'Task': task[raw_idx],
                'UnixTime': int(data.loc[raw_idx, 'UNIX']),
                'CoordinateX': x_proc[idx],
                'CoordinateY': y_proc[idx]
            })

        # Plot results
        fig = plt.figure(figsize=(10, 6))
        try:
            plt.plot(x, -y, label='Original Trajectory', alpha=0.6, color='gray')
            plt.scatter(np.array(x_proc)[sig_points], np.array(y_proc)[sig_points], color='red', label='Significant Points')
            plt.scatter(np.array(x_proc)[straight_points], np.array(y_proc)[straight_points], color='green', alpha=0.1, label='Straight-Line Movement')

            if validated_backtracking:
                plt.scatter(
                    np.array(x_proc)[validated_backtracking],
                    np.array(y_proc)[validated_backtracking],
                    color='blue',
                    s=100,
                    label='Validated Backtracking Moments'
                )

            plt.xlabel('X Position')
            plt.ylabel('Y Position')
            plt.title(f'Trajectory with Validated Backtracking Moments (Participant {participant_id})')
            plt.legend()
            plt.grid(True)

            pdf_pages.savefig(fig)
        finally:
            # Always release the figure, even if plotting or saving failed,
            # so a bad file does not leave open figures behind.
            plt.close(fig)

    except Exception as e:
        # Best-effort batch run: report the failing file and move on.
        print(f"Error processing file {filename}: {e}")

# Save backtracking table. Explicit columns keep the header row present even
# when no backtracking moment was recorded.
record_columns = ['ParticipantID', 'Task', 'UnixTime', 'CoordinateX', 'CoordinateY']
pd.DataFrame(backtracking_records, columns=record_columns).to_csv(output_csv_file, index=False)
pdf_pages.close()

print(f"All trajectory plots saved to {output_pdf_file}")
print(f"Backtracking table saved to {output_csv_file}")


Error processing file 93_cleaned.txt: No columns to parse from file
All trajectory plots saved to /Users/liziang/Desktop/Cornell/DAIL/All_Trajectory_Plots.pdf
Backtracking table saved to /Users/liziang/Desktop/Cornell/DAIL/Table_Backtrack.csv


In [None]:
import re
from PyPDF2 import PdfReader, PdfWriter

def sort_pdf_by_participant(input_pdf, output_pdf):
    """Write a copy of a PDF with its pages ordered by participant number.

    Each page's extracted text is searched for ``"Participant <number>"``.
    Pages with a match are written first, in ascending participant number
    (pages sharing a number keep their original relative order). Pages
    without a match are appended afterwards in their original order, so no
    page of the input is lost.

    Args:
        input_pdf: Path (or file object) of the PDF to read.
        output_pdf: Path of the PDF file to write.
    """
    # Read the PDF
    reader = PdfReader(input_pdf)
    participant_pattern = re.compile(r"Participant (\d+)")

    numbered_pages = []    # (participant_number, page_index)
    unnumbered_pages = []  # page indices with no recognisable participant

    # Extract participant numbers and associate with pages
    for page_index, page in enumerate(reader.pages):
        # Guard against a page that yields no text at all.
        text = page.extract_text() or ""
        match = participant_pattern.search(text)
        if match:
            numbered_pages.append((int(match.group(1)), page_index))
        else:
            unnumbered_pages.append(page_index)

    # Sort pages by participant number (stable: ties keep input order)
    numbered_pages.sort(key=lambda entry: entry[0])
    page_order = [page_index for _, page_index in numbered_pages] + unnumbered_pages

    # Create a new PDF with sorted pages
    writer = PdfWriter()
    for page_index in page_order:
        writer.add_page(reader.pages[page_index])

    # Save the sorted PDF
    with open(output_pdf, "wb") as f:
        writer.write(f)

# NOTE(review): `data_folder` holds the path of the input PDF here (not a
# folder), and both names below overwrite the variables of the same name
# assigned earlier in this file. The input file name also differs from the
# "All_Trajectory_Plots.pdf" written earlier - confirm it is the intended file.
data_folder = "/Users/liziang/Desktop/Cornell/DAIL/All_Trajectory_Plots_6.0.pdf"
output_pdf_file = "/Users/liziang/Desktop/Cornell/DAIL/All_Trajectory_Plots_6.0_sorted.pdf"
        
# Usage: write a copy of the input PDF with its pages ordered by participant number.
sort_pdf_by_participant(data_folder, output_pdf_file)
