In [1]:
!pip install -q transformers sentencepiece torch scikit-learn

from google.colab import drive
drive.mount('/content/drive', force_remount=True)
print("✓ Drive mounted")

Mounted at /content/drive
✓ Drive mounted


In [2]:
# ============================================
# CELL 2: Imports
# ============================================
import os
import pickle
import numpy as np
import csv
import matplotlib.pyplot as plt
from matplotlib.animation import FFMpegWriter
import torch
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from IPython.display import Video, HTML
import warnings
warnings.filterwarnings('ignore')

print("✓ All imports successful")

✓ All imports successful


In [3]:
# ============================================
# CELL 3: Configuration
# ============================================
# YOUR PATHS
PKL_PATH = "/content/drive/MyDrive/phoenixsmall_unzipped/phoenixsmall/mediapipe_pose_keypoints.pkl"
GLOSS_CSV = "/content/drive/MyDrive/phoenixsmall_unzipped/phoenixsmall/gloss_map.csv"
OUTPUT_DIR = "/content/outputs"

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Parameters
FRAMES_PER_GLOSS = 48  # Standard duration for each sign
FPS = 25
VIDEO_WIDTH, VIDEO_HEIGHT = 960, 720

# Verify paths
print("Keypoints exist:", os.path.exists(PKL_PATH))
print("Gloss CSV exists:", os.path.exists(GLOSS_CSV))

Keypoints exist: True
Gloss CSV exists: True


In [4]:
# ============================================
# CELL 4: Load Data
# ============================================
print("\n=== LOADING DATA ===")

# Load keypoints
with open(PKL_PATH, "rb") as f:
    kp_data = pickle.load(f)
print(f"✓ Loaded {len(kp_data)} videos")

# Load gloss mapping
gloss_to_video = {}
video_to_gloss = {}
all_glosses = []

with open(GLOSS_CSV, "r", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)
    video_idx = header.index("video")
    gloss_idx = header.index("gloss")

    for row in reader:
        gloss = row[gloss_idx]
        video = row[video_idx]
        if video in kp_data:  # Only include videos we have keypoints for
            gloss_to_video[gloss] = video
            video_to_gloss[video] = gloss
            all_glosses.append(gloss)

unique_glosses = list(set(all_glosses))
print(f"✓ Loaded {len(gloss_to_video)} gloss-video pairs")
print(f"✓ Unique glosses: {len(unique_glosses)}")
print(f"Sample glosses: {unique_glosses[:10]}")

# Inspect data structure
sample_video = list(kp_data.keys())[0]
sample_frames = kp_data[sample_video]
print(f"\nData structure check:")
print(f"  Sample video: {sample_video}")
print(f"  Number of frames: {len(sample_frames)}")
print(f"  Keypoints per frame: {len(sample_frames[0])}")
print(f"  First keypoint: {sample_frames[0][0]}")


=== LOADING DATA ===
✓ Loaded 50 videos
✓ Loaded 20 gloss-video pairs
✓ Unique glosses: 20
Sample glosses: ['GLOSS_10', 'GLOSS_18', 'GLOSS_2', 'GLOSS_16', 'GLOSS_9', 'GLOSS_8', 'GLOSS_11', 'GLOSS_13', 'GLOSS_1', 'GLOSS_5']

Data structure check:
  Sample video: 05October_2010_Tuesday_tagesschau-4236
  Number of frames: 82
  Keypoints per frame: 33
  First keypoint: [0.483663409948349, 0.2328890562057495, -1.3068078756332397, 0.9956284761428833]


In [5]:
# ============================================
# CELL 5: Keypoint Processing Functions
# ============================================
def frames_to_np(frames):
    """Convert list of frames to numpy array"""
    return np.array(frames, dtype=float)

def resample_frames(frames_np, target_len):
    """Resample frames to target length using linear interpolation"""
    n = frames_np.shape[0]
    if n == target_len:
        return frames_np

    # Linear interpolation
    idx = np.linspace(0, n-1, target_len)
    low = np.floor(idx).astype(int)
    high = np.ceil(idx).astype(int)
    high = np.clip(high, 0, n-1)  # Ensure within bounds

    w = (idx - low).reshape(-1, 1, 1)
    return frames_np[low] * (1 - w) + frames_np[high] * w

def smooth_sequence(seq, win_body=5, win_hands=12, hand_ids=(15, 16, 17, 18, 19, 20, 21, 22)):
    """Apply temporal smoothing to keypoint sequence"""
    F, K, C = seq.shape
    out = seq.copy()

    for k in range(K):
        win = win_hands if k in hand_ids else win_body
        for i in range(F):
            start = max(0, i - win // 2)
            end = min(F, i + win // 2 + 1)
            out[i, k] = seq[start:end, k].mean(axis=0)

    return out

def stabilize_pose(seq):
    """Stabilize the pose by centering around hip position"""
    seq2 = seq.copy()

    # MediaPipe hip keypoints are at indices 23 and 24
    if seq2.shape[1] > 24:
        hips = (seq2[:, 23, :2] + seq2[:, 24, :2]) / 2.0
        hip_x, hip_y = np.median(hips[:, 0]), np.median(hips[:, 1])

        # Center around (0.5, 0.6) for better framing
        seq2[:, :, 0] -= (hip_x - 0.5)
        seq2[:, :, 1] -= (hip_y - 0.6)

    return seq2

def fix_broken_frame(frame):
    """Repair invalid keypoints in a frame"""
    fixed = []
    for kp in frame:
        try:
            arr = np.array(kp, dtype=float)
            if arr.size < 2:
                arr = np.array([0., 0., 0., 0.], dtype=float)

            # Ensure 4 values: [x, y, z, confidence]
            if arr.size < 4:
                arr = np.pad(arr, (0, 4 - arr.size), constant_values=0)
            elif arr.size > 4:
                arr = arr[:4]

            if np.any(np.isnan(arr)) or np.any(np.isinf(arr)):
                arr = np.array([0., 0., 0., 0.], dtype=float)
        except:
            arr = np.array([0., 0., 0., 0.], dtype=float)

        fixed.append(arr)

    return np.stack(fixed)

print("✓ Keypoint processing functions ready")

✓ Keypoint processing functions ready


In [6]:
# ============================================
# CELL 6: Sign Connector (Co-articulation)
# ============================================
class SignConnector:
    """Predicts co-articulation duration and generates smooth transitions"""

    def __init__(self):
        self.default_duration = 8  # frames
        self.hand_indices = [15, 16, 17, 18, 19, 20, 21, 22]  # MediaPipe hand keypoints

    def predict_duration(self, sign1_end, sign2_start):
        """Predict co-articulation duration based on hand distance"""
        # Extract hand positions
        hands1 = sign1_end[self.hand_indices, :2]  # x, y only
        hands2 = sign2_start[self.hand_indices, :2]

        # Calculate Euclidean distance
        distance = np.linalg.norm(hands1 - hands2)

        # Scale duration based on distance (larger distance = longer transition)
        duration = int(self.default_duration + distance * 20)
        duration = np.clip(duration, 4, 20)  # Min 4, max 20 frames

        return duration

    def generate_coarticulation(self, sign1_end, sign2_start, duration):
        """Generate smooth transition between two signs"""
        # Linear interpolation in 3D space
        alphas = np.linspace(0, 1, duration).reshape(-1, 1, 1)
        transition = sign1_end * (1 - alphas) + sign2_start * alphas

        return transition

connector = SignConnector()
print("✓ Sign connector initialized")


✓ Sign connector initialized


In [7]:
# ============================================
# CELL 7: Sign Sequence Builder
# ============================================
def get_sequence_for_gloss(gloss):
    """Get keypoint sequence for a single gloss"""
    if gloss not in gloss_to_video:
        print(f"Warning: Gloss '{gloss}' not found, using random sign")
        gloss = np.random.choice(list(gloss_to_video.keys()))

    video_id = gloss_to_video[gloss]
    frames_list = kp_data[video_id]

    # Convert to numpy and resample
    arr = frames_to_np(frames_list)
    resampled = resample_frames(arr, FRAMES_PER_GLOSS)

    return resampled

def build_sequence(gloss_list):
    """Build complete sign sequence from gloss list with co-articulations"""
    if not gloss_list:
        raise ValueError("Gloss list cannot be empty")

    # Get all sign sequences
    sign_sequences = [get_sequence_for_gloss(g) for g in gloss_list]

    # Build final sequence with co-articulations
    final_sequence = [sign_sequences[0]]

    for i in range(1, len(sign_sequences)):
        prev_sign_end = sign_sequences[i-1][-1]
        curr_sign_start = sign_sequences[i][0]

        # Predict and generate co-articulation
        duration = connector.predict_duration(prev_sign_end, curr_sign_start)
        coartic = connector.generate_coarticulation(
            prev_sign_end, curr_sign_start, duration
        )

        final_sequence.append(coartic)
        final_sequence.append(sign_sequences[i])

    # Concatenate all parts
    full_seq = np.concatenate(final_sequence, axis=0)

    # Apply smoothing and stabilization
    full_seq = smooth_sequence(full_seq)
    full_seq = stabilize_pose(full_seq)

    return full_seq

print("✓ Sequence builder ready")

✓ Sequence builder ready


In [8]:
# ============================================
# CELL 8: Video Renderer
# ============================================
# MediaPipe pose connections (skeleton)
POSE_CONNECTIONS = [
    # Face
    (0, 1), (1, 2), (2, 3), (3, 7),
    (0, 4), (4, 5), (5, 6), (6, 8),
    # Torso
    (9, 10),
    (11, 12), (11, 13), (13, 15),  # Left arm
    (12, 14), (14, 16),  # Right arm
    (11, 23), (12, 24),  # Torso to hips
    (23, 24),  # Hips
    # Legs
    (23, 25), (25, 27), (27, 29), (29, 31),  # Left leg
    (24, 26), (26, 28), (28, 30), (30, 32),  # Right leg
    # Hands
    (15, 17), (15, 19), (15, 21),  # Left hand
    (16, 18), (16, 20), (16, 22),  # Right hand
]

def frame_to_pixels(frame, width=VIDEO_WIDTH, height=VIDEO_HEIGHT):
    """Convert normalized coordinates to pixel coordinates"""
    arr = np.array(frame, dtype=float)
    if arr.shape[1] < 2:
        return np.zeros((arr.shape[0], 2))

    x = np.clip(arr[:, 0], 0, 1) * width
    y = np.clip(arr[:, 1], 0, 1) * height

    pts = np.stack([x, y], axis=1)
    pts[~np.isfinite(pts)] = 0

    return pts

def render_video(sequence, output_path, title="Sign Language"):
    """Render keypoint sequence as video"""
    print(f"\n=== RENDERING VIDEO ===")
    print(f"Frames: {len(sequence)}")
    print(f"Output: {output_path}")

    fig, ax = plt.subplots(figsize=(VIDEO_WIDTH/100, VIDEO_HEIGHT/100), dpi=100)
    ax.set_xlim(0, VIDEO_WIDTH)
    ax.set_ylim(VIDEO_HEIGHT, 0)
    ax.axis('off')
    ax.set_facecolor('#f0f0f0')

    # Initialize plot elements
    scatter = ax.scatter([], [], s=80, c='#2c3e50', zorder=3, alpha=0.8)
    lines = [ax.plot([], [], linewidth=6, color='#34495e',
                     solid_capstyle='round', alpha=0.7)[0]
             for _ in POSE_CONNECTIONS]

    # Highlight hands
    left_hand = ax.plot([], [], 'o', markersize=20, color='#e74c3c',
                        markeredgecolor='white', markeredgewidth=2, zorder=5)[0]
    right_hand = ax.plot([], [], 'o', markersize=20, color='#3498db',
                         markeredgecolor='white', markeredgewidth=2, zorder=5)[0]

    # Add title
    ax.text(VIDEO_WIDTH/2, 50, title, fontsize=24, ha='center',
            weight='bold', color='#2c3e50')

    writer = FFMpegWriter(fps=FPS, bitrate=5000)

    with writer.saving(fig, output_path, 100):
        for i, frame in enumerate(sequence):
            if i % 25 == 0:
                print(f"  Progress: {i}/{len(sequence)} frames", end='\r')

            # Fix frame and convert to pixels
            frame_fixed = fix_broken_frame(frame)
            pts = frame_to_pixels(frame_fixed)

            # Update keypoints
            scatter.set_offsets(pts)

            # Update skeleton lines
            for line, (a, b) in zip(lines, POSE_CONNECTIONS):
                if a < len(pts) and b < len(pts):
                    xa, ya = float(pts[a, 0]), float(pts[a, 1])
                    xb, yb = float(pts[b, 0]), float(pts[b, 1])

                    # Only draw if both points are valid
                    if xa > 0 and ya > 0 and xb > 0 and yb > 0:
                        line.set_data([xa, xb], [ya, yb])
                    else:
                        line.set_data([], [])
                else:
                    line.set_data([], [])

            # Update hand markers (wrists: 15=left, 16=right)
            if 15 < len(pts) and pts[15, 0] > 0:
                left_hand.set_data([pts[15, 0]], [pts[15, 1]])
            else:
                left_hand.set_data([], [])

            if 16 < len(pts) and pts[16, 0] > 0:
                right_hand.set_data([pts[16, 0]], [pts[16, 1]])
            else:
                right_hand.set_data([], [])

            writer.grab_frame()

    plt.close(fig)
    print(f"\n✓ Video saved: {output_path}")

print("✓ Renderer ready")

✓ Renderer ready


In [9]:
# ============================================
# CELL 9: Text-to-Gloss Translator (Simple Version)
# ============================================
class SimpleText2Gloss:
    """Simple rule-based text-to-gloss translator"""

    def __init__(self, glosses):
        self.glosses = glosses
        self.gloss_set = set(glosses)

    def translate(self, text):
        """
        Simple translation: map words to glosses
        In production, this would be an mBART model
        """
        words = text.upper().replace('.', '').replace(',', '').split()
        gloss_sequence = []

        for word in words:
            # Try direct match
            if word in self.gloss_set:
                gloss_sequence.append(word)
            # Try with GLOSS_ prefix (common pattern)
            elif f"GLOSS_{word}" in self.gloss_set:
                gloss_sequence.append(f"GLOSS_{word}")
            # Try fuzzy match (find similar gloss)
            else:
                for gloss in self.glosses:
                    if word in gloss or gloss in word:
                        gloss_sequence.append(gloss)
                        break

        # If no matches found, use random glosses
        if not gloss_sequence:
            gloss_sequence = [np.random.choice(self.glosses)
                             for _ in range(min(len(words), 5))]

        return gloss_sequence

translator = SimpleText2Gloss(unique_glosses)
print("✓ Text2Gloss translator ready")

✓ Text2Gloss translator ready


In [10]:
# ============================================
# CELL 10: Complete Pipeline Function
# ============================================
def spoken_to_sign(text, output_name="output"):
    """Complete Spoken2Sign pipeline"""
    print(f"\n{'='*50}")
    print(f"SPOKEN2SIGN TRANSLATION")
    print(f"{'='*50}")
    print(f"Input text: {text}")

    # Step 1: Text to Gloss
    print("\n[1/3] Translating text to gloss sequence...")
    gloss_sequence = translator.translate(text)
    print(f"  Gloss sequence: {gloss_sequence}")

    # Step 2: Build sign sequence
    print("\n[2/3] Building sign sequence...")
    sign_sequence = build_sequence(gloss_sequence)
    print(f"  Total frames: {len(sign_sequence)}")
    print(f"  Duration: {len(sign_sequence) / FPS:.2f} seconds")

    # Step 3: Render video
    print("\n[3/3] Rendering video...")
    output_path = f"{OUTPUT_DIR}/{output_name}.mp4"
    render_video(sign_sequence, output_path, title=text[:40])

    print(f"\n{'='*50}")
    print(f"✓ TRANSLATION COMPLETE!")
    print(f"{'='*50}")

    return output_path, gloss_sequence

print("✓ Complete pipeline ready")


✓ Complete pipeline ready


In [11]:
# ============================================
# CELL 11: TEST THE SYSTEM
# ============================================
print("\n" + "="*60)
print("TESTING SPOKEN2SIGN SYSTEM")
print("="*60)

# Test with first 3 glosses
test_glosses = unique_glosses[:3]
print(f"\nTest 1: Using glosses directly")
print(f"Glosses: {test_glosses}")

sequence = build_sequence(test_glosses)
output1 = f"{OUTPUT_DIR}/test_direct_glosses.mp4"
render_video(sequence, output1, title="Test: Direct Glosses")

# Display video
display(Video(output1, width=640, height=480))



TESTING SPOKEN2SIGN SYSTEM

Test 1: Using glosses directly
Glosses: ['GLOSS_10', 'GLOSS_18', 'GLOSS_2']

=== RENDERING VIDEO ===
Frames: 181
Output: /content/outputs/test_direct_glosses.mp4

✓ Video saved: /content/outputs/test_direct_glosses.mp4


In [12]:
# ============================================
# CELL 12: TEXT-TO-SIGN TRANSLATION
# ============================================
# Now test with actual text
test_texts = [
    "HELLO WORLD",
    "GOOD MORNING",
    "THANK YOU"
]

for i, text in enumerate(test_texts):
    video_path, glosses = spoken_to_sign(text, f"translation_{i+1}")
    print(f"\nVideo saved: {video_path}")
    print(f"Glosses used: {glosses}\n")
    display(Video(video_path, width=640, height=480))


SPOKEN2SIGN TRANSLATION
Input text: HELLO WORLD

[1/3] Translating text to gloss sequence...
  Gloss sequence: [np.str_('GLOSS_6'), np.str_('GLOSS_8')]

[2/3] Building sign sequence...
  Total frames: 107
  Duration: 4.28 seconds

[3/3] Rendering video...

=== RENDERING VIDEO ===
Frames: 107
Output: /content/outputs/translation_1.mp4

✓ Video saved: /content/outputs/translation_1.mp4

✓ TRANSLATION COMPLETE!

Video saved: /content/outputs/translation_1.mp4
Glosses used: [np.str_('GLOSS_6'), np.str_('GLOSS_8')]




SPOKEN2SIGN TRANSLATION
Input text: GOOD MORNING

[1/3] Translating text to gloss sequence...
  Gloss sequence: [np.str_('GLOSS_8'), np.str_('GLOSS_16')]

[2/3] Building sign sequence...
  Total frames: 112
  Duration: 4.48 seconds

[3/3] Rendering video...

=== RENDERING VIDEO ===
Frames: 112
Output: /content/outputs/translation_2.mp4

✓ Video saved: /content/outputs/translation_2.mp4

✓ TRANSLATION COMPLETE!

Video saved: /content/outputs/translation_2.mp4
Glosses used: [np.str_('GLOSS_8'), np.str_('GLOSS_16')]




SPOKEN2SIGN TRANSLATION
Input text: THANK YOU

[1/3] Translating text to gloss sequence...
  Gloss sequence: [np.str_('GLOSS_17'), np.str_('GLOSS_5')]

[2/3] Building sign sequence...
  Total frames: 116
  Duration: 4.64 seconds

[3/3] Rendering video...

=== RENDERING VIDEO ===
Frames: 116
Output: /content/outputs/translation_3.mp4

✓ Video saved: /content/outputs/translation_3.mp4

✓ TRANSLATION COMPLETE!

Video saved: /content/outputs/translation_3.mp4
Glosses used: [np.str_('GLOSS_17'), np.str_('GLOSS_5')]



In [13]:
# ============================================
# CELL 13: INTERACTIVE TRANSLATION
# ============================================
def translate_custom_text():
    """Interactive function for custom translations"""
    print("\n" + "="*60)
    print("CUSTOM TEXT TRANSLATION")
    print("="*60)
    print("\nAvailable glosses sample:", unique_glosses[:20])
    print(f"\nTotal glosses available: {len(unique_glosses)}")

    # Get user input
    text = input("\nEnter text to translate to sign language: ")

    if text:
        video_path, glosses = spoken_to_sign(text, "custom_translation")
        display(Video(video_path, width=640, height=480))
        return video_path
    else:
        print("No text entered")

# Run interactive translation
translate_custom_text()



CUSTOM TEXT TRANSLATION

Available glosses sample: ['GLOSS_10', 'GLOSS_18', 'GLOSS_2', 'GLOSS_16', 'GLOSS_9', 'GLOSS_8', 'GLOSS_11', 'GLOSS_13', 'GLOSS_1', 'GLOSS_5', 'GLOSS_3', 'GLOSS_12', 'GLOSS_0', 'GLOSS_19', 'GLOSS_15', 'GLOSS_17', 'GLOSS_6', 'GLOSS_4', 'GLOSS_7', 'GLOSS_14']

Total glosses available: 20

Enter text to translate to sign language: I go school

SPOKEN2SIGN TRANSLATION
Input text: I go school

[1/3] Translating text to gloss sequence...
  Gloss sequence: [np.str_('GLOSS_17'), np.str_('GLOSS_13'), np.str_('GLOSS_6')]

[2/3] Building sign sequence...
  Total frames: 178
  Duration: 7.12 seconds

[3/3] Rendering video...

=== RENDERING VIDEO ===
Frames: 178
Output: /content/outputs/custom_translation.mp4

✓ Video saved: /content/outputs/custom_translation.mp4

✓ TRANSLATION COMPLETE!


'/content/outputs/custom_translation.mp4'

In [14]:
# ============================================
# CELL 14: BATCH PROCESSING
# ============================================
def batch_translate(text_list, output_prefix="batch"):
    """Translate multiple texts"""
    results = []

    for i, text in enumerate(text_list):
        print(f"\n{'='*50}")
        print(f"Processing {i+1}/{len(text_list)}: {text}")
        print(f"{'='*50}")

        video_path, glosses = spoken_to_sign(text, f"{output_prefix}_{i+1}")
        results.append({
            'text': text,
            'glosses': glosses,
            'video': video_path
        })

    return results

# Example batch processing
batch_texts = [
    "HELLO",
    "GOODBYE",
    "THANK YOU"
]

print("\n" + "="*60)
print("BATCH TRANSLATION DEMO")
print("="*60)

batch_results = batch_translate(batch_texts[:2], "demo")  # Process first 2

# Display all results
for result in batch_results:
    print(f"\nText: {result['text']}")
    print(f"Glosses: {result['glosses']}")
    display(Video(result['video'], width=640, height=480))



BATCH TRANSLATION DEMO

Processing 1/2: HELLO

SPOKEN2SIGN TRANSLATION
Input text: HELLO

[1/3] Translating text to gloss sequence...
  Gloss sequence: [np.str_('GLOSS_1')]

[2/3] Building sign sequence...
  Total frames: 48
  Duration: 1.92 seconds

[3/3] Rendering video...

=== RENDERING VIDEO ===
Frames: 48
Output: /content/outputs/demo_1.mp4

✓ Video saved: /content/outputs/demo_1.mp4

✓ TRANSLATION COMPLETE!

Processing 2/2: GOODBYE

SPOKEN2SIGN TRANSLATION
Input text: GOODBYE

[1/3] Translating text to gloss sequence...
  Gloss sequence: [np.str_('GLOSS_5')]

[2/3] Building sign sequence...
  Total frames: 48
  Duration: 1.92 seconds

[3/3] Rendering video...

=== RENDERING VIDEO ===
Frames: 48
Output: /content/outputs/demo_2.mp4

✓ Video saved: /content/outputs/demo_2.mp4

✓ TRANSLATION COMPLETE!

Text: HELLO
Glosses: [np.str_('GLOSS_1')]



Text: GOODBYE
Glosses: [np.str_('GLOSS_5')]


In [15]:
# ============================================
# CELL 15: SYSTEM STATISTICS
# ============================================
print("\n" + "="*60)
print("SYSTEM STATISTICS")
print("="*60)

print(f"\nDataset:")
print(f"  Total videos: {len(kp_data)}")
print(f"  Total glosses: {len(unique_glosses)}")
print(f"  Gloss-video pairs: {len(gloss_to_video)}")

print(f"\nConfiguration:")
print(f"  Frames per gloss: {FRAMES_PER_GLOSS}")
print(f"  Video FPS: {FPS}")
print(f"  Resolution: {VIDEO_WIDTH}x{VIDEO_HEIGHT}")

print(f"\nOutput directory: {OUTPUT_DIR}")
print(f"  Files created: {len(os.listdir(OUTPUT_DIR))}")

# List all output files
print(f"\nGenerated videos:")
for file in sorted(os.listdir(OUTPUT_DIR)):
    if file.endswith('.mp4'):
        file_path = os.path.join(OUTPUT_DIR, file)
        size_mb = os.path.getsize(file_path) / (1024*1024)
        print(f"  - {file} ({size_mb:.2f} MB)")

print("\n" + "="*60)
print("✓ SYSTEM READY FOR USE!")
print("="*60)


SYSTEM STATISTICS

Dataset:
  Total videos: 50
  Total glosses: 20
  Gloss-video pairs: 20

Configuration:
  Frames per gloss: 48
  Video FPS: 25
  Resolution: 960x720

Output directory: /content/outputs
  Files created: 7

Generated videos:
  - custom_translation.mp4 (1.96 MB)
  - demo_1.mp4 (0.44 MB)
  - demo_2.mp4 (0.39 MB)
  - test_direct_glosses.mp4 (1.77 MB)
  - translation_1.mp4 (1.16 MB)
  - translation_2.mp4 (1.10 MB)
  - translation_3.mp4 (1.13 MB)

✓ SYSTEM READY FOR USE!
